Repository: liangjiandeng/DLPan-Toolbox Branch: main Commit: a34f884af889 Files: 1396 Total size: 5.7 MB Directory structure: gitextract_54ils51p/ ├── .gitignore ├── 01-DL-toolbox(Pytorch)/ │ ├── LICENSE │ ├── UDL/ │ │ ├── AutoDL/ │ │ │ ├── __init__.py │ │ │ └── trainer.py │ │ ├── Basis/ │ │ │ ├── auxiliary/ │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── fp16_utils.py │ │ │ │ ├── torchstat/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ ├── compute_flops.py │ │ │ │ │ ├── compute_madd.py │ │ │ │ │ ├── compute_memory.py │ │ │ │ │ ├── model_hook.py │ │ │ │ │ ├── reporter.py │ │ │ │ │ ├── stat_tree.py │ │ │ │ │ └── statistics.py │ │ │ │ └── utils.py │ │ │ ├── cal_ssim.py │ │ │ ├── config.py │ │ │ ├── criterion_metrics.py │ │ │ ├── dist_utils.py │ │ │ ├── kill_dist.sh │ │ │ ├── launch.py │ │ │ ├── logger.py │ │ │ ├── metrics.py │ │ │ ├── optim.py │ │ │ ├── option.py │ │ │ ├── postprocess.py │ │ │ ├── python_sub_class.py │ │ │ ├── slurm_train.sh │ │ │ ├── snmn_d.sh │ │ │ └── variance_sacling_initializer.py │ │ ├── Data/ │ │ │ └── pansharpening/ │ │ │ ├── test_data/ │ │ │ │ └── readme-test.txt │ │ │ ├── training_data/ │ │ │ │ └── readme-test.txt │ │ │ └── validation_data/ │ │ │ └── readme-test.txt │ │ ├── mmcv/ │ │ │ ├── CITATION.cff │ │ │ ├── CONTRIBUTING.md │ │ │ ├── Dockerfile │ │ │ ├── Jenkinsfile │ │ │ ├── LICENSE │ │ │ ├── LICENSES.md │ │ │ ├── MANIFEST.in │ │ │ ├── README_zh-CN.md │ │ │ ├── TERMINOLOGY.md │ │ │ ├── docs/ │ │ │ │ ├── en/ │ │ │ │ │ ├── Makefile │ │ │ │ │ ├── _static/ │ │ │ │ │ │ └── css/ │ │ │ │ │ │ └── readthedocs.css │ │ │ │ │ ├── api.rst │ │ │ │ │ ├── community/ │ │ │ │ │ │ └── pr.md │ │ │ │ │ ├── compatibility.md │ │ │ │ │ ├── conf.py │ │ │ │ │ ├── deployment/ │ │ │ │ │ │ ├── mmcv_ops_definition.md │ │ │ │ │ │ ├── onnx.md │ │ │ │ │ │ ├── onnxruntime_custom_ops.md │ │ │ │ │ │ ├── onnxruntime_op.md │ │ │ │ │ │ ├── tensorrt_custom_ops.md │ │ │ │ │ │ └── tensorrt_plugin.md │ │ │ │ │ ├── faq.md │ │ │ │ │ ├── get_started/ │ │ │ 
│ │ │ ├── build.md │ │ │ │ │ │ ├── installation.md │ │ │ │ │ │ ├── introduction.md │ │ │ │ │ │ └── previous_versions.md │ │ │ │ │ ├── index.rst │ │ │ │ │ ├── make.bat │ │ │ │ │ └── understand_mmcv/ │ │ │ │ │ ├── cnn.md │ │ │ │ │ ├── config.md │ │ │ │ │ ├── data_process.md │ │ │ │ │ ├── io.md │ │ │ │ │ ├── ops.md │ │ │ │ │ ├── registry.md │ │ │ │ │ ├── runner.md │ │ │ │ │ ├── utils.md │ │ │ │ │ └── visualization.md │ │ │ │ └── zh_cn/ │ │ │ │ ├── Makefile │ │ │ │ ├── _static/ │ │ │ │ │ └── css/ │ │ │ │ │ └── readthedocs.css │ │ │ │ ├── api.rst │ │ │ │ ├── community/ │ │ │ │ │ ├── contributing.md │ │ │ │ │ └── pr.md │ │ │ │ ├── compatibility.md │ │ │ │ ├── conf.py │ │ │ │ ├── deployment/ │ │ │ │ │ ├── onnx.md │ │ │ │ │ ├── onnxruntime_custom_ops.md │ │ │ │ │ ├── onnxruntime_op.md │ │ │ │ │ ├── tensorrt_custom_ops.md │ │ │ │ │ └── tensorrt_plugin.md │ │ │ │ ├── faq.md │ │ │ │ ├── get_started/ │ │ │ │ │ ├── build.md │ │ │ │ │ ├── installation.md │ │ │ │ │ ├── introduction.md │ │ │ │ │ └── previous_versions.md │ │ │ │ ├── index.rst │ │ │ │ ├── make.bat │ │ │ │ └── understand_mmcv/ │ │ │ │ ├── cnn.md │ │ │ │ ├── config.md │ │ │ │ ├── data_process.md │ │ │ │ ├── io.md │ │ │ │ ├── ops.md │ │ │ │ ├── registry.md │ │ │ │ ├── runner.md │ │ │ │ ├── utils.md │ │ │ │ └── visualization.md │ │ │ ├── examples/ │ │ │ │ └── train.py │ │ │ ├── mmcv/ │ │ │ │ ├── __init__.py │ │ │ │ ├── arraymisc/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── quantization.py │ │ │ │ ├── cnn/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── alexnet.py │ │ │ │ │ ├── bricks/ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── activation.py │ │ │ │ │ │ ├── context_block.py │ │ │ │ │ │ ├── conv.py │ │ │ │ │ │ ├── conv2d_adaptive_padding.py │ │ │ │ │ │ ├── conv_module.py │ │ │ │ │ │ ├── conv_ws.py │ │ │ │ │ │ ├── depthwise_separable_conv_module.py │ │ │ │ │ │ ├── drop.py │ │ │ │ │ │ ├── generalized_attention.py │ │ │ │ │ │ ├── hsigmoid.py │ │ │ │ │ │ ├── hswish.py │ │ │ │ │ │ ├── non_local.py │ │ │ │ │ │ ├── norm.py │ │ │ │ │ │ ├── 
padding.py │ │ │ │ │ │ ├── plugin.py │ │ │ │ │ │ ├── registry.py │ │ │ │ │ │ ├── scale.py │ │ │ │ │ │ ├── swish.py │ │ │ │ │ │ ├── transformer.py │ │ │ │ │ │ ├── upsample.py │ │ │ │ │ │ └── wrappers.py │ │ │ │ │ ├── builder.py │ │ │ │ │ ├── resnet.py │ │ │ │ │ ├── utils/ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── flops_counter.py │ │ │ │ │ │ ├── fuse_conv_bn.py │ │ │ │ │ │ ├── sync_bn.py │ │ │ │ │ │ └── weight_init.py │ │ │ │ │ └── vgg.py │ │ │ │ ├── engine/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── test.py │ │ │ │ ├── fileio/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── file_client.py │ │ │ │ │ ├── handlers/ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── base.py │ │ │ │ │ │ ├── json_handler.py │ │ │ │ │ │ ├── pickle_handler.py │ │ │ │ │ │ └── yaml_handler.py │ │ │ │ │ ├── io.py │ │ │ │ │ └── parse.py │ │ │ │ ├── image/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── colorspace.py │ │ │ │ │ ├── geometric.py │ │ │ │ │ ├── io.py │ │ │ │ │ ├── misc.py │ │ │ │ │ └── photometric.py │ │ │ │ ├── onnx/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── info.py │ │ │ │ │ ├── onnx_utils/ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── symbolic_helper.py │ │ │ │ │ └── symbolic.py │ │ │ │ ├── ops/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── active_rotated_filter.py │ │ │ │ │ ├── assign_score_withk.py │ │ │ │ │ ├── ball_query.py │ │ │ │ │ ├── bbox.py │ │ │ │ │ ├── border_align.py │ │ │ │ │ ├── box_iou_rotated.py │ │ │ │ │ ├── carafe.py │ │ │ │ │ ├── cc_attention.py │ │ │ │ │ ├── contour_expand.py │ │ │ │ │ ├── convex_iou.py │ │ │ │ │ ├── corner_pool.py │ │ │ │ │ ├── correlation.py │ │ │ │ │ ├── csrc/ │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── common/ │ │ │ │ │ │ │ ├── box_iou_rotated_utils.hpp │ │ │ │ │ │ │ ├── cuda/ │ │ │ │ │ │ │ │ ├── active_rotated_filter_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── assign_score_withk_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── ball_query_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── bbox_overlaps_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── border_align_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── 
box_iou_rotated_cuda.cuh │ │ │ │ │ │ │ │ ├── carafe_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── carafe_naive_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── common_cuda_helper.hpp │ │ │ │ │ │ │ │ ├── convex_iou_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── correlation_cuda.cuh │ │ │ │ │ │ │ │ ├── deform_conv_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── deform_roi_pool_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── furthest_point_sample_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── gather_points_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── group_points_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── iou3d_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── knn_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── masked_conv2d_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── min_area_polygons_cuda.cuh │ │ │ │ │ │ │ │ ├── modulated_deform_conv_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── ms_deform_attn_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── nms_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── nms_rotated_cuda.cuh │ │ │ │ │ │ │ │ ├── parrots_cudawarpfunction.cuh │ │ │ │ │ │ │ │ ├── points_in_boxes_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── points_in_polygons_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── psamask_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── riroi_align_rotated_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── roi_align_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── roi_align_rotated_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── roi_pool_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── roiaware_pool3d_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── roipoint_pool3d_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── rotated_feature_align_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── scatter_points_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── sigmoid_focal_loss_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── softmax_focal_loss_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── sync_bn_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── three_interpolate_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── three_nn_cuda_kernel.cuh │ │ │ │ │ │ │ │ ├── tin_shift_cuda_kernel.cuh │ │ │ │ │ │ │ │ └── voxelization_cuda_kernel.cuh │ │ │ │ │ │ │ ├── parrots_cpp_helper.hpp │ │ │ │ │ │ │ ├── parrots_cuda_helper.hpp │ │ │ │ │ │ │ ├── pytorch_cpp_helper.hpp │ │ │ │ │ │ │ ├── 
pytorch_cuda_helper.hpp │ │ │ │ │ │ │ └── pytorch_device_registry.hpp │ │ │ │ │ │ ├── onnxruntime/ │ │ │ │ │ │ │ ├── corner_pool.h │ │ │ │ │ │ │ ├── cpu/ │ │ │ │ │ │ │ │ ├── corner_pool.cpp │ │ │ │ │ │ │ │ ├── deform_conv.cpp │ │ │ │ │ │ │ │ ├── gridSample.cpp │ │ │ │ │ │ │ │ ├── modulated_deform_conv.cpp │ │ │ │ │ │ │ │ ├── nms.cpp │ │ │ │ │ │ │ │ ├── onnxruntime_register.cpp │ │ │ │ │ │ │ │ ├── reduce_ops.cpp │ │ │ │ │ │ │ │ ├── roi_align.cpp │ │ │ │ │ │ │ │ ├── roi_align_rotated.cpp │ │ │ │ │ │ │ │ └── soft_nms.cpp │ │ │ │ │ │ │ ├── deform_conv.h │ │ │ │ │ │ │ ├── grid_sample.h │ │ │ │ │ │ │ ├── modulated_deform_conv.h │ │ │ │ │ │ │ ├── nms.h │ │ │ │ │ │ │ ├── onnxruntime_register.h │ │ │ │ │ │ │ ├── onnxruntime_session_options_config_keys.h │ │ │ │ │ │ │ ├── ort_mmcv_utils.h │ │ │ │ │ │ │ ├── reduce_ops.h │ │ │ │ │ │ │ ├── roi_align.h │ │ │ │ │ │ │ ├── roi_align_rotated.h │ │ │ │ │ │ │ └── soft_nms.h │ │ │ │ │ │ ├── parrots/ │ │ │ │ │ │ │ ├── active_rotated_filter.cpp │ │ │ │ │ │ │ ├── active_rotated_filter_parrots.cpp │ │ │ │ │ │ │ ├── active_rotated_filter_pytorch.h │ │ │ │ │ │ │ ├── assign_score_withk.cpp │ │ │ │ │ │ │ ├── assign_score_withk_parrots.cpp │ │ │ │ │ │ │ ├── assign_score_withk_pytorch.h │ │ │ │ │ │ │ ├── ball_query._parrots.cpp │ │ │ │ │ │ │ ├── ball_query.cpp │ │ │ │ │ │ │ ├── ball_query_pytorch.h │ │ │ │ │ │ │ ├── bbox_overlaps.cpp │ │ │ │ │ │ │ ├── bbox_overlaps_parrots.cpp │ │ │ │ │ │ │ ├── bbox_overlaps_pytorch.h │ │ │ │ │ │ │ ├── border_align.cpp │ │ │ │ │ │ │ ├── border_align_parrots.cpp │ │ │ │ │ │ │ ├── border_align_pytorch.h │ │ │ │ │ │ │ ├── box_iou_rotated.cpp │ │ │ │ │ │ │ ├── box_iou_rotated_parrots.cpp │ │ │ │ │ │ │ ├── box_iou_rotated_pytorch.h │ │ │ │ │ │ │ ├── carafe.cpp │ │ │ │ │ │ │ ├── carafe_naive.cpp │ │ │ │ │ │ │ ├── carafe_naive_parrots.cpp │ │ │ │ │ │ │ ├── carafe_naive_pytorch.h │ │ │ │ │ │ │ ├── carafe_parrots.cpp │ │ │ │ │ │ │ ├── carafe_pytorch.h │ │ │ │ │ │ │ ├── contour_expand.cpp │ │ │ │ │ │ │ ├── 
contour_expand_parrots.cpp │ │ │ │ │ │ │ ├── contour_expand_pytorch.h │ │ │ │ │ │ │ ├── convex_iou.cpp │ │ │ │ │ │ │ ├── convex_iou_parrots.cpp │ │ │ │ │ │ │ ├── convex_iou_pytorch.h │ │ │ │ │ │ │ ├── corner_pool.cpp │ │ │ │ │ │ │ ├── corner_pool_parrots.cpp │ │ │ │ │ │ │ ├── corner_pool_pytorch.h │ │ │ │ │ │ │ ├── correlation.cpp │ │ │ │ │ │ │ ├── correlation_parrots.cpp │ │ │ │ │ │ │ ├── correlation_pytorch.h │ │ │ │ │ │ │ ├── cudabind.cpp │ │ │ │ │ │ │ ├── deform_conv.cpp │ │ │ │ │ │ │ ├── deform_conv_parrots.cpp │ │ │ │ │ │ │ ├── deform_conv_pytorch.h │ │ │ │ │ │ │ ├── deform_roi_pool.cpp │ │ │ │ │ │ │ ├── deform_roi_pool_parrots.cpp │ │ │ │ │ │ │ ├── deform_roi_pool_pytorch.h │ │ │ │ │ │ │ ├── focal_loss.cpp │ │ │ │ │ │ │ ├── focal_loss_parrots.cpp │ │ │ │ │ │ │ ├── focal_loss_pytorch.h │ │ │ │ │ │ │ ├── furthest_point_sample.cpp │ │ │ │ │ │ │ ├── furthest_point_sample_parrots.cpp │ │ │ │ │ │ │ ├── furthest_point_sample_pytorch.h │ │ │ │ │ │ │ ├── fused_bias_leakyrelu.cpp │ │ │ │ │ │ │ ├── fused_bias_parrots.cpp │ │ │ │ │ │ │ ├── gather_points.cpp │ │ │ │ │ │ │ ├── gather_points_parrots.cpp │ │ │ │ │ │ │ ├── gather_points_pytorch.h │ │ │ │ │ │ │ ├── group_points.cpp │ │ │ │ │ │ │ ├── group_points_parrots.cpp │ │ │ │ │ │ │ ├── group_points_pytorch.h │ │ │ │ │ │ │ ├── info.cpp │ │ │ │ │ │ │ ├── iou3d.cpp │ │ │ │ │ │ │ ├── iou3d_parrots.cpp │ │ │ │ │ │ │ ├── iou3d_pytorch.h │ │ │ │ │ │ │ ├── knn.cpp │ │ │ │ │ │ │ ├── knn_parrots.cpp │ │ │ │ │ │ │ ├── knn_pytorch.h │ │ │ │ │ │ │ ├── masked_conv2d.cpp │ │ │ │ │ │ │ ├── masked_conv2d_parrots.cpp │ │ │ │ │ │ │ ├── masked_conv2d_pytorch.h │ │ │ │ │ │ │ ├── min_area_polygons.cpp │ │ │ │ │ │ │ ├── min_area_polygons_parrots.cpp │ │ │ │ │ │ │ ├── min_area_polygons_pytorch.h │ │ │ │ │ │ │ ├── modulated_deform_conv.cpp │ │ │ │ │ │ │ ├── modulated_deform_conv_parrots.cpp │ │ │ │ │ │ │ ├── modulated_deform_conv_pytorch.h │ │ │ │ │ │ │ ├── ms_deform_attn.cpp │ │ │ │ │ │ │ ├── ms_deform_attn_parrots.cpp │ │ │ │ │ │ │ ├── 
nms.cpp │ │ │ │ │ │ │ ├── nms_parrots.cpp │ │ │ │ │ │ │ ├── nms_pytorch.h │ │ │ │ │ │ │ ├── nms_rotated.cpp │ │ │ │ │ │ │ ├── pixel_group.cpp │ │ │ │ │ │ │ ├── pixel_group_parrots.cpp │ │ │ │ │ │ │ ├── pixel_group_pytorch.h │ │ │ │ │ │ │ ├── points_in_boxes.cpp │ │ │ │ │ │ │ ├── points_in_boxes_parrots.cpp │ │ │ │ │ │ │ ├── points_in_boxes_pytorch.h │ │ │ │ │ │ │ ├── points_in_polygons.cpp │ │ │ │ │ │ │ ├── points_in_polygons_parrots.cpp │ │ │ │ │ │ │ ├── points_in_polygons_pytorch.h │ │ │ │ │ │ │ ├── psamask.cpp │ │ │ │ │ │ │ ├── psamask_parrots.cpp │ │ │ │ │ │ │ ├── psamask_pytorch.h │ │ │ │ │ │ │ ├── riroi_align_rotated.cpp │ │ │ │ │ │ │ ├── riroi_align_rotated_parrots.cpp │ │ │ │ │ │ │ ├── riroi_align_rotated_pytorch.h │ │ │ │ │ │ │ ├── roi_align.cpp │ │ │ │ │ │ │ ├── roi_align_parrots.cpp │ │ │ │ │ │ │ ├── roi_align_pytorch.h │ │ │ │ │ │ │ ├── roi_align_rotated.cpp │ │ │ │ │ │ │ ├── roi_align_rotated_parrots.cpp │ │ │ │ │ │ │ ├── roi_align_rotated_pytorch.h │ │ │ │ │ │ │ ├── roi_pool.cpp │ │ │ │ │ │ │ ├── roi_pool_parrots.cpp │ │ │ │ │ │ │ ├── roi_pool_pytorch.h │ │ │ │ │ │ │ ├── roiaware_pool3d.cpp │ │ │ │ │ │ │ ├── roiaware_pool3d_parrots.cpp │ │ │ │ │ │ │ ├── roiaware_pool3d_pytorch.h │ │ │ │ │ │ │ ├── roipoint_pool3d.cpp │ │ │ │ │ │ │ ├── roipoint_pool3d_parrots.cpp │ │ │ │ │ │ │ ├── roipoint_pool3d_pytorch.h │ │ │ │ │ │ │ ├── rotated_feature_align.cpp │ │ │ │ │ │ │ ├── rotated_feature_align_parrots.cpp │ │ │ │ │ │ │ ├── rotated_feature_align_pytorch.h │ │ │ │ │ │ │ ├── sync_bn.cpp │ │ │ │ │ │ │ ├── sync_bn_parrots.cpp │ │ │ │ │ │ │ ├── sync_bn_pytorch.h │ │ │ │ │ │ │ ├── three_interpolate.cpp │ │ │ │ │ │ │ ├── three_interpolate_parrots.cpp │ │ │ │ │ │ │ ├── three_interpolate_pytorch.h │ │ │ │ │ │ │ ├── three_nn.cpp │ │ │ │ │ │ │ ├── three_nn_parrots.cpp │ │ │ │ │ │ │ ├── three_nn_pytorch.h │ │ │ │ │ │ │ ├── tin_shift.cpp │ │ │ │ │ │ │ ├── tin_shift_parrots.cpp │ │ │ │ │ │ │ ├── tin_shift_pytorch.h │ │ │ │ │ │ │ ├── upfirdn2d.cpp │ │ │ │ │ │ │ ├── 
upfirdn2d_parrots.cpp │ │ │ │ │ │ │ ├── voxelization.cpp │ │ │ │ │ │ │ ├── voxelization_parrots.cpp │ │ │ │ │ │ │ └── voxelization_pytorch.h │ │ │ │ │ │ ├── pytorch/ │ │ │ │ │ │ │ ├── active_rotated_filter.cpp │ │ │ │ │ │ │ ├── assign_score_withk.cpp │ │ │ │ │ │ │ ├── ball_query.cpp │ │ │ │ │ │ │ ├── bbox_overlaps.cpp │ │ │ │ │ │ │ ├── border_align.cpp │ │ │ │ │ │ │ ├── box_iou_rotated.cpp │ │ │ │ │ │ │ ├── carafe.cpp │ │ │ │ │ │ │ ├── carafe_naive.cpp │ │ │ │ │ │ │ ├── contour_expand.cpp │ │ │ │ │ │ │ ├── convex_iou.cpp │ │ │ │ │ │ │ ├── corner_pool.cpp │ │ │ │ │ │ │ ├── correlation.cpp │ │ │ │ │ │ │ ├── cpu/ │ │ │ │ │ │ │ │ ├── active_rotated_filter.cpp │ │ │ │ │ │ │ │ ├── box_iou_rotated.cpp │ │ │ │ │ │ │ │ ├── deform_conv.cpp │ │ │ │ │ │ │ │ ├── modulated_deform_conv.cpp │ │ │ │ │ │ │ │ ├── nms.cpp │ │ │ │ │ │ │ │ ├── nms_rotated.cpp │ │ │ │ │ │ │ │ ├── pixel_group.cpp │ │ │ │ │ │ │ │ ├── points_in_boxes.cpp │ │ │ │ │ │ │ │ ├── psamask.cpp │ │ │ │ │ │ │ │ ├── roi_align.cpp │ │ │ │ │ │ │ │ ├── roi_align_rotated.cpp │ │ │ │ │ │ │ │ └── voxelization.cpp │ │ │ │ │ │ │ ├── cuda/ │ │ │ │ │ │ │ │ ├── active_rotated_filter_cuda.cu │ │ │ │ │ │ │ │ ├── assign_score_withk_cuda.cu │ │ │ │ │ │ │ │ ├── ball_query_cuda.cu │ │ │ │ │ │ │ │ ├── bbox_overlaps_cuda.cu │ │ │ │ │ │ │ │ ├── border_align_cuda.cu │ │ │ │ │ │ │ │ ├── box_iou_rotated_cuda.cu │ │ │ │ │ │ │ │ ├── carafe_cuda.cu │ │ │ │ │ │ │ │ ├── carafe_naive_cuda.cu │ │ │ │ │ │ │ │ ├── convex_iou.cu │ │ │ │ │ │ │ │ ├── correlation_cuda.cu │ │ │ │ │ │ │ │ ├── cudabind.cpp │ │ │ │ │ │ │ │ ├── deform_conv_cuda.cu │ │ │ │ │ │ │ │ ├── deform_roi_pool_cuda.cu │ │ │ │ │ │ │ │ ├── focal_loss_cuda.cu │ │ │ │ │ │ │ │ ├── furthest_point_sample_cuda.cu │ │ │ │ │ │ │ │ ├── fused_bias_leakyrelu_cuda.cu │ │ │ │ │ │ │ │ ├── gather_points_cuda.cu │ │ │ │ │ │ │ │ ├── group_points_cuda.cu │ │ │ │ │ │ │ │ ├── iou3d_cuda.cu │ │ │ │ │ │ │ │ ├── knn_cuda.cu │ │ │ │ │ │ │ │ ├── masked_conv2d_cuda.cu │ │ │ │ │ │ │ │ ├── min_area_polygons.cu │ │ 
│ │ │ │ │ │ ├── modulated_deform_conv_cuda.cu │ │ │ │ │ │ │ │ ├── ms_deform_attn_cuda.cu │ │ │ │ │ │ │ │ ├── nms_cuda.cu │ │ │ │ │ │ │ │ ├── nms_rotated_cuda.cu │ │ │ │ │ │ │ │ ├── points_in_boxes_cuda.cu │ │ │ │ │ │ │ │ ├── points_in_polygons_cuda.cu │ │ │ │ │ │ │ │ ├── psamask_cuda.cu │ │ │ │ │ │ │ │ ├── riroi_align_rotated_cuda.cu │ │ │ │ │ │ │ │ ├── roi_align_cuda.cu │ │ │ │ │ │ │ │ ├── roi_align_rotated_cuda.cu │ │ │ │ │ │ │ │ ├── roi_pool_cuda.cu │ │ │ │ │ │ │ │ ├── roiaware_pool3d_cuda.cu │ │ │ │ │ │ │ │ ├── roipoint_pool3d_cuda.cu │ │ │ │ │ │ │ │ ├── rotated_feature_align_cuda.cu │ │ │ │ │ │ │ │ ├── scatter_points_cuda.cu │ │ │ │ │ │ │ │ ├── sync_bn_cuda.cu │ │ │ │ │ │ │ │ ├── three_interpolate_cuda.cu │ │ │ │ │ │ │ │ ├── three_nn_cuda.cu │ │ │ │ │ │ │ │ ├── tin_shift_cuda.cu │ │ │ │ │ │ │ │ ├── upfirdn2d_kernel.cu │ │ │ │ │ │ │ │ └── voxelization_cuda.cu │ │ │ │ │ │ │ ├── deform_conv.cpp │ │ │ │ │ │ │ ├── deform_roi_pool.cpp │ │ │ │ │ │ │ ├── focal_loss.cpp │ │ │ │ │ │ │ ├── furthest_point_sample.cpp │ │ │ │ │ │ │ ├── fused_bias_leakyrelu.cpp │ │ │ │ │ │ │ ├── gather_points.cpp │ │ │ │ │ │ │ ├── group_points.cpp │ │ │ │ │ │ │ ├── info.cpp │ │ │ │ │ │ │ ├── iou3d.cpp │ │ │ │ │ │ │ ├── knn.cpp │ │ │ │ │ │ │ ├── masked_conv2d.cpp │ │ │ │ │ │ │ ├── min_area_polygons.cpp │ │ │ │ │ │ │ ├── modulated_deform_conv.cpp │ │ │ │ │ │ │ ├── ms_deform_attn.cpp │ │ │ │ │ │ │ ├── nms.cpp │ │ │ │ │ │ │ ├── nms_rotated.cpp │ │ │ │ │ │ │ ├── pixel_group.cpp │ │ │ │ │ │ │ ├── points_in_boxes.cpp │ │ │ │ │ │ │ ├── points_in_polygons.cpp │ │ │ │ │ │ │ ├── psamask.cpp │ │ │ │ │ │ │ ├── pybind.cpp │ │ │ │ │ │ │ ├── riroi_align_rotated.cpp │ │ │ │ │ │ │ ├── roi_align.cpp │ │ │ │ │ │ │ ├── roi_align_rotated.cpp │ │ │ │ │ │ │ ├── roi_pool.cpp │ │ │ │ │ │ │ ├── roiaware_pool3d.cpp │ │ │ │ │ │ │ ├── roipoint_pool3d.cpp │ │ │ │ │ │ │ ├── rotated_feature_align.cpp │ │ │ │ │ │ │ ├── scatter_points.cpp │ │ │ │ │ │ │ ├── sync_bn.cpp │ │ │ │ │ │ │ ├── three_interpolate.cpp │ │ │ │ │ │ │ ├── 
three_nn.cpp │ │ │ │ │ │ │ ├── tin_shift.cpp │ │ │ │ │ │ │ ├── upfirdn2d.cpp │ │ │ │ │ │ │ └── voxelization.cpp │ │ │ │ │ │ └── tensorrt/ │ │ │ │ │ │ ├── plugins/ │ │ │ │ │ │ │ ├── trt_corner_pool.cpp │ │ │ │ │ │ │ ├── trt_corner_pool_kernel.cu │ │ │ │ │ │ │ ├── trt_cuda_helper.cu │ │ │ │ │ │ │ ├── trt_cummaxmin.cpp │ │ │ │ │ │ │ ├── trt_cummaxmin_kernel.cu │ │ │ │ │ │ │ ├── trt_deform_conv.cpp │ │ │ │ │ │ │ ├── trt_deform_conv_kernel.cu │ │ │ │ │ │ │ ├── trt_grid_sampler.cpp │ │ │ │ │ │ │ ├── trt_grid_sampler_kernel.cu │ │ │ │ │ │ │ ├── trt_instance_norm.cpp │ │ │ │ │ │ │ ├── trt_modulated_deform_conv.cpp │ │ │ │ │ │ │ ├── trt_modulated_deform_conv_kernel.cu │ │ │ │ │ │ │ ├── trt_nms.cpp │ │ │ │ │ │ │ ├── trt_nms_kernel.cu │ │ │ │ │ │ │ ├── trt_plugin.cpp │ │ │ │ │ │ │ ├── trt_roi_align.cpp │ │ │ │ │ │ │ ├── trt_roi_align_kernel.cu │ │ │ │ │ │ │ ├── trt_scatternd.cpp │ │ │ │ │ │ │ └── trt_scatternd_kernel.cu │ │ │ │ │ │ ├── trt_corner_pool.hpp │ │ │ │ │ │ ├── trt_cuda_helper.cuh │ │ │ │ │ │ ├── trt_cummaxmin.hpp │ │ │ │ │ │ ├── trt_deform_conv.hpp │ │ │ │ │ │ ├── trt_grid_sampler.hpp │ │ │ │ │ │ ├── trt_instance_norm.hpp │ │ │ │ │ │ ├── trt_modulated_deform_conv.hpp │ │ │ │ │ │ ├── trt_nms.hpp │ │ │ │ │ │ ├── trt_plugin.hpp │ │ │ │ │ │ ├── trt_plugin_helper.hpp │ │ │ │ │ │ ├── trt_roi_align.hpp │ │ │ │ │ │ ├── trt_scatternd.hpp │ │ │ │ │ │ └── trt_serialize.hpp │ │ │ │ │ ├── deform_conv.py │ │ │ │ │ ├── deform_roi_pool.py │ │ │ │ │ ├── deprecated_wrappers.py │ │ │ │ │ ├── focal_loss.py │ │ │ │ │ ├── furthest_point_sample.py │ │ │ │ │ ├── fused_bias_leakyrelu.py │ │ │ │ │ ├── gather_points.py │ │ │ │ │ ├── group_points.py │ │ │ │ │ ├── info.py │ │ │ │ │ ├── iou3d.py │ │ │ │ │ ├── knn.py │ │ │ │ │ ├── masked_conv.py │ │ │ │ │ ├── merge_cells.py │ │ │ │ │ ├── min_area_polygons.py │ │ │ │ │ ├── modulated_deform_conv.py │ │ │ │ │ ├── multi_scale_deform_attn.py │ │ │ │ │ ├── nms.py │ │ │ │ │ ├── pixel_group.py │ │ │ │ │ ├── point_sample.py │ │ │ │ │ ├── 
points_in_boxes.py │ │ │ │ │ ├── points_in_polygons.py │ │ │ │ │ ├── points_sampler.py │ │ │ │ │ ├── psa_mask.py │ │ │ │ │ ├── readme.md │ │ │ │ │ ├── riroi_align_rotated.py │ │ │ │ │ ├── roi_align.py │ │ │ │ │ ├── roi_align_rotated.py │ │ │ │ │ ├── roi_pool.py │ │ │ │ │ ├── roiaware_pool3d.py │ │ │ │ │ ├── roipoint_pool3d.py │ │ │ │ │ ├── rotated_feature_align.py │ │ │ │ │ ├── saconv.py │ │ │ │ │ ├── scatter_points.py │ │ │ │ │ ├── sync_bn.py │ │ │ │ │ ├── three_interpolate.py │ │ │ │ │ ├── three_nn.py │ │ │ │ │ ├── tin_shift.py │ │ │ │ │ ├── upfirdn2d.py │ │ │ │ │ └── voxelize.py │ │ │ │ ├── parallel/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── _functions.py │ │ │ │ │ ├── collate.py │ │ │ │ │ ├── data_container.py │ │ │ │ │ ├── data_parallel.py │ │ │ │ │ ├── distributed.py │ │ │ │ │ ├── distributed_deprecated.py │ │ │ │ │ ├── registry.py │ │ │ │ │ ├── scatter_gather.py │ │ │ │ │ └── utils.py │ │ │ │ ├── readme.md │ │ │ │ ├── runner/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base_module.py │ │ │ │ │ ├── base_runner.py │ │ │ │ │ ├── builder.py │ │ │ │ │ ├── checkpoint.py │ │ │ │ │ ├── default_constructor.py │ │ │ │ │ ├── dist_utils.py │ │ │ │ │ ├── epoch_based_runner.py │ │ │ │ │ ├── fp16_utils.py │ │ │ │ │ ├── hooks/ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── checkpoint.py │ │ │ │ │ │ ├── closure.py │ │ │ │ │ │ ├── ema.py │ │ │ │ │ │ ├── evaluation.py │ │ │ │ │ │ ├── hook.py │ │ │ │ │ │ ├── iter_timer.py │ │ │ │ │ │ ├── logger/ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── base.py │ │ │ │ │ │ │ ├── dvclive.py │ │ │ │ │ │ │ ├── mlflow.py │ │ │ │ │ │ │ ├── neptune.py │ │ │ │ │ │ │ ├── pavi.py │ │ │ │ │ │ │ ├── tensorboard.py │ │ │ │ │ │ │ ├── text.py │ │ │ │ │ │ │ └── wandb.py │ │ │ │ │ │ ├── lr_updater.py │ │ │ │ │ │ ├── memory.py │ │ │ │ │ │ ├── momentum_updater.py │ │ │ │ │ │ ├── nni_hook.py │ │ │ │ │ │ ├── optimizer.py │ │ │ │ │ │ ├── profiler.py │ │ │ │ │ │ ├── sampler_seed.py │ │ │ │ │ │ └── sync_buffer.py │ │ │ │ │ ├── iter_based_runner.py │ │ │ │ │ ├── 
log_buffer.py │ │ │ │ │ ├── misc.py │ │ │ │ │ ├── optimizer/ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── builder.py │ │ │ │ │ │ └── default_constructor.py │ │ │ │ │ ├── priority.py │ │ │ │ │ ├── record.py │ │ │ │ │ └── utils.py │ │ │ │ ├── tensorrt/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── init_plugins.py │ │ │ │ │ ├── preprocess.py │ │ │ │ │ └── tensorrt_utils.py │ │ │ │ ├── utils/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── config.py │ │ │ │ │ ├── env.py │ │ │ │ │ ├── ext_loader.py │ │ │ │ │ ├── hub.py │ │ │ │ │ ├── logging.py │ │ │ │ │ ├── misc.py │ │ │ │ │ ├── parrots_jit.py │ │ │ │ │ ├── parrots_wrapper.py │ │ │ │ │ ├── path.py │ │ │ │ │ ├── progressbar.py │ │ │ │ │ ├── registry.py │ │ │ │ │ ├── testing.py │ │ │ │ │ ├── timer.py │ │ │ │ │ ├── trace.py │ │ │ │ │ └── version_utils.py │ │ │ │ ├── version.py │ │ │ │ ├── video/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── io.py │ │ │ │ │ ├── optflow.py │ │ │ │ │ └── processing.py │ │ │ │ └── visualization/ │ │ │ │ ├── __init__.py │ │ │ │ ├── color.py │ │ │ │ ├── image.py │ │ │ │ └── optflow.py │ │ │ ├── readme.md │ │ │ ├── setup.cfg │ │ │ ├── setup.py │ │ │ └── tests/ │ │ │ ├── test_arraymisc.py │ │ │ ├── test_cnn/ │ │ │ │ ├── test_build_layers.py │ │ │ │ ├── test_context_block.py │ │ │ │ ├── test_conv2d_adaptive_padding.py │ │ │ │ ├── test_conv_module.py │ │ │ │ ├── test_depthwise_seperable_conv_module.py │ │ │ │ ├── test_flops_counter.py │ │ │ │ ├── test_fuse_conv_bn.py │ │ │ │ ├── test_generalized_attention.py │ │ │ │ ├── test_hsigmoid.py │ │ │ │ ├── test_hswish.py │ │ │ │ ├── test_model_registry.py │ │ │ │ ├── test_non_local.py │ │ │ │ ├── test_revert_syncbn.py │ │ │ │ ├── test_scale.py │ │ │ │ ├── test_swish.py │ │ │ │ ├── test_transformer.py │ │ │ │ ├── test_weight_init.py │ │ │ │ └── test_wrappers.py │ │ │ ├── test_fileclient.py │ │ │ ├── test_fileio.py │ │ │ ├── test_image/ │ │ │ │ ├── test_colorspace.py │ │ │ │ ├── test_geometric.py │ │ │ │ ├── test_image_misc.py │ │ │ │ ├── test_io.py │ │ │ │ └── 
test_photometric.py │ │ │ ├── test_load_model_zoo.py │ │ │ ├── test_ops/ │ │ │ │ ├── test_active_rotated_filter.py │ │ │ │ ├── test_assign_score_withk.py │ │ │ │ ├── test_ball_query.py │ │ │ │ ├── test_bbox.py │ │ │ │ ├── test_bilinear_grid_sample.py │ │ │ │ ├── test_border_align.py │ │ │ │ ├── test_box_iou_rotated.py │ │ │ │ ├── test_carafe.py │ │ │ │ ├── test_cc_attention.py │ │ │ │ ├── test_contour_expand.py │ │ │ │ ├── test_convex_iou.py │ │ │ │ ├── test_corner_pool.py │ │ │ │ ├── test_correlation.py │ │ │ │ ├── test_deform_conv.py │ │ │ │ ├── test_deform_roi_pool.py │ │ │ │ ├── test_focal_loss.py │ │ │ │ ├── test_furthest_point_sample.py │ │ │ │ ├── test_fused_bias_leakyrelu.py │ │ │ │ ├── test_gather_points.py │ │ │ │ ├── test_group_points.py │ │ │ │ ├── test_info.py │ │ │ │ ├── test_iou3d.py │ │ │ │ ├── test_knn.py │ │ │ │ ├── test_masked_conv2d.py │ │ │ │ ├── test_merge_cells.py │ │ │ │ ├── test_min_area_polygons.py │ │ │ │ ├── test_modulated_deform_conv.py │ │ │ │ ├── test_ms_deformable_attn.py │ │ │ │ ├── test_nms.py │ │ │ │ ├── test_nms_rotated.py │ │ │ │ ├── test_onnx.py │ │ │ │ ├── test_pixel_group.py │ │ │ │ ├── test_points_in_polygons.py │ │ │ │ ├── test_psa_mask.py │ │ │ │ ├── test_riroi_align_rotated.py │ │ │ │ ├── test_roi_align.py │ │ │ │ ├── test_roi_align_rotated.py │ │ │ │ ├── test_roi_pool.py │ │ │ │ ├── test_roiaware_pool3d.py │ │ │ │ ├── test_roipoint_pool3d.py │ │ │ │ ├── test_rotated_feature_align.py │ │ │ │ ├── test_saconv.py │ │ │ │ ├── test_scatter_points.py │ │ │ │ ├── test_syncbn.py │ │ │ │ ├── test_tensorrt.py │ │ │ │ ├── test_tensorrt_preprocess.py │ │ │ │ ├── test_three_interpolate.py │ │ │ │ ├── test_three_nn.py │ │ │ │ ├── test_tin_shift.py │ │ │ │ ├── test_upfirdn2d.py │ │ │ │ └── test_voxelization.py │ │ │ ├── test_parallel.py │ │ │ ├── test_runner/ │ │ │ │ ├── test_basemodule.py │ │ │ │ ├── test_checkpoint.py │ │ │ │ ├── test_dist_utils.py │ │ │ │ ├── test_eval_hook.py │ │ │ │ ├── test_fp16.py │ │ │ │ ├── test_hooks.py │ │ │ 
│ ├── test_optimizer.py │ │ │ │ ├── test_runner.py │ │ │ │ └── test_utils.py │ │ │ ├── test_utils/ │ │ │ │ ├── test_config.py │ │ │ │ ├── test_env.py │ │ │ │ ├── test_hub.py │ │ │ │ ├── test_logging.py │ │ │ │ ├── test_misc.py │ │ │ │ ├── test_parrots_jit.py │ │ │ │ ├── test_path.py │ │ │ │ ├── test_progressbar.py │ │ │ │ ├── test_registry.py │ │ │ │ ├── test_testing.py │ │ │ │ ├── test_timer.py │ │ │ │ ├── test_trace.py │ │ │ │ └── test_version_utils.py │ │ │ ├── test_video/ │ │ │ │ ├── test_optflow.py │ │ │ │ ├── test_processing.py │ │ │ │ └── test_reader.py │ │ │ └── test_visualization.py │ │ ├── pansharpening/ │ │ │ ├── common/ │ │ │ │ ├── dataset.py │ │ │ │ ├── dataset_hp.py │ │ │ │ ├── evaluate.py │ │ │ │ └── psdata.py │ │ │ ├── configs/ │ │ │ │ ├── __init__.py │ │ │ │ ├── hook_configs.py │ │ │ │ ├── option_bdpn.py │ │ │ │ ├── option_dicnn.py │ │ │ │ ├── option_drpnn.py │ │ │ │ ├── option_fusionnet.py │ │ │ │ ├── option_msdcnn.py │ │ │ │ ├── option_pannet.py │ │ │ │ └── option_pnn.py │ │ │ ├── evaluation/ │ │ │ │ └── ps_evaluate.py │ │ │ ├── models/ │ │ │ │ ├── APNN/ │ │ │ │ │ ├── data_qb.py │ │ │ │ │ ├── data_single_read.py │ │ │ │ │ ├── data_wv2.py │ │ │ │ │ ├── data_wv3.py │ │ │ │ │ ├── data_wv4.py │ │ │ │ │ ├── evaluate.py │ │ │ │ │ ├── main_pre_train_trainData_qb.py │ │ │ │ │ ├── main_pre_train_trainData_wv2.py │ │ │ │ │ ├── main_pre_train_trainData_wv3.py │ │ │ │ │ ├── main_pre_train_trainData_wv4.py │ │ │ │ │ ├── main_test_qb.py │ │ │ │ │ ├── main_test_wv2.py │ │ │ │ │ ├── main_test_wv3.py │ │ │ │ │ ├── main_test_wv4.py │ │ │ │ │ ├── model_qb.py │ │ │ │ │ ├── model_wv2.py │ │ │ │ │ ├── model_wv3.py │ │ │ │ │ ├── model_wv4.py │ │ │ │ │ ├── variance_sacling_initializer.py │ │ │ │ │ └── wald_utilities.py │ │ │ │ ├── BDPN/ │ │ │ │ │ ├── bdpn_main.py │ │ │ │ │ ├── loss_utils.py │ │ │ │ │ ├── main_train_wv3.py │ │ │ │ │ └── model_bdpn.py │ │ │ │ ├── DRPNN/ │ │ │ │ │ ├── drpnn_main.py │ │ │ │ │ └── model_drpnn.py │ │ │ │ ├── DiCNN/ │ │ │ │ │ ├── dicnn_main.py 
│ │ │ │ │ └── model_dicnn.py │ │ │ │ ├── FusionNet/ │ │ │ │ │ ├── fusionnet_main.py │ │ │ │ │ ├── model_fusionnet.py │ │ │ │ │ └── run_fusionnet.py │ │ │ │ ├── MSDCNN/ │ │ │ │ │ ├── model_msdcnn.py │ │ │ │ │ └── msdcnn_main.py │ │ │ │ ├── PNN/ │ │ │ │ │ ├── model_pnn.py │ │ │ │ │ └── pnn_main.py │ │ │ │ ├── PanNet/ │ │ │ │ │ ├── model_pannet.py │ │ │ │ │ └── pannet_main.py │ │ │ │ └── __init__.py │ │ │ ├── run_pansharpening.py │ │ │ └── run_test_pansharpening.py │ │ ├── pretrained-model/ │ │ │ ├── QB/ │ │ │ │ ├── bdpn.pth │ │ │ │ ├── dicnn1.pth │ │ │ │ ├── drpnn.pth │ │ │ │ ├── fusionnet.pth │ │ │ │ ├── msdcnn.pth │ │ │ │ ├── panet.pth │ │ │ │ ├── pnn.pth │ │ │ │ └── readme.txt │ │ │ ├── WV2/ │ │ │ │ ├── bdpn.pth │ │ │ │ ├── dicnn1.pth │ │ │ │ ├── drpnn.pth │ │ │ │ ├── fusionnet.pth │ │ │ │ ├── msdcnn.pth │ │ │ │ ├── pannet.pth │ │ │ │ ├── pnn.pth │ │ │ │ └── readme.txt │ │ │ ├── WV3/ │ │ │ │ ├── bdpn.pth │ │ │ │ ├── dicnn1.pth │ │ │ │ ├── drpnn.pth │ │ │ │ ├── fusionnet.pth │ │ │ │ ├── msdcnn.pth │ │ │ │ ├── pannet.pth │ │ │ │ └── pnn.pth │ │ │ └── WV4/ │ │ │ ├── bdpn.pth │ │ │ ├── dicnn1.pth │ │ │ ├── drpnn.pth │ │ │ ├── fusionnet.pth │ │ │ ├── msdcnn.pth │ │ │ ├── pannet.pth │ │ │ ├── pnn.pth │ │ │ └── readme.txt │ │ ├── readme.md │ │ └── results/ │ │ └── readme.txt │ ├── readme.md │ └── setup.py ├── 02-Test-toolbox-for-traditional-and-DL(Matlab)/ │ ├── 1_TestData/ │ │ ├── Datasets Testing/ │ │ │ └── Download link for WV3-NewYork test data.txt │ │ ├── QB/ │ │ │ └── readme.txt │ │ ├── WV2/ │ │ │ └── readme.txt │ │ ├── WV3/ │ │ │ └── readme.txt │ │ ├── WV4/ │ │ │ └── readme.txt │ │ └── readme.txt │ ├── 2_DL_Result/ │ │ ├── QB/ │ │ │ └── readme.txt │ │ ├── WV2/ │ │ │ └── readme.txt │ │ ├── WV3/ │ │ │ ├── APNN/ │ │ │ │ └── readme.txt │ │ │ ├── BDPN/ │ │ │ │ └── readme.txt │ │ │ ├── DRPNN/ │ │ │ │ └── readme.txt │ │ │ ├── DiCNN1/ │ │ │ │ └── readme.txt │ │ │ ├── Download link for the 8 DL methods on WV3 dataset.txt │ │ │ ├── FusionNet/ │ │ │ │ └── readme.txt │ │ │ 
├── MSDCNN/ │ │ │ │ └── readme.txt │ │ │ ├── PNN/ │ │ │ │ └── readme.txt │ │ │ └── PanNet/ │ │ │ └── readme.txt │ │ ├── WV4/ │ │ │ └── readme.txt │ │ └── readme.txt │ ├── 3_EPS/ │ │ ├── QB/ │ │ │ └── readme.txt │ │ ├── WV2/ │ │ │ └── readme.txt │ │ ├── WV3/ │ │ │ └── readme.txt │ │ ├── WV4/ │ │ │ └── readme.txt │ │ └── readme.txt │ ├── AWLP/ │ │ └── AWLP.m │ ├── Avg_RR_Assessment.tex │ ├── BDSD/ │ │ ├── BDSD.m │ │ ├── BDSD_PC.m │ │ └── C_BDSD.m │ ├── BT-H/ │ │ └── BroveyRegHazeMin.m │ ├── Demo_Full_Resolution.m │ ├── Demo_Reduced_Resolution.m │ ├── FE-HPM/ │ │ ├── FE.m │ │ └── FE_HPM.m │ ├── FR_Assessment.tex │ ├── GLP/ │ │ ├── GS2_GLP.m │ │ ├── MTF_GLP.m │ │ ├── MTF_GLP_FS.m │ │ ├── MTF_GLP_HPM.m │ │ ├── MTF_GLP_HPM_Haze_min.m │ │ └── MTF_GLP_HPM_R.m │ ├── GS/ │ │ ├── GS.m │ │ ├── GSA.m │ │ └── GS_Segm.m │ ├── MF/ │ │ ├── MF_HG_Pansharpen.m │ │ └── Pyr_Dec.m │ ├── PRACS/ │ │ └── PRACS.m │ ├── PWMBF/ │ │ ├── PWMBF.m │ │ ├── compute_PhiTX.m │ │ ├── compute_PhiX.m │ │ ├── readme │ │ └── rwt/ │ │ ├── AUTHORS │ │ ├── CMakeLists.txt │ │ ├── HACKING │ │ ├── INSTALL │ │ ├── LICENSE │ │ ├── bin/ │ │ │ ├── HardTh.m │ │ │ ├── SoftTh.m │ │ │ ├── compile.m │ │ │ ├── daubcqf.m │ │ │ ├── denoise.m │ │ │ ├── makesig.m │ │ │ ├── mdwt.m │ │ │ ├── mdwt.mexw64 │ │ │ ├── midwt.m │ │ │ ├── midwt.mexw64 │ │ │ ├── mirdwt.m │ │ │ ├── mirdwt.mexw64 │ │ │ ├── mrdwt.m │ │ │ ├── mrdwt.mexw64 │ │ │ └── setopt.m │ │ ├── dist/ │ │ │ ├── 2.01/ │ │ │ │ ├── INSTALL │ │ │ │ ├── README │ │ │ │ ├── RWT-2.01.tar.Z │ │ │ │ └── doc/ │ │ │ │ └── index.html │ │ │ └── 2.3/ │ │ │ ├── INSTALL │ │ │ ├── INSTALL_PRECOMPILED │ │ │ ├── LICENSE │ │ │ └── README │ │ ├── doc/ │ │ │ ├── CMakeLists.txt │ │ │ └── Doxyfile.in │ │ ├── lib/ │ │ │ ├── inc/ │ │ │ │ ├── rwt_init.h │ │ │ │ ├── rwt_platform.h │ │ │ │ └── rwt_transforms.h │ │ │ └── src/ │ │ │ ├── CMakeLists.txt │ │ │ ├── dwt.c │ │ │ ├── idwt.c │ │ │ ├── init.c │ │ │ ├── irdwt.c │ │ │ ├── platform.c │ │ │ └── rdwt.c │ │ ├── mex/ │ │ │ ├── mdwt.c │ │ │ ├── 
midwt.c │ │ │ ├── mirdwt.c │ │ │ └── mrdwt.c │ │ ├── python/ │ │ │ ├── CMakeLists.txt │ │ │ ├── LICENSE.numpy │ │ │ ├── numpy.i │ │ │ ├── rwt.i │ │ │ └── test_rwt.py │ │ ├── readme │ │ └── tests/ │ │ ├── matlab_xunit/ │ │ │ ├── Readme.html │ │ │ ├── architecture/ │ │ │ │ ├── class_diagram_a.vsd │ │ │ │ ├── class_diagram_b.vsd │ │ │ │ ├── class_diagram_c.vsd │ │ │ │ ├── html/ │ │ │ │ │ └── matlab_xunit_architecture.html │ │ │ │ ├── matlab_xunit_architecture.m │ │ │ │ └── testSample.m │ │ │ ├── doc/ │ │ │ │ ├── +abc/ │ │ │ │ │ └── +tests/ │ │ │ │ │ ├── test_that.m │ │ │ │ │ └── test_this.m │ │ │ │ ├── +abc_tests/ │ │ │ │ │ ├── test_that.m │ │ │ │ │ └── test_this.m │ │ │ │ ├── exException.m │ │ │ │ ├── exQuickStart.m │ │ │ │ ├── exRunSpecificTest.m │ │ │ │ ├── exRunTestsInADirectory.m │ │ │ │ ├── exRunTestsInPackage.m │ │ │ │ ├── exSilentRunning.m │ │ │ │ ├── exSubfunctionTests.m │ │ │ │ ├── exTestCase.m │ │ │ │ ├── exTestCaseSearching.m │ │ │ │ ├── exTestFixtures.m │ │ │ │ ├── exTolerance.m │ │ │ │ ├── example_quick_start/ │ │ │ │ │ ├── testFliplrMatrix.m │ │ │ │ │ └── testFliplrVector.m │ │ │ │ ├── example_subfunction_tests/ │ │ │ │ │ └── testFliplr.m │ │ │ │ ├── examples_general/ │ │ │ │ │ ├── TestUsingTestCase.m │ │ │ │ │ ├── testBadSinTest.m │ │ │ │ │ ├── testCos.m │ │ │ │ │ ├── testSetupExample.m │ │ │ │ │ ├── testSin.m │ │ │ │ │ └── testWithSetupError.m │ │ │ │ ├── file_exchange_description.txt │ │ │ │ ├── html/ │ │ │ │ │ ├── exException.html │ │ │ │ │ ├── exQuickStart.html │ │ │ │ │ ├── exRunSpecificTest.html │ │ │ │ │ ├── exRunTestsInADirectory.html │ │ │ │ │ ├── exRunTestsInPackage.html │ │ │ │ │ ├── exSilentRunning.html │ │ │ │ │ ├── exSubfunctionTests.html │ │ │ │ │ ├── exTestCase.html │ │ │ │ │ ├── exTestCaseSearching.html │ │ │ │ │ ├── exTestFixtures.html │ │ │ │ │ └── exTolerance.html │ │ │ │ ├── index.html │ │ │ │ ├── release-history.html │ │ │ │ └── xunit_product_page.html │ │ │ ├── license.txt │ │ │ ├── obsolete/ │ │ │ │ ├── +mtest/ │ │ │ │ │ └── 
+utils/ │ │ │ │ │ ├── Contents.m │ │ │ │ │ ├── compareFloats.m │ │ │ │ │ ├── containsRegexp.m │ │ │ │ │ ├── generateDoc.m │ │ │ │ │ ├── isAlmostEqual.m │ │ │ │ │ ├── isSetUpString.m │ │ │ │ │ ├── isTearDownString.m │ │ │ │ │ ├── isTestCaseSubclass.m │ │ │ │ │ ├── isTestString.m │ │ │ │ │ └── parseFloatAssertInputs.m │ │ │ │ ├── assertAlmostEqual.m │ │ │ │ ├── mtest.m │ │ │ │ └── tests/ │ │ │ │ ├── MtestTest.m │ │ │ │ ├── cwd_test/ │ │ │ │ │ ├── TestCaseSubclass.m │ │ │ │ │ ├── testFoobar.m │ │ │ │ │ └── testSubfunctions.m │ │ │ │ ├── testAssertAlmostEqual.m │ │ │ │ └── testIsAlmostEqual.m │ │ │ ├── readme │ │ │ ├── tests/ │ │ │ │ ├── +xunit/ │ │ │ │ │ └── +mocktests/ │ │ │ │ │ ├── +subpkg/ │ │ │ │ │ │ └── test_a_bit.m │ │ │ │ │ ├── A.m │ │ │ │ │ ├── B.m │ │ │ │ │ ├── FooTest.m │ │ │ │ │ ├── helper_that.m │ │ │ │ │ ├── test_that.m │ │ │ │ │ └── test_this.m │ │ │ │ ├── Readme.m │ │ │ │ ├── RuntestsTest.m │ │ │ │ ├── TestCaseTest.m │ │ │ │ ├── TestCaseWithAddPathTest.m │ │ │ │ ├── TestFuncHandleTests.m │ │ │ │ ├── TestRunLoggerTest.m │ │ │ │ ├── TestSuiteTest.m │ │ │ │ ├── ThrowsExceptionTest.m │ │ │ │ ├── almost_black.tif │ │ │ │ ├── black.tif │ │ │ │ ├── cwd_test/ │ │ │ │ │ ├── TestCaseSubclass.m │ │ │ │ │ ├── testFoobar.m │ │ │ │ │ └── testSubfunctions.m │ │ │ │ ├── dir1/ │ │ │ │ │ └── test_thatPasses.m │ │ │ │ ├── dir2/ │ │ │ │ │ └── test_thatFails.m │ │ │ │ ├── empty_file │ │ │ │ ├── helper_classes/ │ │ │ │ │ ├── BadFixture.m │ │ │ │ │ ├── Contents.m │ │ │ │ │ ├── ExceptionNotThrownTest.m │ │ │ │ │ ├── FailingTestCase.m │ │ │ │ │ ├── LoggingTestCase.m │ │ │ │ │ ├── NoTestMethods.m │ │ │ │ │ ├── PassingExceptionTest.m │ │ │ │ │ ├── TestsToBeDiscovered.m │ │ │ │ │ ├── TwoPassingTests.m │ │ │ │ │ ├── WrongExceptionThrownTest.m │ │ │ │ │ ├── notTestString.m │ │ │ │ │ ├── testFunctionHandlesA.m │ │ │ │ │ ├── testFunctionHandlesB.m │ │ │ │ │ ├── testFunctionHandlesC.m │ │ │ │ │ ├── testFunctionHandlesD.m │ │ │ │ │ ├── testFunctionHandlesE.m │ │ │ │ │ ├── 
testFunctionHandlesTeardownNoSetup.m │ │ │ │ │ └── testSimple.m │ │ │ │ ├── testAssertEqual.m │ │ │ │ ├── testAssertExceptionThrown.m │ │ │ │ ├── testAssertFalse.m │ │ │ │ ├── testAssertTrue.m │ │ │ │ ├── testContainsRegexp.m │ │ │ │ ├── testIsSetUpString.m │ │ │ │ ├── testIsTearDownString.m │ │ │ │ ├── testIsTestCaseSubclass.m │ │ │ │ ├── testIsTestString.m │ │ │ │ ├── testRuntestsWithDirectoryName.m │ │ │ │ ├── test_TestSuiteInDir.m │ │ │ │ ├── test_arrayToString.m │ │ │ │ ├── test_assertElementsAlmostEqual.m │ │ │ │ ├── test_assertFilesEqual.m │ │ │ │ ├── test_assertVectorsAlmostEqual.m │ │ │ │ ├── test_compareFloats.m │ │ │ │ ├── test_comparisonMessage.m │ │ │ │ ├── test_packageName.m │ │ │ │ ├── test_parseFloatAssertInputs.m │ │ │ │ └── test_stringToCellArray.m │ │ │ └── xunit/ │ │ │ ├── +xunit/ │ │ │ │ └── +utils/ │ │ │ │ ├── Contents.m │ │ │ │ ├── arrayToString.m │ │ │ │ ├── compareFloats.m │ │ │ │ ├── comparisonMessage.m │ │ │ │ ├── containsRegexp.m │ │ │ │ ├── generateDoc.m │ │ │ │ ├── isAlmostEqual.m │ │ │ │ ├── isSetUpString.m │ │ │ │ ├── isTearDownString.m │ │ │ │ ├── isTestCaseSubclass.m │ │ │ │ ├── isTestString.m │ │ │ │ ├── parseFloatAssertInputs.m │ │ │ │ └── stringToCellArray.m │ │ │ ├── CommandWindowTestRunDisplay.m │ │ │ ├── Contents.m │ │ │ ├── FunctionHandleTestCase.m │ │ │ ├── TestCase.m │ │ │ ├── TestCaseInDir.m │ │ │ ├── TestCaseWithAddPath.m │ │ │ ├── TestComponent.m │ │ │ ├── TestComponentInDir.m │ │ │ ├── TestRunDisplay.m │ │ │ ├── TestRunLogger.m │ │ │ ├── TestRunMonitor.m │ │ │ ├── TestSuite.m │ │ │ ├── TestSuiteInDir.m │ │ │ ├── VerboseTestRunDisplay.m │ │ │ ├── assertElementsAlmostEqual.m │ │ │ ├── assertEqual.m │ │ │ ├── assertExceptionThrown.m │ │ │ ├── assertFalse.m │ │ │ ├── assertFilesEqual.m │ │ │ ├── assertTrue.m │ │ │ ├── assertVectorsAlmostEqual.m │ │ │ ├── initTestSuite.m │ │ │ └── runtests.m │ │ ├── octave/ │ │ │ ├── assertEqual.m │ │ │ ├── assertVectorsAlmostEqual.m │ │ │ ├── runtests.m │ │ │ ├── test_denoise.m │ │ │ ├── 
test_makesig.m │ │ │ ├── test_mdwt.m │ │ │ ├── test_midwt.m │ │ │ ├── test_mirdwt.m │ │ │ ├── test_mrdwt.m │ │ │ └── test_setopt.m │ │ ├── readme │ │ ├── runtests.m │ │ ├── test_daubcqf.m │ │ ├── test_denoise.m │ │ ├── test_makesig.m │ │ ├── test_mdwt.m │ │ ├── test_midwt.m │ │ ├── test_mirdwt.m │ │ ├── test_mrdwt.m │ │ └── test_setopt.m │ ├── Quality_Indices/ │ │ ├── D_lambda.m │ │ ├── D_lambda_K.m │ │ ├── D_s.m │ │ ├── ERGAS.m │ │ ├── HQNR.m │ │ ├── Q.m │ │ ├── QNR.m │ │ ├── SAM.m │ │ ├── SCC.m │ │ ├── img_qi.m │ │ ├── norm_blocco.m │ │ ├── onion_mult.m │ │ ├── onion_mult2D.m │ │ ├── onions_quality.m │ │ ├── q2n.m │ │ └── ssim.m │ ├── RR/ │ │ ├── RRpansharp.m │ │ ├── manopt/ │ │ │ ├── CLA.txt │ │ │ ├── COPYING.txt │ │ │ ├── CREDITS.txt │ │ │ ├── LICENSE.txt │ │ │ ├── README.txt │ │ │ ├── checkinstall/ │ │ │ │ └── basicexample.m │ │ │ ├── examples/ │ │ │ │ ├── PCA_stochastic.m │ │ │ │ ├── dominant_invariant_subspace.m │ │ │ │ ├── dominant_invariant_subspace_complex.m │ │ │ │ ├── elliptope_SDP.m │ │ │ │ ├── elliptope_SDP_complex.m │ │ │ │ ├── essential_svd.m │ │ │ │ ├── generalized_eigenvalue_computation.m │ │ │ │ ├── generalized_procrustes.m │ │ │ │ ├── low_rank_dist_completion.m │ │ │ │ ├── low_rank_matrix_completion.m │ │ │ │ ├── low_rank_tensor_completion.m │ │ │ │ ├── maxcut.m │ │ │ │ ├── nonlinear_eigenspace.m │ │ │ │ ├── packing_on_the_sphere.m │ │ │ │ ├── positive_definite_karcher_mean.m │ │ │ │ ├── radio_interferometric_calibration.m │ │ │ │ ├── robust_pca.m │ │ │ │ ├── shapefit_smoothed.m │ │ │ │ ├── sparse_pca.m │ │ │ │ ├── thomson_problem.m │ │ │ │ └── truncated_svd.m │ │ │ ├── importmanopt.m │ │ │ ├── manopt/ │ │ │ │ ├── core/ │ │ │ │ │ ├── StoreDB.m │ │ │ │ │ ├── applyStatsfun.m │ │ │ │ │ ├── canGetApproxGradient.m │ │ │ │ │ ├── canGetApproxHessian.m │ │ │ │ │ ├── canGetCost.m │ │ │ │ │ ├── canGetDirectionalDerivative.m │ │ │ │ │ ├── canGetEuclideanGradient.m │ │ │ │ │ ├── canGetGradient.m │ │ │ │ │ ├── canGetHessian.m │ │ │ │ │ ├── canGetLinesearch.m 
│ │ │ │ │ ├── canGetPartialEuclideanGradient.m │ │ │ │ │ ├── canGetPartialGradient.m │ │ │ │ │ ├── canGetPrecon.m │ │ │ │ │ ├── canGetSqrtPrecon.m │ │ │ │ │ ├── canGetSubgradient.m │ │ │ │ │ ├── getApproxGradient.m │ │ │ │ │ ├── getApproxHessian.m │ │ │ │ │ ├── getCost.m │ │ │ │ │ ├── getCostGrad.m │ │ │ │ │ ├── getDirectionalDerivative.m │ │ │ │ │ ├── getEuclideanGradient.m │ │ │ │ │ ├── getGlobalDefaults.m │ │ │ │ │ ├── getGradient.m │ │ │ │ │ ├── getGradientFD.m │ │ │ │ │ ├── getHessian.m │ │ │ │ │ ├── getHessianFD.m │ │ │ │ │ ├── getLinesearch.m │ │ │ │ │ ├── getPartialEuclideanGradient.m │ │ │ │ │ ├── getPartialGradient.m │ │ │ │ │ ├── getPrecon.m │ │ │ │ │ ├── getSqrtPrecon.m │ │ │ │ │ ├── getStore.m │ │ │ │ │ ├── getSubgradient.m │ │ │ │ │ ├── handle_light.m │ │ │ │ │ ├── mergeOptions.m │ │ │ │ │ ├── purgeStoredb.m │ │ │ │ │ ├── setStore.m │ │ │ │ │ └── stoppingcriterion.m │ │ │ │ ├── manifolds/ │ │ │ │ │ ├── complexcircle/ │ │ │ │ │ │ ├── complexcirclefactory.m │ │ │ │ │ │ └── realphasefactory.m │ │ │ │ │ ├── essential/ │ │ │ │ │ │ ├── README_Essential.txt │ │ │ │ │ │ ├── essential_costE2cost.m │ │ │ │ │ │ ├── essential_egradE2egrad.m │ │ │ │ │ │ ├── essential_ehessE2ehess.m │ │ │ │ │ │ ├── essential_flat.m │ │ │ │ │ │ ├── essential_hat3.m │ │ │ │ │ │ ├── essential_sharp.m │ │ │ │ │ │ ├── essentialfactory.m │ │ │ │ │ │ └── privateessential/ │ │ │ │ │ │ ├── essential_closestRepresentative.m │ │ │ │ │ │ ├── essential_distMinAngle.m │ │ │ │ │ │ ├── essential_distMinAnglePair.m │ │ │ │ │ │ ├── essential_distMinAnglePair_base.m │ │ │ │ │ │ ├── essential_distMinAnglePair_computeDfBreak.m │ │ │ │ │ │ ├── essential_distMinAnglePair_dfNewton.m │ │ │ │ │ │ ├── essential_distMinAnglePair_discontinuityDistance.m │ │ │ │ │ │ ├── essential_distMinAnglePair_ft.m │ │ │ │ │ │ ├── essential_distMinAnglePair_ftFromQ.m │ │ │ │ │ │ ├── essential_distMinAnglePair_test.m │ │ │ │ │ │ └── modAngle.m │ │ │ │ │ ├── euclidean/ │ │ │ │ │ │ ├── centeredmatrixfactory.m │ │ │ │ │ │ ├── 
euclideancomplexfactory.m │ │ │ │ │ │ ├── euclideanfactory.m │ │ │ │ │ │ ├── shapefitfactory.m │ │ │ │ │ │ ├── skewsymmetricfactory.m │ │ │ │ │ │ └── symmetricfactory.m │ │ │ │ │ ├── fixedrank/ │ │ │ │ │ │ ├── fixedrankMNquotientfactory.m │ │ │ │ │ │ ├── fixedrankembeddedfactory.m │ │ │ │ │ │ ├── fixedrankfactory_2factors.m │ │ │ │ │ │ ├── fixedrankfactory_2factors_preconditioned.m │ │ │ │ │ │ ├── fixedrankfactory_2factors_subspace_projection.m │ │ │ │ │ │ ├── fixedrankfactory_3factors.m │ │ │ │ │ │ └── fixedrankfactory_3factors_preconditioned.m │ │ │ │ │ ├── fixedranktensors/ │ │ │ │ │ │ ├── fixedrankfactory_tucker_preconditioned.m │ │ │ │ │ │ └── tucker2multiarray.m │ │ │ │ │ ├── grassmann/ │ │ │ │ │ │ ├── grassmanncomplexfactory.m │ │ │ │ │ │ ├── grassmannfactory.m │ │ │ │ │ │ └── grassmanngeneralizedfactory.m │ │ │ │ │ ├── multinomial/ │ │ │ │ │ │ └── multinomialfactory.m │ │ │ │ │ ├── oblique/ │ │ │ │ │ │ ├── obliquecomplexfactory.m │ │ │ │ │ │ └── obliquefactory.m │ │ │ │ │ ├── rotations/ │ │ │ │ │ │ ├── randrot.m │ │ │ │ │ │ ├── randskew.m │ │ │ │ │ │ └── rotationsfactory.m │ │ │ │ │ ├── specialeuclidean/ │ │ │ │ │ │ └── specialeuclideanfactory.m │ │ │ │ │ ├── sphere/ │ │ │ │ │ │ ├── spherecomplexfactory.m │ │ │ │ │ │ ├── spherefactory.m │ │ │ │ │ │ └── spheresymmetricfactory.m │ │ │ │ │ ├── stiefel/ │ │ │ │ │ │ ├── stiefelcomplexfactory.m │ │ │ │ │ │ ├── stiefelfactory.m │ │ │ │ │ │ ├── stiefelgeneralizedfactory.m │ │ │ │ │ │ └── stiefelstackedfactory.m │ │ │ │ │ └── symfixedrank/ │ │ │ │ │ ├── elliptopefactory.m │ │ │ │ │ ├── spectrahedronfactory.m │ │ │ │ │ ├── symfixedrankYYcomplexfactory.m │ │ │ │ │ ├── symfixedrankYYfactory.m │ │ │ │ │ └── sympositivedefinitefactory.m │ │ │ │ ├── readme │ │ │ │ ├── solvers/ │ │ │ │ │ ├── barzilaiborwein/ │ │ │ │ │ │ └── barzilaiborwein.m │ │ │ │ │ ├── bfgs/ │ │ │ │ │ │ └── rlbfgs.m │ │ │ │ │ ├── conjugategradient/ │ │ │ │ │ │ ├── conjugategradient.m │ │ │ │ │ │ └── linear_conjugategradient.m │ │ │ │ │ ├── 
gradientapproximations/ │ │ │ │ │ │ └── approxgradientFD.m │ │ │ │ │ ├── hessianapproximations/ │ │ │ │ │ │ └── approxhessianFD.m │ │ │ │ │ ├── linesearch/ │ │ │ │ │ │ ├── linesearch.m │ │ │ │ │ │ ├── linesearch_adaptive.m │ │ │ │ │ │ ├── linesearch_decrease.m │ │ │ │ │ │ └── linesearch_hint.m │ │ │ │ │ ├── neldermead/ │ │ │ │ │ │ ├── centroid.m │ │ │ │ │ │ └── neldermead.m │ │ │ │ │ ├── preconditioners/ │ │ │ │ │ │ └── preconhessiansolve.m │ │ │ │ │ ├── pso/ │ │ │ │ │ │ └── pso.m │ │ │ │ │ ├── steepestdescent/ │ │ │ │ │ │ └── steepestdescent.m │ │ │ │ │ ├── stochasticgradient/ │ │ │ │ │ │ ├── stepsize_sg.m │ │ │ │ │ │ └── stochasticgradient.m │ │ │ │ │ └── trustregions/ │ │ │ │ │ ├── license for original GenRTR code.txt │ │ │ │ │ ├── tCG.m │ │ │ │ │ └── trustregions.m │ │ │ │ └── tools/ │ │ │ │ ├── checkdiff.m │ │ │ │ ├── checkgradient.m │ │ │ │ ├── checkhessian.m │ │ │ │ ├── checkretraction.m │ │ │ │ ├── criticalpointfinder.m │ │ │ │ ├── dexpm.m │ │ │ │ ├── dfunm.m │ │ │ │ ├── diagsum.m │ │ │ │ ├── dlogm.m │ │ │ │ ├── dsqrtm.m │ │ │ │ ├── grammatrix.m │ │ │ │ ├── hashmd5.m │ │ │ │ ├── hessianextreme.m │ │ │ │ ├── hessianmatrix.m │ │ │ │ ├── hessianspectrum.m │ │ │ │ ├── identify_linear_piece.m │ │ │ │ ├── lincomb.m │ │ │ │ ├── manoptsolve.m │ │ │ │ ├── matrixlincomb.m │ │ │ │ ├── multihconj.m │ │ │ │ ├── multiherm.m │ │ │ │ ├── multiprod.m │ │ │ │ ├── multiprodmultitransp_license.txt │ │ │ │ ├── multiscale.m │ │ │ │ ├── multiskew.m │ │ │ │ ├── multisqnorm.m │ │ │ │ ├── multisym.m │ │ │ │ ├── multitrace.m │ │ │ │ ├── multitransp.m │ │ │ │ ├── orthogonalize.m │ │ │ │ ├── plotprofile.m │ │ │ │ ├── powermanifold.m │ │ │ │ ├── productmanifold.m │ │ │ │ ├── smallestinconvexhull.m │ │ │ │ ├── statsfunhelper.m │ │ │ │ ├── surfprofile.m │ │ │ │ ├── tangent2vec.m │ │ │ │ ├── tangentorthobasis.m │ │ │ │ ├── tangentspacefactory.m │ │ │ │ └── tangentspherefactory.m │ │ │ ├── manopt_version.m │ │ │ └── readme │ │ └── readme │ ├── RR_Assessment.tex │ ├── SR-D/ │ │ ├── CS.m │ │ 
├── Dict_Learn.m │ │ ├── OMP.m │ │ └── OMP_Rec_Detile.m │ ├── TV/ │ │ └── TV_pansharpen.m │ ├── Tools/ │ │ ├── LPfilter.m │ │ ├── LPfilterGauss.m │ │ ├── LPfilterPlusDec.m │ │ ├── MTF.m │ │ ├── MTF_PAN.m │ │ ├── estimation_alpha.m │ │ ├── genMTF.m │ │ ├── gen_LP_image.m │ │ ├── indexes_evaluation.m │ │ ├── indexes_evaluation_FS.m │ │ ├── indwt2_working.m │ │ ├── interp23tap.m │ │ ├── k_means_clustering.m │ │ ├── matrix2latex.m │ │ ├── ndwt2_working.m │ │ ├── printAllImagesImWriteFR.m │ │ ├── printAllImagesImWriteRR.m │ │ ├── printImage.m │ │ ├── rectangleonimage.m │ │ ├── resize_images.m │ │ ├── showImage4.m │ │ ├── showImage4LR.m │ │ ├── showImage4LR_zoomin.m │ │ ├── showImage4_zoomin.m │ │ ├── showImage8.m │ │ ├── showImage8LR.m │ │ ├── showImage8LR_zoomin.m │ │ ├── showImage8_zoomin.m │ │ ├── showImagesAll.m │ │ ├── showImagesAllOld.m │ │ ├── showPan.m │ │ ├── showPan_zoomin.m │ │ ├── tight_subplot.m │ │ ├── viewimage.m │ │ └── viewimage2.m │ └── readme.md ├── 03-Data-Simulation(Matlab)/ │ ├── 01-DataSimu/ │ │ └── QB/ │ │ └── readme.md.txt │ ├── Demo_DataSimu_qb.m │ ├── imgs/ │ │ └── readme │ └── segImg_new.m ├── LICENSE ├── README.md └── docs/ ├── en/ │ ├── DLPanToolbox/ │ │ ├── Evaluation.md │ │ ├── Example.md │ │ ├── PreProcess.md │ │ └── Simulation.md │ ├── Makefile │ ├── _static/ │ │ └── css/ │ │ └── readthedocs.css │ ├── _templates/ │ │ └── classtemplate.rst │ ├── citation.md │ ├── conf.py │ ├── docutils.conf │ ├── faq.md │ ├── get_started/ │ │ ├── Installation.md │ │ └── Introduction.md │ ├── index.rst │ ├── make.bat │ └── switch_language.md ├── requirements.txt ├── run.sh └── zh-cn/ ├── DLPanToolbox/ │ ├── Evaluation.md │ ├── Example.md │ ├── PreProcess.md │ └── Simulation.md ├── Makefile ├── _static/ │ └── css/ │ └── readthedocs.css ├── _templates/ │ └── classtemplate.rst ├── citation.md ├── conf.py ├── docutils.conf ├── faq.md ├── get_started/ │ ├── Installation.md │ └── Introduction.md ├── index.rst ├── make.bat ├── related.md └── switch_language.md 
================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ *.log *.pyc *.xml *.json *.mat *.eps *.cpython-37.pyc /DLPan-Toolbox/01-DL-toolbox(Pytorch)/results/* /bak/* ================================================ FILE: 01-DL-toolbox(Pytorch)/LICENSE ================================================ GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. 
For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". 
Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. 
c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. <signature of Ty Coon>, 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. 
================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/AutoDL/__init__.py ================================================ # GPL License # Copyright (C) 2021 , UESTC # All Rights Reserved # @Author : Xiao Wu, LiangJian Deng # @reference: from UDL.Basis.python_sub_class import PanSharpeningModel, TaskDispatcher, ModelDispatcher import UDL.Basis.option def build_model(arch, task, cfg=None): if task == "pansharpening": from UDL.pansharpening.models import PanSharpeningModel as MODELS return MODELS.build_model(cfg) else: raise NotImplementedError(f"It's not supported in {task}") def getDataSession(cfg): task = cfg.task if task in ["pansharpening"]: from UDL.pansharpening.common.psdata import PansharpeningSession as DataSession else: raise NotImplementedError return DataSession(cfg) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/AutoDL/trainer.py ================================================ # GPL License # Copyright (C) UESTC # All Rights Reserved # @Author : Xiao Wu, LiangJian Deng # @reference: import argparse import copy import os import os.path as osp import warnings import random import numpy as np import torch import torch.distributed as dist import time tic = time.time() # 1.14s import sys sys.path.append('../..') sys.path.append('../mmcv') from UDL.AutoDL import build_model, getDataSession, ModelDispatcher from UDL.Basis.auxiliary import init_random_seed, set_random_seed from mmcv.utils.logging import print_log, create_logger # 1.5s from mmcv.runner import init_dist, find_latest_checkpoint from mmcv.parallel import MMDataParallel, MMDistributedDataParallel from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner, Fp16OptimizerHook, OptimizerHook, build_optimizer, build_runner, get_dist_info) # 10s # from mmdet.datasets import (build_dataloader, build_dataset, # replace_ImageToTensor) def trainer(cfg, logger, distributed=False, meta=None): model, criterion, optimizer, scheduler = 
build_model(cfg.arch, cfg.task, cfg) if hasattr(model, 'init_weights'): model.init_weights() sess = getDataSession(cfg) if cfg.eval: cfg.workflow = [('val', 1)] if not any('train' in mode for mode, _ in cfg.workflow): cfg.eval = True # put model on gpus if distributed: find_unused_parameters = cfg.get('find_unused_parameters', False) # Sets the `find_unused_parameters` parameter in # torch.nn.parallel.DistributedDataParallel model = MMDistributedDataParallel( model.cuda(), device_ids=[torch.cuda.current_device()], broadcast_buffers=False, find_unused_parameters=find_unused_parameters) else: if not hasattr(model, 'train'): if isinstance(model.model, dict): for name, m in model.model.items(): model.model[name] = MMDataParallel(m, device_ids=cfg.gpu_ids) else: model.model = MMDataParallel(model.model, device_ids=cfg.gpu_ids) else: model = MMDataParallel(model, device_ids=cfg.gpu_ids) if cfg.get('optimizer', None) is not None: optimizer = build_optimizer(model, cfg.optimizer) if 'runner' not in cfg: cfg.runner = { 'type': 'EpochBasedRunner', 'max_epochs': cfg.epochs # argparser } warnings.warn( 'config is now expected to have a `runner` section, ' 'please set `runner` in your config.', UserWarning) else: if 'epochs' in cfg and 'max_iters' not in cfg.runner: cfg.runner['max_epochs'] = cfg.epochs # assert cfg.epochs == cfg.runner['max_epochs'], print(cfg.epochs, cfg.runner['max_epochs']) runner = build_runner( cfg.runner, default_args=dict( model=model, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta, opt_cfg={'print_freq': cfg.print_freq, 'accumulated_step': cfg.accumulated_step, 'clip_max_norm': cfg.clip_max_norm, 'dataset': cfg.dataset, 'img_range': cfg.img_range, 'metrics': cfg.metrics, 'save_fmt': cfg.save_fmt, 'mode': cfg.mode, 'eval': cfg.eval, 'save_dir': cfg.work_dir + "/results"})) # an ugly workaround to make .log and .log.json filenames the same # runner.timestamp = timestamp # fp16 setting fp16_cfg = cfg.get('fp16', None) if fp16_cfg is 
not None: optimizer_config = Fp16OptimizerHook( **cfg.optimizer_config, **fp16_cfg, distributed=distributed) elif distributed and 'type' not in cfg.optimizer_config: optimizer_config = OptimizerHook(**cfg.optimizer_config) else: optimizer_config = cfg.get('optimizer_config', None) ############################################################ # register training hooks ############################################################ if cfg.get('config', None) is not None: ''' optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=None) lr_config = dict(policy='step', step=[100, 150]) checkpoint_config = dict(interval=1) log_config = dict( interval=100, hooks=[ dict(type='TextLoggerHook'), # dict(type='TensorboardLoggerHook') ]) ''' runner.register_training_hooks( cfg.lr_config, optimizer_config, cfg.checkpoint_config, cfg.log_config, cfg.get('momentum_config', None), custom_hooks_config=cfg.get('custom_hooks', None)) elif cfg.get('log_config', None) is None and len(cfg.workflow) and cfg.workflow[0][0] != 'simple_train': if cfg.mode == 'nni': runner.register_custom_hooks({'type': 'NNIHook', 'priority': 'very_low'}) if scheduler is not None: runner.register_lr_hook(dict(policy=scheduler.__class__.__name__[:-2], step=scheduler.step_size)) runner.register_checkpoint_hook( dict(type='ModelCheckpoint', indicator='loss', save_top_k=cfg.save_top_k, print_freq=cfg.save_print_freq)) runner.register_optimizer_hook(dict(grad_clip=10)) # ExternOptimizer runner.register_timer_hook(dict(type='IterTimerHook')) log_config = [dict(type='TextLoggerHook')] if cfg.use_tfb: log_config.append(dict(type='TensorboardLoggerHook')) runner.register_logger_hooks(dict( interval=cfg.print_freq, hooks=log_config)) else: runner.register_checkpoint_hook(dict(type='ModelCheckpoint', indicator='loss')) if distributed: if isinstance(runner, EpochBasedRunner): runner.register_hook(DistSamplerSeedHook()) data_loaders = {} 
def main(cfg):
    """Prepare the run described by ``cfg`` and hand over to ``trainer``.

    Steps, in order:
      1. When ``cfg.launcher`` is not ``'none'``, initialise the distributed
         environment and replace ``cfg.gpu_ids`` with ``range(world_size)``.
         This happens first because the logger depends on the dist info.
      2. Create the logger; the model-save directory it returns becomes both
         ``cfg.out_dir`` and ``cfg.work_dir``.
      3. Resolve the random seed from ``cfg.seed``, log it and apply it.
      4. Call ``trainer`` with an empty ``meta`` dict.
    """
    distributed = cfg.launcher != 'none'
    if distributed:
        init_dist(cfg.launcher, **cfg.dist_params)
        # In distributed mode the GPU ids are re-derived from the world size.
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    logger, _out_dir, model_save_dir, _tfb_dir = create_logger(
        cfg, cfg.experimental_desc, 0)
    cfg.out_dir = cfg.work_dir = model_save_dir

    seed = init_random_seed(cfg.seed)
    print_log(f'Set random seed to {seed}', logger=logger)
    set_random_seed(seed)

    trainer(cfg, logger, distributed=distributed, meta={})
def convert_module(module, dtype):
    """
    Cast the floating-point parameters, their existing gradients, and the
    floating-point buffers that belong directly to ``module`` to ``dtype``.

    Child modules are not visited (``recurse=False``), and non-floating
    tensors (e.g. integer counters) are left as they are. The conversion is
    done in place by re-assigning ``.data``; nothing is returned.
    """
    for weight in module.parameters(recurse=False):
        if weight is None:
            continue
        if weight.data.dtype.is_floating_point:
            weight.data = weight.data.to(dtype=dtype)
        grad = weight._grad
        if grad is not None and grad.data.dtype.is_floating_point:
            grad.data = grad.data.to(dtype=dtype)

    for buffer in module.buffers(recurse=False):
        if buffer is None:
            continue
        if buffer.data.dtype.is_floating_point:
            buffer.data = buffer.data.to(dtype=dtype)
""" for module in network.modules(): if isinstance(module, torch.nn.modules.batchnorm._BatchNorm) and module.affine is True: continue convert_module(module, dtype) if isinstance(module, torch.nn.RNNBase) or isinstance(module, torch.nn.modules.rnn.RNNBase): module.flatten_parameters() return network class FP16Model(nn.Module): """ Convert model to half precision in a batchnorm-safe way. """ def __init__(self, network): super(FP16Model, self).__init__() self.network = convert_network(network, dtype=torch.half) def forward(self, *inputs): inputs = tuple(t.half() for t in inputs) return self.network(*inputs) def backwards_debug_hook(grad): raise RuntimeError("master_params recieved a gradient in the backward pass!") def prep_param_lists(model, flat_master=False): """ Creates a list of FP32 master parameters for a given model, as in `Training Neural Networks with Mixed Precision: Real Examples`_. Args: model (torch.nn.Module): Existing Pytorch model flat_master (bool, optional, default=False): Flatten the master parameters into a single tensor, as a performance optimization. Returns: A tuple (``model_params``, ``master_params``). ``model_params`` is a list of the model's parameters for later use with :func:`model_grads_to_master_grads` and :func:`master_params_to_model_params`. ``master_params`` is a list of FP32 master gradients. If ``flat_master=True``, ``master_params`` will be a list with one element. Example:: model_params, master_params = prep_param_lists(model) .. warning:: Currently, if ``flat_master=True``, all the model's parameters must be the same type. If the model has parameters of different types, use ``flat_master=False``, or use :class:`FP16_Optimizer`. .. 
def model_grads_to_master_grads(model_params, master_params, flat_master=False):
    """
    Copy the gradients held by the model parameters into the master parameters.

    Args:
        model_params: List of model parameters created by :func:`prep_param_lists`.
        master_params: List of FP32 master parameters created by
            :func:`prep_param_lists`.
        flat_master: Must be ``True`` when ``master_params`` was created with
            ``flat_master=True``. In that case ``master_params[0].grad`` is
            overwritten with the flattened concatenation of all model gradients.

    In the non-flat case parameters are processed pairwise: a model parameter
    without a gradient resets the master gradient to ``None``; otherwise the
    master gradient is allocated on first use and overwritten with a copy.
    """
    if not flat_master:
        for source, target in zip(model_params, master_params):
            if source.grad is None:
                target.grad = None
                continue
            if target.grad is None:
                # Allocate an (uninitialised) buffer shaped like the master param.
                target.grad = Variable(target.data.new(*target.data.size()))
            target.grad.data.copy_(source.grad.data)
        return

    flat_target = master_params[0].grad.data
    # The flattening may incur one more deep copy than is necessary.
    flat_target.copy_(
        _flatten_dense_tensors([p.grad.data for p in model_params]))
def main():
    """Command-line entry point: load a model class from a file and print its statistics.

    The module at ``--file`` is executed via ``importlib``; the attribute
    named by ``--model`` is then called with no arguments to build the model.
    ``--size`` (``CxHxW``) is split on ``'x'`` into a tuple of ints and passed
    to ``stat`` together with ``query_granularity=1``.

    If loading or instantiating the model fails, a message and the traceback
    are printed and the process exits with status 1, so shells and CI can
    detect the failure (a bare ``sys.exit()`` would report success).
    """
    args = arg()
    try:
        spec = importlib.util.spec_from_file_location('models', args.file)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        model = getattr(module, args.model)()
    except Exception:
        import sys
        import traceback
        print(f'Tried to import {args.model} from {args.file}. but failed.')
        traceback.print_exc()
        # Non-zero status: the requested model could not be loaded.
        sys.exit(1)

    input_size = tuple(int(x) for x in args.size.split('x'))
    stat(model, input_size, query_granularity=1)
def compute_Conv2d_flops(module, inp, out):
    """Count the FLOPs of one ``nn.Conv2d`` forward pass.

    Args:
        module: the ``nn.Conv2d`` layer (kernel size, groups and bias are read).
        inp: 4-D input tensor; sizes 0 and 1 are used as batch and input channels.
        out: 4-D output tensor; sizes 1..3 are used as output channels, height, width.

    Returns:
        ``k_h * k_w * in_c * (out_c // groups)`` per output position, times
        ``batch * out_h * out_w`` positions, plus ``out_c`` per position when
        the layer has a bias.
    """
    assert isinstance(module, nn.Conv2d)
    assert len(inp.size()) == 4 and len(inp.size()) == len(out.size())

    batch, channels_in = inp.size()[0], inp.size()[1]
    kernel_h, kernel_w = module.kernel_size
    channels_out, height_out, width_out = out.size()[1:]

    # Number of output positions over the whole batch.
    positions = batch * height_out * width_out

    # k * k * c * H * W * o = (multiplications + additions + bias) * positions
    per_position = kernel_h * kernel_w * channels_in * (channels_out // module.groups)
    flops = per_position * positions
    if module.bias is not None:
        flops += channels_out * positions
    return flops
len(inp.size()) == len(out.size()) flops = np.prod(inp.shape) return flops def compute_ReLU_flops(module, inp, out): assert isinstance(module, (nn.ReLU, nn.ReLU6, nn.PReLU, nn.ELU, nn.LeakyReLU)) batch_size = inp.size()[0] active_elements_count = batch_size for s in inp.size()[1:]: active_elements_count *= s return active_elements_count def compute_Pool2d_flops(module, inp, out): assert isinstance(module, nn.MaxPool2d) or isinstance(module, nn.AvgPool2d) assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) return np.prod(inp.shape) def compute_Linear_flops(module, inp, out): assert isinstance(module, nn.Linear) if len(inp.size()) > 3: inp = inp.reshape(inp.size(0), inp.size(1), -1) if len(out.size()) > 3: out = out.reshape(out.size(0), out.size(1), -1) batch_size = inp.size()[0] if len(inp.size()) == 3:# and inp.size(0) == 1: inp = inp[0, ...]#.squeeze(0) if len(out.size()) == 3:# and out.size(0) == 1: out = out[0, ...]#.squeeze(0) assert len(inp.size()) == 2 and len(out.size()) == 2 return batch_size * inp.size()[1] * out.size()[1] def compute_Upsample_flops(module, inp, out): assert isinstance(module, nn.Upsample) output_size = out[0] batch_size = inp.size()[0] output_elements_count = batch_size # for s in output_size. 
def compute_MSA_flops(module, inp, out):
    """Count the FLOPs of the two attention matrix products of an MSA layer.

    Only ``q @ k^T`` and ``attn @ v`` are counted. The q/k/v and output
    projections are not added here (they appear in the original only as
    commented-out terms).

    The input layout is chosen by the class name of ``module``:
      * ``"MSA"``     -> ``inp`` is ``(N, batch, dim)``
      * ``"MSA_BNC"`` -> ``inp`` is ``(batch, N, dim)``
    Any other class name is reported as unsupported and contributes 0, which
    mirrors what ``compute_flops`` does for unknown layers.

    Args:
        module: attention layer exposing its head count as ``num_heads`` or
            ``n_heads``.
        inp: input tensor, or a tuple whose first element is the input tensor.
        out: unused.

    Returns:
        int: ``batch * 2 * num_heads * N * N * (dim // num_heads)``, or 0 for
        an unsupported layer.

    Raises:
        AttributeError: if the module has neither ``num_heads`` nor ``n_heads``.
    """
    if isinstance(inp, tuple):
        inp = inp[0]

    layer_name = module.__class__.__name__
    if layer_name == "MSA":
        N, batch_size, dim = inp.size()
    elif layer_name == "MSA_BNC":
        batch_size, N, dim = inp.size()
    else:
        # Previously this path fell through and failed with UnboundLocalError.
        print(f"[Flops]: {layer_name} is not supported!")
        return 0

    if hasattr(module, 'num_heads'):
        num_heads = module.num_heads
    elif hasattr(module, 'n_heads'):
        # Fix: this branch used to read ``module.num_heads``, which does not
        # exist here, so it always raised AttributeError.
        num_heads = module.n_heads
    else:
        raise AttributeError(
            f"{layer_name} exposes neither 'num_heads' nor 'n_heads'")

    head_dim = dim // num_heads
    flops = 0
    # attn = (q @ k.transpose(-2, -1))
    flops += num_heads * N * head_dim * N
    # x = (attn @ v)
    flops += num_heads * N * N * head_dim
    return batch_size * flops
def compute_XCA_flops(module, inp, out):
    """Count the FLOPs of the two channel-attention matrix products.

    Args:
        module: layer providing ``num_heads`` and, optionally, ``window_size``.
        inp: 4-D input tensor; sizes 0, 2 and 3 are used as batch, H and W.
        out: output tensor; size 1 is used as the channel dimension.

    Returns:
        ``batch * windows * 2 * num_heads * head_dim**2 * tokens`` with
        ``head_dim = dim // num_heads``. When the module has ``window_size``,
        ``tokens = window_size**2`` and ``windows = H*W // tokens``; otherwise
        the whole image is one window of ``H*W`` tokens.
    """
    channels = out.size(1)
    batch, _, height, width = inp.size()
    pixels = height * width

    if hasattr(module, "window_size"):
        tokens = module.window_size ** 2
        windows = pixels // tokens
    else:
        tokens, windows = pixels, 1

    heads = module.num_heads
    head_dim = channels // heads

    # attn = q @ k^T : (c x tokens) @ (tokens x c) per head
    gram = heads * head_dim * head_dim * tokens
    # x = attn @ v   : (c x c) @ (c x tokens) per head
    mix = heads * tokens * head_dim * head_dim
    return batch * windows * (gram + mix)
def compute_LayerNorm_madd(module, inp, out):
    """Count the multiply-adds of a layer-normalisation layer.

    3-D tensors are given a leading axis so that both ``inp`` and ``out`` are
    4-D. Four operations (subtract mean, divide by standard deviation,
    multiply by alpha, add beta) are charged for each element of the last two
    input dimensions; the first two dimensions do not enter the result.

    ``module`` is not inspected, since the dispatcher also routes custom
    classes whose name contains ``LayerNorm`` here.
    """
    if inp.dim() == 3:
        inp = inp.unsqueeze(0)
    if out.dim() == 3:
        out = out.unsqueeze(0)
    assert inp.dim() == 4 and out.dim() == 4

    _, rows, cols = inp.shape[1:]
    ops_per_element = 4  # sub mean, div std, mul alpha, add beta
    return ops_per_element * rows * cols
def compute_Linear_madd(module, inp, out):
    """Count the multiply-adds of one ``nn.Linear`` forward pass for one sample.

    Each output feature needs ``in_features`` multiplications and
    ``in_features - 1`` additions. ``nn.Linear`` acts on the last dimension,
    so that cost is paid once for every row of features a sample contains
    (``N`` rows for a ``(B, N, C)`` input, one row for a ``(B, C)`` input).

    The previous implementation kept only the first sample and then ignored
    its row dimension, so inputs with more than two dimensions were
    undercounted. Results for 2-D inputs are unchanged. As with the other
    ``*_madd`` helpers in this file, the batch dimension is not included.

    Args:
        module: the ``nn.Linear`` layer.
        inp: input tensor ``(batch, ..., in_features)``.
        out: output tensor ``(batch, ..., out_features)``.

    Returns:
        int: multiply-adds for a single sample.
    """
    assert isinstance(module, nn.Linear)
    # A real message replaces ``assert cond, print(...)``, whose message was None.
    assert inp.dim() >= 2 and inp.dim() == out.dim(), \
        f"unexpected Linear shapes: {tuple(inp.size())} -> {tuple(out.size())}"

    num_in_features = inp.size(-1)
    num_out_features = out.size(-1)
    # Rows per sample: every dimension between batch and features (1 when 2-D).
    rows_per_sample = out.size()[1:-1].numel()

    mul = num_in_features
    add = num_in_features - 1
    return rows_per_sample * num_out_features * (mul + add)
def compute_madd(module, inp, out):
    """Dispatch to the multiply-add counter that matches ``module``.

    Checks run in a fixed order and the first match wins. Layer norms are
    matched by type or by a class name containing ``LayerNorm``.
    ``nn.Bilinear`` receives its two inputs as ``inp[0]`` and ``inp[1]``.
    Any other layer is reported on stdout and counted as 0.
    """
    kind = type(module).__name__

    if isinstance(module, nn.Conv2d):
        return compute_Conv2d_madd(module, inp, out)
    if isinstance(module, nn.ConvTranspose2d):
        return compute_ConvTranspose2d_madd(module, inp, out)
    if isinstance(module, nn.BatchNorm2d):
        return compute_BatchNorm2d_madd(module, inp, out)
    if isinstance(module, nn.LayerNorm) or 'LayerNorm' in kind:
        return compute_LayerNorm_madd(module, inp, out)
    if isinstance(module, nn.MaxPool2d):
        return compute_MaxPool2d_madd(module, inp, out)
    if isinstance(module, nn.AvgPool2d):
        return compute_AvgPool2d_madd(module, inp, out)
    if isinstance(module, (nn.ReLU, nn.ReLU6)):
        return compute_ReLU_madd(module, inp, out)
    if isinstance(module, nn.Softmax):
        return compute_Softmax_madd(module, inp, out)
    if isinstance(module, nn.Linear):
        return compute_Linear_madd(module, inp, out)
    if isinstance(module, nn.Bilinear):
        first, second = inp[0], inp[1]
        return compute_Bilinear_madd(module, first, second, out)

    print(f"[MAdd]: {type(module).__name__} is not supported!")
    return 0
def compute_ReLU_memory(module, inp, out):
    """Return ``(mread, mwrite)`` element counts for an element-wise activation.

    Accepts ``nn.ReLU``, ``nn.ReLU6``, ``nn.ELU`` and ``nn.LeakyReLU``. Both
    counts equal the number of elements in ``inp``; ``out`` is not inspected.
    """
    assert isinstance(module, (nn.ReLU, nn.ReLU6, nn.ELU, nn.LeakyReLU))
    shape = inp.size()
    # batch size times the element count of one sample
    elements = shape[0] * shape[1:].numel()
    return (elements, elements)
def compute_Linear_memory(module, inp, out):
    """Return ``(mread, mwrite)`` element counts for one ``nn.Linear`` forward pass.

    * ``mread``  = every input element, plus the layer's trainable parameters
      once per sample. This is the same ``batch * (sample_input + params)``
      convention the other ``*_memory`` helpers in this file use.
    * ``mwrite`` = every output element.

    The previous implementation kept only the first sample of inputs with
    more than two dimensions. ``mread`` then lost the row dimension and
    ``mwrite`` lost the batch dimension, which disagreed with the 2-D case.
    Results for 2-D inputs are unchanged.

    Args:
        module: the ``nn.Linear`` layer.
        inp: input tensor ``(batch, ..., in_features)``.
        out: output tensor ``(batch, ..., out_features)``.
    """
    assert isinstance(module, nn.Linear)
    assert inp.dim() >= 2 and inp.dim() == out.dim(), \
        f"unexpected Linear shapes: {tuple(inp.size())} -> {tuple(out.size())}"

    batch_size = inp.size()[0]
    # inp.size()[1:] covers all rows and features of a single sample.
    mread = batch_size * (inp.size()[1:].numel() + num_params(module))
    mwrite = out.size().numel()
    return (mread, mwrite)
class ModelHook(object):
    """Profile a model by temporarily patching ``__call__`` on module classes.

    On construction every leaf module (and every container whose class name
    contains one of ``debug_layers``) has its class ``__call__`` replaced by a
    wrapper that records shapes, parameter count, memory, MAdd, FLOPs and
    wall-clock duration as attributes on the module. One forward pass with
    random inputs is then executed. Call :meth:`clear_hooks` to restore the
    original ``__call__`` implementations.

    Args:
        model (nn.Module): model to profile.
        input_size (list | tuple): one shape per positional model input; each
            shape is unpacked into ``torch.rand``.
        device (str): ``'cuda'`` or ``'cpu'`` (case-insensitive). CUDA is only
            used for the random inputs when it is available.
        debug_layers (list[str] | None): substrings of class names of
            non-leaf modules that should be profiled as a unit.
    """

    def __init__(self, model, input_size, device="cuda", debug_layers=None):
        assert isinstance(model, nn.Module)
        assert isinstance(input_size, (list, tuple))

        # Validate before any class is patched so a bad argument cannot leave
        # module classes with a replaced ``__call__``.
        device = device.lower()
        assert device in [
            "cuda",
            "cpu",
        ], "Input device is not valid, please specify 'cuda' or 'cpu'"

        self.leaf_modules = []
        # ``None`` replaces the former shared mutable default ``[]``.
        self.debug_layers = [] if debug_layers is None else debug_layers
        self._model = model
        self._input_size = input_size
        self._origin_call = dict()  # class -> original __call__
        self.hooks = []
        self._hook_model()

        if device == "cuda" and torch.cuda.is_available():
            dtype = torch.cuda.FloatTensor
        else:
            dtype = torch.FloatTensor

        try:
            x = [torch.rand(*in_size).type(dtype) for in_size in input_size]
            self._model.eval()
            self._model(*x)
        except BaseException:
            # The patches are class-wide; never leave them behind on failure.
            self.clear_hooks()
            raise

    @staticmethod
    def _register_buffer(module):
        """Register the statistic buffers on ``module`` if it is a leaf."""
        assert isinstance(module, nn.Module)

        if len(list(module.children())) > 0:
            return

        module.register_buffer('input_shape', torch.zeros(3).int())
        module.register_buffer('output_shape', torch.zeros(3).int())
        module.register_buffer('parameter_quantity', torch.zeros(1).int())
        module.register_buffer('inference_memory', torch.zeros(1).long())
        module.register_buffer('MAdd', torch.zeros(1).long())
        module.register_buffer('duration', torch.zeros(1).float())
        module.register_buffer('Flops', torch.zeros(1).long())
        module.register_buffer('Memory', torch.zeros(2).long())

    def _sub_module_call_hook(self):
        """Patch ``__call__`` of the selected module classes with a profiler."""

        def wrap_call(module, *input, **kwargs):
            assert module.__class__ in self._origin_call

            # Bytes per element, used to convert element counts to bytes.
            # ``element_size`` avoids copying the whole tensor to NumPy and
            # also works for dtypes NumPy cannot represent.
            first = input[0]
            if torch.is_tensor(first):
                itemsize = first.element_size()
            else:
                itemsize = first.detach().cpu().numpy().itemsize

            start = time.time()
            # All instances of one class share the same original __call__, so
            # it is stored once per class.
            output = self._origin_call[module.__class__](module, *input, **kwargs)
            end = time.time()
            module.duration = torch.from_numpy(
                np.array([end - start], dtype=np.float32))

            # Shapes without the leading (batch) dimension.
            module.input_shape = torch.from_numpy(
                np.array(input[0].size()[1:], dtype=np.int32))
            module.output_shape = torch.from_numpy(
                np.array(output.size()[1:], dtype=np.int32))

            parameter_quantity = 0
            inference_memory = 1
            cls_name = module.__class__.__name__

            if 'XCTEB' in cls_name:
                c, h, w = module.input_shape.tolist()
                parameter_quantity += c * c * module.num_heads
            elif 'SwinTEB' in cls_name:
                dims = module.input_shape.tolist()
                if len(dims) == 3:
                    _, N, c = dims
                elif len(dims) == 2:
                    N = dims[0]
                else:
                    raise ValueError(
                        f"{cls_name}: unsupported input shape {dims}")
                # Window attention reduces FLOPs but not memory, so the
                # attention size is counted from the full token count.
                parameter_quantity += N * N * module.num_heads
                print(parameter_quantity, N, module.input_shape)
            elif 'MSA' == cls_name:
                # Input layout is (L, B, D).
                module.input_shape = torch.from_numpy(
                    np.array(input[0].permute(1, 2, 0).size()[1:], dtype=np.int32))
                c, L = module.input_shape.tolist()
                parameter_quantity += L * L * module.num_heads
            elif 'MSA_BNC' == cls_name:
                # Input layout is (B, L, C).
                module.input_shape = torch.from_numpy(
                    np.array(input[0].permute(0, 2, 1).size()[1:], dtype=np.int32))
                c, L = module.input_shape.tolist()
                parameter_quantity += L * L * module.num_heads
            elif 'sGCN' == cls_name:
                module.input_shape = torch.from_numpy(
                    np.array(input[0][0].permute(0, 2, 1).size(), dtype=np.int32))
                c, H, W = module.input_shape.tolist()
                c = c // 2
                parameter_quantity += c * c
            elif 'cGCN' == cls_name:
                module.input_shape = torch.from_numpy(
                    np.array(input[0][0].permute(0, 2, 1).size(), dtype=np.int32))
                c, H, W = module.input_shape.tolist()
                c = c // 2
                parameter_quantity += c * c // 2
            else:
                # Activation memory: product of the per-sample output dims.
                for s in output.size()[1:]:
                    inference_memory *= s
                # Parameter memory is reported separately.
                for name, p in module._parameters.items():
                    parameter_quantity += (0 if p is None else torch.numel(p.data))

            # ``np.long`` is unavailable on NumPy 1.24-1.26; use a fixed width.
            module.parameter_quantity = torch.from_numpy(
                np.array([parameter_quantity], dtype=np.int64))

            inference_memory = inference_memory * 4 / (1024 ** 2)  # shown as MB unit
            module.inference_memory = torch.from_numpy(
                np.array([inference_memory], dtype=np.float32))

            if len(input) == 1:
                madd = compute_madd(module, input[0], output)
                flops = compute_flops(module, input[0], output)
                Memory = compute_memory(module, input[0], output)
            elif len(input) > 1:
                madd = compute_madd(module, input, output)
                flops = compute_flops(module, input, output)
                Memory = compute_memory(module, input, output)
            else:  # error
                madd = 0
                flops = 0
                Memory = (0, 0)
            module.MAdd = torch.from_numpy(
                np.array([madd], dtype=np.int64))
            module.Flops = torch.from_numpy(
                np.array([flops], dtype=np.int64))
            Memory = np.array(Memory, dtype=np.int64) * itemsize
            module.Memory = torch.from_numpy(Memory)

            return output

        leaf_modules = self.leaf_modules

        for name, module in self._model.named_modules():
            has_children = len(list(module.children())) > 0
            if not has_children:
                module.name = name
                leaf_modules.append((name, module))
                if module.__class__ not in self._origin_call:
                    # Record one original __call__ per class, not per instance.
                    self._origin_call[module.__class__] = module.__class__.__call__
                    module.__class__.__call__ = wrap_call
            elif name != '' and any(
                    [key in module.__class__.__name__ for key in self.debug_layers]):
                # Non-leaf module explicitly requested for profiling as a unit.
                leaf_modules.append((name, module))
                if module.__class__ not in self._origin_call:
                    self._origin_call[module.__class__] = module.__class__.__call__
                    module.__class__.__call__ = wrap_call
                print(name, module.__class__.__name__)

    def _hook_model(self):
        """Register statistic buffers and install the ``__call__`` wrappers."""
        self._model.apply(self._register_buffer)
        self._sub_module_call_hook()

    def clear_hooks(self) -> None:
        """Restore the original ``__call__`` of every patched module class."""
        for module in self._model.modules():
            cls = module.__class__
            if cls in self._origin_call:
                cls.__call__ = self._origin_call[cls]

    def retrieve_leaf_modules(self):
        """Return the profiled modules as an ``OrderedDict`` keyed by name."""
        return OrderedDict(self.leaf_modules)

    def debug_partial_layer(self, target_keys):
        """Return names of sub-modules containing any of ``target_keys``."""
        target_layers = []
        submodule_name = dict(list(self._model.named_modules())[1:]).keys()
        for t in target_keys:
            for name in submodule_name:
                if t in name:
                    target_layers.append(name)
        return target_layers
def round_value(value, binary=False):
    """Format ``value`` with a K/M/G/T suffix.

    Args:
        value: non-negative number to format.
        binary (bool): use powers of 1024 instead of powers of 1000.

    Returns:
        str: ``value`` scaled and rounded to two decimals with a suffix, or
        ``str(value)`` when it is smaller than one unit of ``K``.
    """
    divisor = 1024. if binary else 1000.

    for power, suffix in ((4, 'T'), (3, 'G'), (2, 'M'), (1, 'K')):
        if value // divisor**power > 0:
            return str(round(value / divisor**power, 2)) + suffix
    return str(value)


def report_format(collected_nodes):
    """Render the collected stat nodes as a text table followed by totals.

    Args:
        collected_nodes: iterable of nodes exposing ``name``, ``mtype``,
            ``input_shape``, ``output_shape``, ``parameter_quantity``,
            ``inference_memory``, ``MAdd``, ``Flops``, ``Memory`` (a
            ``(read, write)`` pair) and ``duration``.

    Returns:
        str: the per-module table, a ``total`` row and summary lines.
    """
    columns = ['module name', 'input shape', 'output shape', 'params',
               'memory(MB)', 'MAdd', 'duration', 'Flops',
               'MemRead(B)', 'MemWrite(B)']
    data = list()
    properties = list()
    for node in collected_nodes:
        input_shape = ' '.join(['{:>3d}'] * len(node.input_shape)).format(
            *[e for e in node.input_shape])
        output_shape = ' '.join(['{:>3d}'] * len(node.output_shape)).format(
            *[e for e in node.output_shape])
        mread, mwrite = [i for i in node.Memory]
        data.append([node.name, input_shape, output_shape,
                     node.parameter_quantity, node.inference_memory,
                     node.MAdd, node.duration, node.Flops, mread, mwrite])
        properties.append(node.mtype)

    pd.set_option('display.max_columns', None)
    # Passing ``columns`` here keeps an empty node list from crashing.
    df = pd.DataFrame(data, columns=columns)
    df['duration[%]'] = df['duration'] / (df['duration'].sum() + 1e-7)
    df['MemR+W(B)'] = df['MemRead(B)'] + df['MemWrite(B)']
    df['type'] = properties

    total_parameters_quantity = df['params'].sum()
    total_memory = df['memory(MB)'].sum()
    total_operation_quantity = df['MAdd'].sum()
    total_flops = df['Flops'].sum()
    total_duration = df['duration[%]'].sum()
    total_mread = df['MemRead(B)'].sum()
    total_mwrite = df['MemWrite(B)'].sum()
    total_memrw = df['MemR+W(B)'].sum()
    del df['duration']

    # Total row. The read/write cells hold the column sums, not the values of
    # the last node visited. Columns without a total are shown blank.
    total_row = {col: ' ' for col in df.columns}
    total_row.update({
        'params': total_parameters_quantity,
        'memory(MB)': total_memory,
        'MAdd': total_operation_quantity,
        'Flops': total_flops,
        'duration[%]': total_duration,
        'MemRead(B)': total_mread,
        'MemWrite(B)': total_mwrite,
        'MemR+W(B)': total_memrw,
    })
    # ``DataFrame.append`` no longer exists in pandas 2.x; concat instead.
    df = pd.concat([df, pd.DataFrame([total_row], index=['total'])])

    df = df.fillna(' ')
    df['memory(MB)'] = df['memory(MB)'].apply(
        lambda x: '{:.2f}'.format(x))
    df['duration[%]'] = df['duration[%]'].apply(lambda x: '{:.2%}'.format(x))
    df['MAdd'] = df['MAdd'].apply(lambda x: '{:,}'.format(x))
    df['Flops'] = df['Flops'].apply(lambda x: '{:,}'.format(x))

    table = str(df)
    rule_width = len(table.split('\n')[0])

    summary = table + '\n'
    summary += "=" * rule_width
    summary += '\n'
    summary += "Total params: {:,}\n".format(total_parameters_quantity)

    summary += "-" * rule_width
    summary += '\n'
    summary += "Total memory: {:.2f}MB\n".format(total_memory)
    summary += "Total MAdd: {}MAdd\n".format(round_value(total_operation_quantity))
    summary += "Total Flops: {}Flops\n".format(round_value(total_flops))
    summary += "Total MemR+W: {}B\n".format(round_value(total_memrw, True))
    return summary
class StatTree(object):
    """Tree of :class:`StatNode` objects with level-based node selection."""

    def __init__(self, root_node):
        assert isinstance(root_node, StatNode)
        self.root_node = root_node

    def get_same_level_max_node_depth(self, query_node):
        """Return the largest ``depth`` among ``query_node`` and its siblings.

        The root (matched by name) always yields 0.
        """
        if query_node.name == self.root_node.name:
            return 0
        sibling_depths = [sibling.depth for sibling in query_node.parent.children]
        return max(sibling_depths)

    def update_stat_nodes_granularity(self):
        """Assign ``granularity`` to every node, visiting breadth-first."""
        pending = [self.root_node]
        # The list grows while it is iterated, which yields FIFO order.
        for current in pending:
            current.granularity = self.get_same_level_max_node_depth(current)
            pending.extend(current.children)

    def get_collected_stat_nodes(self, debug_layers, query_granularity):
        """Collect nodes for reporting in depth-first, left-to-right order.

        A node is appended once for each rule it satisfies:
        its ``mtype`` contains one of ``debug_layers``; its depth equals
        ``query_granularity``; or its depth is below ``query_granularity``
        while its granularity is at least ``query_granularity``.
        """
        self.update_stat_nodes_granularity()

        selected = []
        todo = [self.root_node]
        while todo:
            current = todo.pop()
            matches_debug = any([key in current.mtype for key in debug_layers])
            if matches_debug:
                selected.append(current)
            # Push right-to-left so the leftmost child is popped first.
            todo.extend(reversed(current.children))
            if current.depth == query_granularity:
                selected.append(current)
            if current.depth < query_granularity <= current.granularity:
                selected.append(current)
        return selected


class StatNode(object):
    """One node of the statistics tree, holding per-module measurements."""

    def __init__(self, name=str(), mtype=str(), parent=None):
        self._name = name
        self._mtype = mtype
        self._input_shape = None
        self._output_shape = None
        self._parameter_quantity = 0
        self._inference_memory = 0
        self._MAdd = 0
        self._Memory = (0, 0)
        self._Flops = 0
        self._duration = 0
        self._duration_percent = 0

        self._granularity = 1
        self._depth = 1
        self.parent = parent
        self.children = list()

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, name):
        self._name = name

    @property
    def mtype(self):
        return self._mtype

    @mtype.setter
    def mtype(self, mtype):
        self._mtype = mtype

    @property
    def granularity(self):
        return self._granularity

    @granularity.setter
    def granularity(self, g):
        self._granularity = g

    @property
    def depth(self):
        """Own depth plus the deepest child's depth (leaf: own depth only)."""
        if not self.children:
            return self._depth
        return self._depth + max([child.depth for child in self.children])

    @property
    def input_shape(self):
        """Own input shape for a leaf, else that of the first child."""
        if self.children:
            return self.children[0].input_shape
        return self._input_shape

    @input_shape.setter
    def input_shape(self, input_shape):
        assert isinstance(input_shape, (list, tuple))
        self._input_shape = input_shape

    @property
    def output_shape(self):
        """Own output shape for a leaf, else that of the last child."""
        if self.children:
            return self.children[-1].output_shape
        return self._output_shape

    @output_shape.setter
    def output_shape(self, output_shape):
        assert isinstance(output_shape, (list, tuple))
        self._output_shape = output_shape

    @property
    def parameter_quantity(self):
        # Children are deliberately not accumulated here.
        return self._parameter_quantity

    @parameter_quantity.setter
    def parameter_quantity(self, parameter_quantity):
        assert parameter_quantity >= 0
        self._parameter_quantity = parameter_quantity

    @property
    def inference_memory(self):
        """Own inference memory plus that of all descendants."""
        subtotal = self._inference_memory
        for child in self.children:
            subtotal += child.inference_memory
        return subtotal

    @inference_memory.setter
    def inference_memory(self, inference_memory):
        self._inference_memory = inference_memory

    @property
    def MAdd(self):
        # Children are deliberately not accumulated here.
        return self._MAdd

    @MAdd.setter
    def MAdd(self, MAdd):
        self._MAdd = MAdd

    @property
    def Flops(self):
        # Children are deliberately not accumulated here.
        return self._Flops

    @Flops.setter
    def Flops(self, Flops):
        self._Flops = Flops

    @property
    def Memory(self):
        # Children are deliberately not accumulated here.
        return self._Memory

    @Memory.setter
    def Memory(self, Memory):
        assert isinstance(Memory, (list, tuple))
        self._Memory = Memory

    @property
    def duration(self):
        # Children are deliberately not accumulated here.
        return self._duration

    @duration.setter
    def duration(self, duration):
        self._duration = duration

    def find_child_index(self, child_name):
        """Return the index of the last child named ``child_name``, or -1."""
        assert isinstance(child_name, str)

        found = -1
        for position, child in enumerate(self.children):
            if child.name == child_name:
                found = position
        return found

    def add_child(self, node):
        """Append ``node`` unless a child with the same name already exists."""
        assert isinstance(node, StatNode)

        already_present = self.find_child_index(node.name) != -1
        if not already_present:
            self.children.append(node)
def get_parent_node(root_node, stat_node_name):
    """Walk from ``root_node`` to the parent of the node ``stat_node_name``.

    ``stat_node_name`` is a dotted path; every proper prefix of it must
    already exist in the tree.
    """
    assert isinstance(root_node, StatNode)

    node = root_node
    names = stat_node_name.split('.')
    for i in range(len(names) - 1):
        node_name = '.'.join(names[0:i+1])
        child_index = node.find_child_index(node_name)
        assert child_index != -1
        node = node.children[child_index]
    return node


def convert_leaf_modules_to_stat_tree(leaf_modules):
    """Build a :class:`StatTree` from profiled modules keyed by dotted name.

    Args:
        leaf_modules (OrderedDict): name -> module carrying the attributes
            ``input_shape``, ``output_shape``, ``parameter_quantity``,
            ``inference_memory``, ``MAdd``, ``Flops``, ``duration`` and
            ``Memory`` as tensors.

    Returns:
        StatTree: tree rooted at a synthetic ``'root'`` node.
    """
    assert isinstance(leaf_modules, OrderedDict)

    root_node = StatNode(name='root', parent=None)
    for leaf_module_name, leaf_module in leaf_modules.items():
        names = leaf_module_name.split('.')
        # Module instances never expose ``__base__``, so the type label is
        # always the class name.
        mtype = leaf_module.__class__.__name__
        for i in range(len(names)):
            stat_node_name = '.'.join(names[0:i+1])
            parent_node = get_parent_node(root_node, stat_node_name)
            node = StatNode(name=stat_node_name, mtype=mtype, parent=parent_node)
            # ``add_child`` ignores the node when the name already exists.
            parent_node.add_child(node)
            if i == len(names) - 1:  # leaf module itself
                input_shape = leaf_module.input_shape.numpy().tolist()
                output_shape = leaf_module.output_shape.numpy().tolist()
                node.input_shape = input_shape
                node.output_shape = output_shape
                node.parameter_quantity = leaf_module.parameter_quantity.numpy()[0]
                node.inference_memory = leaf_module.inference_memory.numpy()[0]
                node.MAdd = leaf_module.MAdd.numpy()[0]
                node.Flops = leaf_module.Flops.numpy()[0]
                node.duration = leaf_module.duration.numpy()[0]
                node.Memory = leaf_module.Memory.numpy().tolist()
    return StatTree(root_node)


class ModelStat(object):
    """Profile a model once and print a per-module report."""

    def __init__(self, model, input_size, query_granularity=1, debug_layers=[]):
        assert isinstance(model, nn.Module)
        # assert isinstance(input_size, (tuple, list)) and len(input_size) == 3
        self._model = model
        self._input_size = input_size
        self._query_granularity = query_granularity
        self.debug_layers = debug_layers

    def _analyze_model(self):
        """Run the hooked forward pass and return the nodes to report."""
        model_hook = ModelHook(self._model, self._input_size, debug_layers=self.debug_layers)
        try:
            leaf_modules = model_hook.retrieve_leaf_modules()
            stat_tree = convert_leaf_modules_to_stat_tree(leaf_modules)
            collected_nodes = stat_tree.get_collected_stat_nodes(
                self.debug_layers, self._query_granularity)
        finally:
            # Always undo the class-level ``__call__`` patches, even when
            # building the tree fails.
            model_hook.clear_hooks()
        return collected_nodes

    def show_report(self):
        """Print the formatted statistics table."""
        collected_nodes = self._analyze_model()
        report = report_format(collected_nodes)
        print(report)


def stat(model, input_size, query_granularity=1, debug_layers=["MSA", "SwinTEB", "XCTEB", "MSA_BNC", 'cGCN', 'sGCN']):
    """Convenience entry point: profile ``model`` and print the report."""
    ms = ModelStat(model, input_size, query_granularity, debug_layers)
    ms.show_report()
def get_dist_info():
    """Return ``(rank, world_size)``; ``(0, 1)`` when not running distributed."""
    distributed = dist.is_available() and dist.is_initialized()
    if not distributed:
        return 0, 1
    return dist.get_rank(), dist.get_world_size()


def init_random_seed(seed=None, device='cuda'):
    """Pick the random seed to use, sharing it across ranks when needed.

    A seed that is given is returned unchanged. Otherwise a seed is drawn
    with NumPy; in a multi-process run the value drawn on rank 0 is
    broadcast so that every rank ends up with the same seed. See
    https://github.com/open-mmlab/mmdetection/issues/6339 for the motivation.

    Args:
        seed (int, Optional): The seed. Default to None.
        device (str): Device holding the tensor used for the broadcast.
            Default to 'cuda'.

    Returns:
        int: Seed to be used.
    """
    if seed is not None:
        return seed

    rank, world_size = get_dist_info()
    seed = np.random.randint(2**31)
    if world_size == 1:
        return seed

    # Only rank 0 contributes its draw; the other ranks receive it.
    payload = seed if rank == 0 else 0
    random_num = torch.tensor(payload, dtype=torch.int32, device=device)
    dist.broadcast(random_num, src=0)
    return random_num.item()


def set_random_seed(seed, deterministic=True):
    """Seed Python, NumPy and PyTorch (CPU and all CUDA devices).

    Args:
        seed (int): Seed to be used.
        deterministic (bool): Whether to set the deterministic option for
            the CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
            to True and `torch.backends.cudnn.benchmark` to False.
            Default: True.
    """
    random.seed(seed)
    np.random.seed(seed)
    for seeder in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)
    if not deterministic:
        return
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
def show_memory_info(hint):
    """Print the unique set size (USS) of the current process in MB.

    Args:
        hint (str): label printed in front of the measurement.
    """
    pid = os.getpid()
    p = psutil.Process(pid)

    info = p.memory_full_info()
    memory = info.uss / 1024. / 1024
    print('{} memory used: {} MB'.format(hint, memory))


class AverageMeter(object):
    """Computes and stores the average and current value.

    Args:
        name (str | None): label shown by ``str(meter)``; omitted when None.
        fmt (str): format spec (including the leading ``:``) applied to the
            current value and the average in ``str(meter)``.
    """

    def __init__(self, name=None, fmt=":f"):
        # Kept so that ``ProgressMeter.display`` can render the meter.
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value with weight ``n``."""
        self.val = val
        self.sum += val * n
        self.count += n
        # A zero total weight would otherwise divide by zero.
        self.avg = self.sum / self.count if self.count else 0

    def __str__(self):
        template = '{val' + self.fmt + '} ({avg' + self.fmt + '})'
        text = template.format(val=self.val, avg=self.avg)
        if self.name is None:
            return text
        return '{} {}'.format(self.name, text)


class ProgressMeter(object):
    """Print a ``[batch/total]`` prefix followed by a list of meters."""

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print one tab-separated progress line for batch index ``batch``."""
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        # Pad the running index to the width of the total batch count.
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output (Tensor): scores with one row per sample.
        target (Tensor): ground-truth class index per sample.
        topk (tuple[int]): values of k to evaluate.

    Returns:
        list[Tensor]: one single-element tensor (percentage) per k.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is available and initialised."""
    return dist.is_available() and dist.is_initialized()


class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.

    Args:
        window_size (int): number of most recent values kept for the
            ``median``/``std``/``max`` statistics.
        fmt (str | None): format string for ``str()``; may reference
            ``median``, ``avg``, ``max``, ``value`` and ``std``.
        eval (bool): when ``fmt`` is None, selects the default format that
            also shows the standard deviation.
    """

    def __init__(self, window_size=20, fmt=None, eval=False):
        if fmt is None:
            if not eval:
                fmt = "{value:.7f} (avg:{avg:.7f})"
            else:
                fmt = "{value:.7f} (avg:{avg:.7f}, std:{std:.7f})"
        self.reset(window_size)
        self.fmt = fmt

    def reset(self, window_size):
        """Drop all recorded values and start a window of ``window_size``."""
        self.deque = deque(maxlen=window_size)
        self.val = 0
        self.avg = 0
        self.total = 0
        self.count = 0

    def update(self, value, n=1):
        """Record ``value`` as the latest sample with weight ``n``."""
        self.deque.append(value)
        self.val = value
        self.count += n
        self.total += value * n
        self.avg = self.total / self.count

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        t = torch.tensor([self.val, self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.val = t[0]
        self.count = int(t[1])
        self.total = t[2]
        # No rank may have recorded anything yet.
        self.avg = self.total / self.count if self.count else 0

    @property
    def median(self):
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def std(self):
        # A single sample has no sample deviation; return NaN directly rather
        # than letting torch emit a degrees-of-freedom warning.
        if len(self.deque) < 2:
            return float('nan')
        # An explicit float dtype keeps integer samples from raising.
        return torch.tensor(list(self.deque), dtype=torch.float32).std().item()

    @property
    def max(self):
        return max(self.deque)

    def __str__(self):
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            max=self.max,
            value=self.val,
            std=self.std)


class MetricLogger(object):
    """Collect named :class:`SmoothedValue` meters and log them periodically.

    Meters are created on first update and are reachable as attributes
    (``logger.loss``).
    """

    def __init__(self, logger=None, delimiter="\t", dist_print=0, window_size=20, eval=False):
        self.meters = defaultdict(partial(SmoothedValue, window_size=window_size, eval=eval))
        self.delimiter = delimiter
        self.dist_print = dist_print

    def update(self, **kwargs):
        """Update the meters named by the keyword arguments."""
        self.update_dict(kwargs)

    def update_dict(self, kwargs: dict):
        """Update the meter for every key; tensors are reduced to their mean."""
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = torch.mean(v)
            if hasattr(v, 'item'):
                v = v.item()
            # Strings were formerly let through here only to fail inside the
            # meter arithmetic, so they are rejected up front.
            assert isinstance(v, (float, int)), \
                "metric '{}' must be a number, got type: {}".format(k, type(v))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        # Read ``meters`` through ``__dict__``: while an instance is being
        # rebuilt (copy/pickle) it does not exist yet, and going through
        # ``self.meters`` would re-enter this method forever.
        meters = self.__dict__.get('meters')
        if meters is not None and attr in meters:
            return meters[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        """Synchronise every meter across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter

    def clear(self):
        """Remove all meters."""
        self.meters.clear()

    def log_every(self, iterable, print_freq, header=None):
        """Yield ``(item, index)`` pairs and log progress along the way.

        The index starts at 1. A line is logged every ``print_freq`` items and
        on the last item; a total-time line is logged after the loop.

        Args:
            iterable: sized iterable to wrap (``len()`` is required).
            print_freq (int): logging interval in iterations.
            header (str | None): text placed in front of every line.
        """
        i = 1
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        fields = [
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}',
        ]
        if torch.cuda.is_available():
            fields.append('max mem: {memory:.0f}MB')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0

        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj, i
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == len(iterable):
                eta_seconds = iter_time.avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                if torch.cuda.is_available():
                    # NOTE(review): with CUDA only ``dist_print == 0`` logs,
                    # while the CPU path below always logs - confirm intent.
                    if self.dist_print == 0:
                        print_log(log_msg.format(
                            i, len(iterable), eta=eta_string,
                            meters=str(self),
                            time=str(iter_time), data=str(data_time),
                            memory=torch.cuda.max_memory_allocated() / MB))
                else:
                    print_log(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time)))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        if self.dist_print == 0:
            # ``max(..., 1)`` keeps an empty iterable from dividing by zero.
            print_log('{} Total time: {} ({:.4f} s / it)'.format(
                header, total_time_str, total_time / max(len(iterable), 1)))
def gaussian(window_size, sigma):
    """Return a normalised 1-D Gaussian kernel with ``window_size`` taps."""
    gauss = torch.Tensor([exp(-(x - window_size//2)**2/float(2*sigma**2)) for x in range(window_size)])
    return gauss/gauss.sum()


def create_window(window_size, channel, sigma=1.5):
    """Return a ``(channel, 1, window_size, window_size)`` Gaussian window.

    The same 2-D kernel is repeated for every channel so it can be used with
    a grouped convolution.
    """
    _1D_window = gaussian(window_size, sigma).unsqueeze(1)
    _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
    return _2D_window.expand(channel, 1, window_size, window_size).contiguous()


def _ssim(img1, img2, window, window_size, channel, size_average=True, data_range=1):
    """Compute SSIM between two ``(N, C, H, W)`` image batches.

    Args:
        img1, img2 (Tensor): images to compare, same shape.
        window (Tensor): per-channel Gaussian window from ``create_window``.
        window_size (int): side length of ``window``.
        channel (int): number of image channels (groups of the convolution).
        size_average (bool): return the global mean if True, otherwise one
            value per batch element.
        data_range (float): dynamic range of the pixel values; scales the
            stabilising constants. The default of 1 reproduces the former
            hard-coded constants.
    """
    pad = window_size // 2
    mu1 = F.conv2d(img1, window, padding=pad, groups=channel)
    mu2 = F.conv2d(img2, window, padding=pad, groups=channel)

    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1*mu2

    sigma1_sq = F.conv2d(img1*img1, window, padding=pad, groups=channel) - mu1_sq
    sigma2_sq = F.conv2d(img2*img2, window, padding=pad, groups=channel) - mu2_sq
    sigma12 = F.conv2d(img1*img2, window, padding=pad, groups=channel) - mu1_mu2

    # The constants must follow the pixel range, otherwise inputs that are
    # not in [0, 1] are scored with effectively no stabilisation.
    C1 = (0.01 * data_range)**2
    C2 = (0.03 * data_range)**2

    ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2))

    if size_average:
        return ssim_map.mean()
    else:
        return ssim_map.mean(1).mean(1).mean(1)


class SSIM(torch.nn.Module):
    """SSIM as a module, caching the Gaussian window between calls.

    Args:
        win_size (int): Gaussian window side length.
        win_sigma (float): Gaussian standard deviation.
        data_range (float): dynamic range of the input pixel values.
        size_average (bool): return the global mean instead of one value per
            batch element.
        channel (int): expected number of channels; the window is rebuilt
            automatically when an input has a different count.
    """

    def __init__(self, win_size=11, win_sigma=1.5, data_range=1, size_average=True, channel=3):
        super(SSIM, self).__init__()
        self.window_size = win_size
        self.size_average = size_average
        self.channel = channel
        self.win_sigma = win_sigma
        self.data_range = data_range
        self.window = create_window(win_size, self.channel, win_sigma)

    def forward(self, img1, img2):
        (_, channel, _, _) = img1.size()

        cached = self.window
        reusable = (channel == self.channel
                    and cached.dtype == img1.dtype
                    and cached.device == img1.device)
        if reusable:
            window = cached
        else:
            # Rebuild on a channel, dtype or device change and cache it.
            window = create_window(self.window_size, channel, self.win_sigma)
            window = window.to(device=img1.device, dtype=img1.dtype)
            self.window = window
            self.channel = channel

        return _ssim(img1, img2, window, self.window_size, channel,
                     self.size_average, self.data_range)


def ssim(img1, img2, win_size=11, data_range=1, size_average=True):
    """Functional SSIM between two ``(N, C, H, W)`` batches.

    Args:
        img1, img2 (Tensor): images to compare.
        win_size (int): Gaussian window side length.
        data_range (float): dynamic range of the pixel values.
        size_average (bool): return the global mean instead of one value per
            batch element.
    """
    (_, channel, _, _) = img1.size()
    window = create_window(win_size, channel)
    window = window.to(device=img1.device, dtype=img1.dtype)
    return _ssim(img1, img2, window, win_size, channel, size_average, data_range)
def import_modules_from_strings(imports, allow_failed_imports=False):
    """Import modules from the given list of strings.

    Args:
        imports (list | str | None): The given module names to be imported.
        allow_failed_imports (bool): If True, the failed imports will return
            None. Otherwise, an ImportError is raise. Default: False.

    Returns:
        list[module] | module | None: The imported modules.

    Raises:
        TypeError: if ``imports`` is neither a str nor a list, or if one of
            its entries is not a str.
        ImportError: if a module cannot be imported and
            ``allow_failed_imports`` is False.

    Examples:
        >>> osp, sys = import_modules_from_strings(
        ...     ['os.path', 'sys'])
        >>> import os.path as osp_
        >>> import sys as sys_
        >>> assert osp == osp_
        >>> assert sys == sys_
    """
    if not imports:
        return
    single_import = False
    if isinstance(imports, str):
        single_import = True
        imports = [imports]
    if not isinstance(imports, list):
        raise TypeError(
            f'custom_imports must be a list but got type {type(imports)}')
    imported = []
    for imp in imports:
        if not isinstance(imp, str):
            raise TypeError(
                f'{imp} is of type {type(imp)} and cannot be imported.')
        try:
            imported_tmp = import_module(imp)
        except ImportError as err:
            if allow_failed_imports:
                warnings.warn(f'{imp} failed to import and is ignored.',
                              UserWarning)
                imported_tmp = None
            else:
                # Name the module and keep the original error as the cause;
                # a bare ``raise ImportError`` hides both.
                raise ImportError(f'Failed to import {imp}') from err
        imported.append(imported_tmp)
    if single_import:
        imported = imported[0]
    return imported


def check_file_exist(filename, msg_tmpl='file "{}" does not exist'):
    """Raise ``FileNotFoundError`` unless ``filename`` is an existing file.

    Args:
        filename (str): path to check.
        msg_tmpl (str): message template; ``{}`` is replaced by ``filename``.
    """
    if not osp.isfile(filename):
        raise FileNotFoundError(msg_tmpl.format(filename))
def add_args(parser, cfg, prefix=''):
    """Register one ``--<prefix><key>`` option on ``parser`` per entry of ``cfg``.

    Args:
        parser (ArgumentParser): Parser to extend; it is also returned.
        cfg (dict-like): Mapping of option names to their current values.
            The value's type selects the argparse type/action.
        prefix (str): Dotted prefix used for nested dicts.

    Returns:
        ArgumentParser: The same ``parser`` object.
    """
    for k, v in cfg.items():
        flag = '--' + prefix + k
        if isinstance(v, str):
            parser.add_argument(flag)
        elif isinstance(v, bool):
            # bool must be tested before int: ``isinstance(True, int)`` is
            # True, so the old ordering never reached the store_true branch.
            parser.add_argument(flag, action='store_true')
        elif isinstance(v, int):
            parser.add_argument(flag, type=int)
        elif isinstance(v, float):
            parser.add_argument(flag, type=float)
        elif isinstance(v, dict):
            add_args(parser, v, prefix + k + '.')
        elif isinstance(v, abc.Iterable):
            # Take the element type from the first item without indexing, so
            # empty or non-subscriptable iterables no longer raise.
            first = next(iter(v), None)
            if first is None:
                parser.add_argument(flag, nargs='+')
            else:
                parser.add_argument(flag, type=type(first), nargs='+')
        else:
            print(f'cannot parse key {prefix + k} of type {type(v)}')
    return parser
Example: >>> cfg = Config(dict(a=1, b=dict(b1=[0, 1]))) >>> cfg.a 1 >>> cfg.b {'b1': [0, 1]} >>> cfg.b.b1 [0, 1] >>> cfg = Config.fromfile('tests/data/config/a.py') >>> cfg.filename "/home/kchen/projects/mmcv/tests/data/config/a.py" >>> cfg.item4 'test' >>> cfg "Config [path: /home/kchen/projects/mmcv/tests/data/config/a.py]: " "{'item1': [1, 2], 'item2': {'a': 0}, 'item3': True, 'item4': 'test'}" """ @staticmethod def _validate_py_syntax(filename): with open(filename, 'r', encoding='utf-8') as f: # Setting encoding explicitly to resolve coding issue on windows content = f.read() try: ast.parse(content) except SyntaxError as e: raise SyntaxError('There are syntax errors in config ' f'file {filename}: {e}') @staticmethod def _substitute_predefined_vars(filename, temp_config_name): file_dirname = osp.dirname(filename) file_basename = osp.basename(filename) file_basename_no_extension = osp.splitext(file_basename)[0] file_extname = osp.splitext(filename)[1] support_templates = dict( fileDirname=file_dirname, fileBasename=file_basename, fileBasenameNoExtension=file_basename_no_extension, fileExtname=file_extname) with open(filename, 'r', encoding='utf-8') as f: # Setting encoding explicitly to resolve coding issue on windows config_file = f.read() for key, value in support_templates.items(): regexp = r'\{\{\s*' + str(key) + r'\s*\}\}' value = value.replace('\\', '/') config_file = re.sub(regexp, value, config_file) with open(temp_config_name, 'w') as tmp_config_file: tmp_config_file.write(config_file) @staticmethod def _pre_substitute_base_vars(filename, temp_config_name): """Substitute base variable placehoders to string, so that parsing would work.""" with open(filename, 'r', encoding='utf-8') as f: # Setting encoding explicitly to resolve coding issue on windows config_file = f.read() base_var_dict = {} regexp = r'\{\{\s*' + BASE_KEY + r'\.([\w\. 
]+)\s*\}\}' base_vars = set(re.findall(regexp, config_file)) for base_var in base_vars: randstr = f'_{base_var}_{uuid.uuid4().hex.lower()[:6]}' base_var_dict[randstr] = base_var regexp = r'\{\{\s*' + BASE_KEY + r'\.' + base_var + r'\s*\}\}' config_file = re.sub(regexp, f'"{randstr}"', config_file) with open(temp_config_name, 'w') as tmp_config_file: tmp_config_file.write(config_file) return base_var_dict @staticmethod def _substitute_base_vars(cfg, base_var_dict, base_cfg): """Substitute variable strings to their actual values.""" cfg = copy.deepcopy(cfg) if isinstance(cfg, dict): for k, v in cfg.items(): if isinstance(v, str) and v in base_var_dict: new_v = base_cfg for new_k in base_var_dict[v].split('.'): new_v = new_v[new_k] cfg[k] = new_v elif isinstance(v, (list, tuple, dict)): cfg[k] = Config._substitute_base_vars( v, base_var_dict, base_cfg) elif isinstance(cfg, tuple): cfg = tuple( Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg) elif isinstance(cfg, list): cfg = [ Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg ] elif isinstance(cfg, str) and cfg in base_var_dict: new_v = base_cfg for new_k in base_var_dict[cfg].split('.'): new_v = new_v[new_k] cfg = new_v return cfg @staticmethod def _file2dict(filename, use_predefined_variables=True): filename = osp.abspath(osp.expanduser(filename)) check_file_exist(filename) fileExtname = osp.splitext(filename)[1] if fileExtname not in ['.py', '.json', '.yaml', '.yml']: raise IOError('Only py/yml/yaml/json type are supported now!') with tempfile.TemporaryDirectory() as temp_config_dir: temp_config_file = tempfile.NamedTemporaryFile( dir=temp_config_dir, suffix=fileExtname) if platform.system() == 'Windows': temp_config_file.close() temp_config_name = osp.basename(temp_config_file.name) # Substitute predefined variables if use_predefined_variables: Config._substitute_predefined_vars(filename, temp_config_file.name) else: shutil.copyfile(filename, temp_config_file.name) # 
Substitute base variables from placeholders to strings base_var_dict = Config._pre_substitute_base_vars( temp_config_file.name, temp_config_file.name) if filename.endswith('.py'): temp_module_name = osp.splitext(temp_config_name)[0] sys.path.insert(0, temp_config_dir) Config._validate_py_syntax(filename) mod = import_module(temp_module_name) sys.path.pop(0) cfg_dict = {} for name, value in mod.__dict__.items(): if not name.startswith('__'): if callable(value): name = 'data' cfg_dict.update({ name: value }) # cfg_dict = {name: value for name, value in mod.__dict__.items() if not name.startswith('__')} # delete imported module del sys.modules[temp_module_name] elif filename.endswith(('.yml', '.yaml', '.json')): import mmcv cfg_dict = mmcv.load(temp_config_file.name) # close temp file temp_config_file.close() cfg_text = filename + '\n' with open(filename, 'r', encoding='utf-8') as f: # Setting encoding explicitly to resolve coding issue on windows cfg_text += f.read() if BASE_KEY in cfg_dict: cfg_dir = osp.dirname(filename) base_filename = cfg_dict.pop(BASE_KEY) base_filename = base_filename if isinstance( base_filename, list) else [base_filename] cfg_dict_list = list() cfg_text_list = list() for f in base_filename: _cfg_dict, _cfg_text = Config._file2dict(osp.join(cfg_dir, f)) cfg_dict_list.append(_cfg_dict) cfg_text_list.append(_cfg_text) base_cfg_dict = dict() for c in cfg_dict_list: if len(base_cfg_dict.keys() & c.keys()) > 0: raise KeyError('Duplicate key is not allowed among bases') base_cfg_dict.update(c) # Subtitute base variables from strings to their actual values cfg_dict = Config._substitute_base_vars(cfg_dict, base_var_dict, base_cfg_dict) base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict) cfg_dict = base_cfg_dict # merge cfg_text cfg_text_list.append(cfg_text) cfg_text = '\n'.join(cfg_text_list) return cfg_dict, cfg_text @staticmethod def _merge_a_into_b(a, b, allow_list_keys=False): """merge dict ``a`` into dict ``b`` (non-inplace). 
Values in ``a`` will overwrite ``b``. ``b`` is copied first to avoid in-place modifications. Args: a (dict): The source dict to be merged into ``b``. b (dict): The origin dict to be fetch keys from ``a``. allow_list_keys (bool): If True, int string keys (e.g. '0', '1') are allowed in source ``a`` and will replace the element of the corresponding index in b if b is a list. Default: False. Returns: dict: The modified dict of ``b`` using ``a``. Examples: # Normally merge a into b. >>> Config._merge_a_into_b( ... dict(obj=dict(a=2)), dict(obj=dict(a=1))) {'obj': {'a': 2}} # Delete b first and merge a into b. >>> Config._merge_a_into_b( ... dict(obj=dict(_delete_=True, a=2)), dict(obj=dict(a=1))) {'obj': {'a': 2}} # b is a list >>> Config._merge_a_into_b( ... {'0': dict(a=2)}, [dict(a=1), dict(b=2)], True) [{'a': 2}, {'b': 2}] """ b = b.copy() for k, v in a.items(): if allow_list_keys and k.isdigit() and isinstance(b, list): k = int(k) if len(b) <= k: raise KeyError(f'Index {k} exceeds the length of list {b}') b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) elif isinstance(v, dict) and k in b and not v.pop(DELETE_KEY, False): allowed_types = (dict, list) if allow_list_keys else dict if not isinstance(b[k], allowed_types): raise TypeError( f'{k}={v} in child config cannot inherit from base ' f'because {k} is a dict in the child config but is of ' f'type {type(b[k])} in base config. You may set ' f'`{DELETE_KEY}=True` to ignore the base config') b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) else: b[k] = v return b @staticmethod def fromfile(filename, use_predefined_variables=True, import_custom_modules=True): cfg_dict, cfg_text = Config._file2dict(filename, use_predefined_variables) if import_custom_modules and cfg_dict.get('custom_imports', None): import_modules_from_strings(**cfg_dict['custom_imports']) return Config(cfg_dict, cfg_text=cfg_text, filename=filename) @staticmethod def fromstring(cfg_str, file_format): """Generate config from config str. 
@staticmethod
def fromargparse(args):
    """Convert an ``argparse.Namespace`` into a plain dict.

    Args:
        args (Namespace): Parsed command-line arguments.

    Returns:
        dict: A new dict mapping each attribute name of ``args`` to its
        value. Mutating it does not affect ``args``.
    """
    # ``vars`` is the public way to read a Namespace; ``_get_kwargs`` is a
    # private helper of argparse.
    return dict(vars(args))
@property
def pretty_text(self):
    """Render the config as yapf-formatted, Python-style source text.

    Returns:
        str: Formatted text built from ``self._cfg_dict.to_dict()``.
        Backslashes in the text are replaced by forward slashes before
        formatting.
    """
    # Local import: only needed to probe the FormatCode signature.
    import inspect

    indent = 4

    def _indent(s_, num_spaces):
        # Indent every line but the first by ``num_spaces`` spaces.
        s = s_.split('\n')
        if len(s) == 1:
            return s_
        first = s.pop(0)
        s = [(num_spaces * ' ') + line for line in s]
        s = '\n'.join(s)
        s = first + '\n' + s
        return s

    def _format_basic_types(k, v, use_mapping=False):
        if isinstance(v, str):
            v_str = f"'{v}'"
        else:
            v_str = str(v)

        if use_mapping:
            k_str = f"'{k}'" if isinstance(k, str) else str(k)
            attr_str = f'{k_str}: {v_str}'
        else:
            attr_str = f'{str(k)}={v_str}'
        attr_str = _indent(attr_str, indent)

        return attr_str

    def _format_list(k, v, use_mapping=False):
        # check if all items in the list are dict
        if all(isinstance(_, dict) for _ in v):
            v_str = '[\n'
            v_str += '\n'.join(
                f'dict({_indent(_format_dict(v_), indent)}),'
                for v_ in v).rstrip(',')
            if use_mapping:
                k_str = f"'{k}'" if isinstance(k, str) else str(k)
                attr_str = f'{k_str}: {v_str}'
            else:
                attr_str = f'{str(k)}={v_str}'
            attr_str = _indent(attr_str, indent) + ']'
        else:
            attr_str = _format_basic_types(k, v, use_mapping)
        return attr_str

    def _contain_invalid_identifier(dict_str):
        # True when a key cannot be written as a ``key=value`` keyword.
        contain_invalid_identifier = False
        for key_name in dict_str:
            contain_invalid_identifier |= \
                (not str(key_name).isidentifier())
        return contain_invalid_identifier

    def _format_dict(input_dict, outest_level=False):
        r = ''
        s = []

        use_mapping = _contain_invalid_identifier(input_dict)
        if use_mapping:
            r += '{'
        for idx, (k, v) in enumerate(input_dict.items()):
            is_last = idx >= len(input_dict) - 1
            end = '' if outest_level or is_last else ','
            if isinstance(v, dict):
                v_str = '\n' + _format_dict(v)
                if use_mapping:
                    k_str = f"'{k}'" if isinstance(k, str) else str(k)
                    attr_str = f'{k_str}: dict({v_str}'
                else:
                    attr_str = f'{str(k)}=dict({v_str}'
                attr_str = _indent(attr_str, indent) + ')' + end
            elif isinstance(v, list):
                attr_str = _format_list(k, v, use_mapping) + end
            else:
                attr_str = _format_basic_types(k, v, use_mapping) + end

            s.append(attr_str)
        r += '\n'.join(s)
        if use_mapping:
            r += '}'
        return r

    cfg_dict = self._cfg_dict.to_dict()
    text = _format_dict(cfg_dict, outest_level=True)
    # copied from setup.cfg
    yapf_style = dict(
        based_on_style='pep8',
        blank_line_before_nested_class_or_def=True,
        split_before_expression_after_opening_paren=True)
    source = text.replace('\\', '/')
    # Pass ``verify`` only when the installed FormatCode still accepts it;
    # otherwise the call fails with an unexpected-keyword TypeError.
    if 'verify' in inspect.signature(FormatCode).parameters:
        text, _ = FormatCode(source, style_config=yapf_style, verify=True)
    else:
        text, _ = FormatCode(source, style_config=yapf_style)

    return text
class DictAction(Action):
    """
    argparse action to split an argument into KEY=VALUE form
    on the first = and append to a dictionary. List options can
    be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit
    brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build
    list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]'
    """

    @staticmethod
    def _parse_int_float_bool(val):
        """Convert ``val`` to int, float or bool when possible, else return it unchanged."""
        try:
            return int(val)
        except ValueError:
            pass
        try:
            return float(val)
        except ValueError:
            pass
        if val.lower() in ['true', 'false']:
            return val.lower() == 'true'
        return val

    @staticmethod
    def _parse_iterable(val):
        """Parse iterable values in the string.

        All elements inside '()' or '[]' are treated as iterable values.

        Args:
            val (str): Value string.

        Returns:
            list | tuple: The expanded list or tuple from the string.

        Examples:
            >>> DictAction._parse_iterable('1,2,3')
            [1, 2, 3]
            >>> DictAction._parse_iterable('[a, b, c]')
            ['a', 'b', 'c']
            >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]')
            [(1, 2, 3), ['a', 'b'], 'c']
            >>> DictAction._parse_iterable('(1,2),(3,4)')
            [(1, 2), (3, 4)]
        """

        def find_next_comma(string):
            """Find the position of next comma in the string.

            If no ',' is found in the string, return the string length. All
            chars inside '()' and '[]' are treated as one element and thus ','
            inside these brackets are ignored.
            """
            assert (string.count('(') == string.count(')')) and (
                    string.count('[') == string.count(']')), \
                f'Imbalanced brackets exist in {string}'
            # Running depth counters replace re-counting the whole prefix
            # for every character.
            round_depth = 0
            square_depth = 0
            for idx, char in enumerate(string):
                if char == ',' and round_depth == 0 and square_depth == 0:
                    return idx
                if char == '(':
                    round_depth += 1
                elif char == ')':
                    round_depth -= 1
                elif char == '[':
                    square_depth += 1
                elif char == ']':
                    square_depth -= 1
            return len(string)

        def is_enclosed(string):
            """Return True if the opening bracket closes at the last char.

            '(1,2),(3,4)' starts with '(' and ends with ')' but those two do
            not pair up; stripping them would leave '1,2),(3,4' and send the
            parser into endless recursion.
            """
            depth = 0
            for idx, char in enumerate(string):
                if char in '([':
                    depth += 1
                elif char in ')]':
                    depth -= 1
                if depth == 0:
                    return idx == len(string) - 1
            # Never closed: strip anyway so the imbalance assertion in
            # find_next_comma reports it.
            return True

        # Strip ' and " characters and replace whitespace.
        val = val.strip('\'\"').replace(' ', '')
        is_tuple = False
        if val.startswith('(') and val.endswith(')') and is_enclosed(val):
            is_tuple = True
            val = val[1:-1]
        elif val.startswith('[') and val.endswith(']') and is_enclosed(val):
            val = val[1:-1]
        elif ',' not in val:
            # val is a single value
            return DictAction._parse_int_float_bool(val)

        values = []
        while len(val) > 0:
            comma_idx = find_next_comma(val)
            element = DictAction._parse_iterable(val[:comma_idx])
            values.append(element)
            val = val[comma_idx + 1:]
        if is_tuple:
            values = tuple(values)
        return values

    def __call__(self, parser, namespace, values, option_string=None):
        """Parse each ``KEY=VALUE`` item and store the resulting dict on ``namespace``."""
        options = {}
        for kv in values:
            key, val = kv.split('=', maxsplit=1)
            options[key] = self._parse_iterable(val)
        setattr(namespace, self.dest, options)
def forward(self, outputs, targets, *args, **kwargs):
    """ This performs the loss computation.
    Parameters:
         outputs: passed to every loss as the first argument
         targets: passed to every loss as the second argument
         *args: extra positional arguments, forwarded to every loss except
                the one registered under the key 'loss'
         **kwargs: accepted but not forwarded
    Returns:
         self.loss_dicts, updated in place. A loss that returns a dict has
         its entries merged in; any other return value is stored under the
         loss's own key.
    """
    # Compute all the requested losses. Each loss is evaluated exactly once;
    # calling it a second time just to store the value doubled the work.
    for name, loss_fn in self.losses.items():
        extra = () if name == 'loss' else args
        result = loss_fn(outputs, targets, *extra)
        if isinstance(result, dict):
            self.loss_dicts.update(result)
        else:
            self.loss_dicts[name] = result

    return self.loss_dicts
def scaled_all_reduce(tensors):
    """Performs the scaled all_reduce operation on the provided tensors.

    The input tensors are modified in-place. Currently supports only the sum
    reduction operator. The reduced values are scaled by the inverse size of
    the process group.

    Args:
        tensors: Iterable of tensors to reduce. It is iterated more than
            once, so pass a list or another re-iterable collection.

    Returns:
        The same ``tensors`` object that was passed in.
    """
    # Without an initialised process group this is a single-process run.
    # ``dist.get_world_size()`` would raise here, so return early.
    if not (dist.is_available() and dist.is_initialized()):
        return tensors
    # There is no need for reduction in the single-proc case
    gpus = dist.get_world_size()
    if gpus == 1:
        return tensors
    # Queue the reductions
    reductions = [dist.all_reduce(tensor, async_op=True) for tensor in tensors]
    # Wait for reductions to finish
    for reduction in reductions:
        reduction.wait()
    # Scale the results
    for tensor in tensors:
        tensor.mul_(1.0 / gpus)
    return tensors
def _init_dist_pytorch(backend, **kwargs):
    """Initialise ``torch.distributed`` for the 'pytorch' launcher.

    Reads the global rank from the ``RANK`` environment variable, binds the
    process to CUDA device ``rank % num_gpus`` when a CUDA device is visible,
    then calls ``dist.init_process_group``.

    Args:
        backend (str): Backend of torch.distributed.
        **kwargs: Forwarded to ``dist.init_process_group``.
    """
    # TODO: use local_rank instead of rank % num_gpus
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    # Skip device binding when no CUDA device is visible;
    # ``rank % 0`` would raise ZeroDivisionError.
    if num_gpus > 0:
        torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)
class MMDistributedDataParallel(DistributedDataParallel):
    """DistributedDataParallel wrapper with helpers for averaging loss dicts.

    The wrapped model is constructed with ``find_unused_parameters=True`` and
    is also stored on ``self.ddp``.
    """

    def __init__(self, model, device_ids):
        super(MMDistributedDataParallel, self).__init__(model, device_ids, find_unused_parameters=True)
        self.ddp = model

    def reduce_mean(self, tensor, nprocs=None):
        """Average ``tensor`` over all processes.

        Args:
            tensor: Tensor to average.
            nprocs (int, optional): Number of processes; defaults to the
                world size reported by ``get_dist_info``.

        Returns:
            ``tensor`` itself when there is a single process, otherwise a
            new tensor holding the all-reduced sum divided by ``nprocs``.
        """
        if nprocs is None:
            _, nprocs = get_dist_info()
        # Same shortcut as the module-level ``reduce_mean``: with one process
        # there is nothing to reduce and possibly no process group to call.
        if nprocs == 1:
            return tensor
        rt = tensor.clone()
        dist.all_reduce(rt, op=dist.ReduceOp.SUM)
        rt /= nprocs
        return rt

    def ddp_step(self, loss_dicts):
        """Return ``loss_dicts`` with every value averaged across processes.

        With a world size of 1 the input dict is returned unchanged.
        """
        _, world_size = get_dist_info()
        if world_size == 1:
            return loss_dicts
        dist.barrier()
        return {k: self.reduce_mean(loss) for k, loss in loss_dicts.items()}
args.global_rank == 0: log_string(f'Distributed training: {args.distributed}') if args.distributed: if args.amp is not None: if not args.amp: # delay_allreduce delays all communication to the end of the backward pass. log_string("IN apex DistributedDataParallel mode.") model = DDP(model, delay_allreduce=True) else: # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank]) model = MMDistributedDataParallel(model, device_ids=[args.local_rank]) # train_sampler = torch.auxiliary.data.distributed.DistributedSampler(train_dataset) # val_sampler = torch.auxiliary.data.distributed.DistributedSampler(val_dataset) elif args.mode == "DP": log_string(f'DataParallel training') model = nn.DataParallel(model, device_ids=args.device_ids) return model ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/kill_dist.sh ================================================ kill -9 $(ps aux | grep main.py | grep -v grep | awk '{print $2}') ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/launch.py ================================================ r""" `torch.distributed.launch` is a module that spawns up multiple distributed training processes on each of the training nodes. The utility can be used for single-node distributed training, in which one or more processes per node will be spawned. The utility can be used for either CPU training or GPU training. If the utility is used for GPU training, each distributed process will be operating on a single GPU. This can achieve well-improved single-node training performance. It can also be used in multi-node distributed training, by spawning up multiple processes on each node for well-improved multi-node distributed training performance as well. This will especially be benefitial for systems with multiple Infiniband interfaces that have direct-GPU support, since all of them can be utilized for aggregated communication bandwidth. 
In both cases of single-node distributed training or multi-node distributed training, this utility will launch the given number of processes per node (``--nproc_per_node``). If used for GPU training, this number needs to be less than or equal to the number of GPUs on the current system (``nproc_per_node``), and each process will be operating on a single GPU from *GPU 0 to GPU (nproc_per_node - 1)*. **How to use this module:** 1. Single-Node multi-process distributed training :: >>> #python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_YOU_HAVE YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3 and all other arguments of your training script) 2. Multi-Node multi-process distributed training: (e.g. two nodes) Node 1: *(IP: 192.168.1.1, and has a free port: 1234)* :: >>> #python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_YOU_HAVE --nnodes=2 --node_rank=0 --master_addr="192.168.1.1" --master_port=1234 YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3 and all other arguments of your training script) Node 2: :: >>> #python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_YOU_HAVE --nnodes=2 --node_rank=1 --master_addr="192.168.1.1" --master_port=1234 YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3 and all other arguments of your training script) 3. To look up what optional arguments this module offers: :: >>> #python -m torch.distributed.launch --help **Important Notices:** 1. This utility and multi-process distributed (single-node or multi-node) GPU training currently only achieves the best performance using the NCCL distributed backend. Thus NCCL backend is the recommended backend to use for GPU training. 2. In your training program, you must parse the command-line argument: ``--local_rank=LOCAL_PROCESS_RANK``, which will be provided by this module. If your training program uses GPUs, you should ensure that your code only runs on the GPU device of LOCAL_PROCESS_RANK.
This can be done by: Parsing the local_rank argument :: # >>> import argparse # >>> parser = argparse.ArgumentParser() # >>> parser.add_argument("--local_rank", type=int) # >>> args = parser.parse_args() # Set your device to local rank using either # :: # >>> torch.cuda.set_device(args.local_rank) # before your code runs # or # :: # >>> with torch.cuda.device(args.local_rank): # >>> # your code to run # 3. In your training program, you are supposed to call the following function at the beginning to start the distributed backend. You need to make sure that the init_method uses ``env://``, which is the only supported ``init_method`` by this module. :: torch.distributed.init_process_group(backend='YOUR BACKEND', init_method='env://') 4. In your training program, you can either use regular distributed functions or use :func:`torch.nn.parallel.DistributedDataParallel` module. If your training program uses GPUs for training and you would like to use :func:`torch.nn.parallel.DistributedDataParallel` module, here is how to configure it. :: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], output_device=args.local_rank) Please ensure that ``device_ids`` argument is set to be the only GPU device id that your code will be operating on. This is generally the local rank of the process. In other words, the ``device_ids`` needs to be ``[args.local_rank]``, and ``output_device`` needs to be ``args.local_rank`` in order to use this utility. 5. Another way to pass ``local_rank`` to the subprocesses is via the environment variable ``LOCAL_RANK``. This behavior is enabled when you launch the script with ``--use_env=True``. You must adjust the subprocess example above to replace ``args.local_rank`` with ``os.environ['LOCAL_RANK']``; the launcher will not pass ``--local_rank`` when you specify this flag. .. warning:: ``local_rank`` is NOT globally unique: it is only unique per process on a machine.
node_local_rank_stdout_filename = "node_{}_local_rank_{}_stdout"
node_local_rank_stderr_filename = "node_{}_local_rank_{}_stderr"


def parse_args():
    """
    Helper function parsing the command line options.

    Everything that follows the ``training_script`` positional is collected
    untouched in ``training_script_args`` (``nargs=REMAINDER``) so it can be
    forwarded to every spawned worker.

    @retval argparse.Namespace holding the launcher options
    """
    parser = ArgumentParser(description="PyTorch distributed training launch "
                                        "helper utility that will spawn up "
                                        "multiple distributed processes")

    # Optional arguments for the launch helper
    parser.add_argument("--nnodes", type=int, default=1,
                        help="The number of nodes to use for distributed "
                             "training")
    parser.add_argument("--node_rank", type=int, default=0,
                        help="The rank of the node for multi-node distributed "
                             "training")
    parser.add_argument("--nproc_per_node", type=int, default=1,
                        help="The number of processes to launch on each node, "
                             "for GPU training, this is recommended to be set "
                             "to the number of GPUs in your system so that "
                             "each process can be bound to a single GPU.")
    parser.add_argument("--master_addr", default="127.0.0.1", type=str,
                        help="Master node (rank 0)'s address, should be either "
                             "the IP address or the hostname of node 0, for "
                             "single node multi-proc training, the "
                             "--master_addr can simply be 127.0.0.1")
    parser.add_argument("--master_port", default=29500, type=int,
                        help="Master node (rank 0)'s free port that needs to "
                             "be used for communication during distributed "
                             "training")
    parser.add_argument("--use_env", default=False, action="store_true",
                        help="Use environment variable to pass "
                             "'local rank'. For legacy reasons, the default value is False. "
                             "If set to True, the script will not pass "
                             "--local_rank as argument, and will instead set LOCAL_RANK.")
    # The original help text was missing the space before 'python -m'.
    parser.add_argument("-m", "--module", default=False, action="store_true",
                        help="Changes each process to interpret the launch script "
                             "as a python module, executing with the same behavior as "
                             "'python -m'.")
    parser.add_argument("--no_python", default=False, action="store_true",
                        help="Do not prepend the training script with \"python\" - just exec "
                             "it directly. Useful when the script is not a Python script.")
    parser.add_argument(
        "--logdir",
        default=None,
        type=str,
        help=f"Relative path to write subprocess logs to. Passing in a relative path will create a directory if needed, and write the stdout and stderr to files {node_local_rank_stdout_filename} and {node_local_rank_stderr_filename}. Note that successive runs with the same path to write logs to will overwrite existing logs, so be sure to save logs as needed.",
    )

    # positional
    parser.add_argument("training_script", type=str,
                        help="The full path to the single GPU training "
                             "program/script to be launched in parallel, "
                             "followed by all the arguments for the "
                             "training script")

    # rest from the training program
    parser.add_argument('training_script_args', nargs=REMAINDER)
    return parser.parse_args()


def _build_command(args, local_rank):
    """Return the argv list used to start the worker with the given local rank."""
    cmd = []
    if not args.no_python:
        cmd = [sys.executable, "-u"]
        if args.module:
            cmd.append("-m")
    cmd.append(args.training_script)
    if not args.use_env:
        cmd.append("--local_rank={}".format(local_rank))
    cmd.extend(args.training_script_args)
    return cmd


def _open_rank_logs(logdir_path, node_rank, local_rank, opened):
    """Open the stdout/stderr log files of one worker.

    Each handle is appended to ``opened`` as soon as it exists, so the caller
    can close everything even if the second ``open`` fails.
    """
    stdout_handle = open(os.path.join(
        logdir_path, node_local_rank_stdout_filename.format(node_rank, local_rank)), "w")
    opened.append(stdout_handle)
    stderr_handle = open(os.path.join(
        logdir_path, node_local_rank_stderr_filename.format(node_rank, local_rank)), "w")
    opened.append(stderr_handle)
    stdout_name = stdout_handle.name
    stderr_name = stderr_handle.name
    print(f"Note: Stdout and stderr for node {node_rank} rank {local_rank} will be written to {stdout_name}, {stderr_name} respectively.")
    return stdout_handle, stderr_handle


def main():
    """Spawn ``--nproc_per_node`` workers and wait for them.

    Each worker receives MASTER_ADDR, MASTER_PORT, WORLD_SIZE, RANK and
    LOCAL_RANK through its environment. If any worker exits with a non-zero
    code, the remaining workers are killed and
    ``subprocess.CalledProcessError`` is raised.

    Raises:
        ValueError: for an invalid flag combination, or when ``--logdir``
            exists but is not a directory.
        subprocess.CalledProcessError: when a worker fails.
    """
    args = parse_args()

    # Validate flag combinations before touching the filesystem or spawning.
    if args.no_python:
        if not args.use_env:
            raise ValueError("When using the '--no_python' flag, you must also set the '--use_env' flag.")
        if args.module:
            raise ValueError("Don't use both the '--no_python' flag and the '--module' flag at the same time.")

    # world size in terms of number of processes
    dist_world_size = args.nproc_per_node * args.nnodes

    # set PyTorch distributed related environmental variables
    current_env = os.environ.copy()
    current_env["MASTER_ADDR"] = args.master_addr
    current_env["MASTER_PORT"] = str(args.master_port)
    current_env["WORLD_SIZE"] = str(dist_world_size)

    if 'OMP_NUM_THREADS' not in os.environ and args.nproc_per_node > 1:
        current_env["OMP_NUM_THREADS"] = str(1)
        print("*****************************************\n"
              "Setting OMP_NUM_THREADS environment variable for each process "
              "to be {} in default, to avoid your system being overloaded, "
              "please further tune the variable for optimal performance in "
              "your application as needed. \n"
              "*****************************************".format(current_env["OMP_NUM_THREADS"]))

    logdir_path = None
    if args.logdir:
        logdir_path = os.path.join(os.getcwd(), args.logdir)
        if os.path.exists(logdir_path) and not os.path.isdir(logdir_path):
            raise ValueError("argument --logdir must be a path to a directory.")
        # makedirs (not mkdir) so nested paths such as "logs/run1" work, and a
        # concurrent creation by another launcher is not an error.
        os.makedirs(logdir_path, exist_ok=True)

    processes: List[Any] = []
    log_handles: List[IO] = []
    sig_names = {2: "SIGINT", 15: "SIGTERM"}
    last_return_code = None
    cmd: List[str] = []

    def sigkill_handler(signum, frame):
        # Reads `processes`, `last_return_code` and `cmd` from main()'s scope,
        # so it always sees their latest values.
        for process in processes:
            print(f"Killing subprocess {process.pid}")
            try:
                process.kill()
            except Exception:
                # best effort: the worker may already be gone
                pass
        if last_return_code is not None:
            raise subprocess.CalledProcessError(returncode=last_return_code, cmd=cmd)
        if signum in sig_names:
            print(f"Main process received {sig_names[signum]}, exiting")
        sys.exit(1)

    try:
        try:
            for local_rank in range(args.nproc_per_node):
                # each process's rank
                dist_rank = args.nproc_per_node * args.node_rank + local_rank
                current_env["RANK"] = str(dist_rank)
                current_env["LOCAL_RANK"] = str(local_rank)

                cmd = _build_command(args, local_rank)

                stdout_handle: Optional[IO] = None
                stderr_handle: Optional[IO] = None
                if logdir_path is not None:
                    stdout_handle, stderr_handle = _open_rank_logs(
                        logdir_path, args.node_rank, local_rank, log_handles)

                # pass SIGINT/SIGTERM to children if the parent is being terminated
                signal.signal(signal.SIGINT, sigkill_handler)
                signal.signal(signal.SIGTERM, sigkill_handler)

                process = subprocess.Popen(cmd, env=current_env, stdout=stdout_handle, stderr=stderr_handle)
                processes.append(process)
        except Exception:
            # A later spawn failed: do not leave the earlier workers running.
            for process in processes:
                if process.poll() is None:
                    process.kill()
            raise

        alive_processes = set(processes)
        while alive_processes:
            finished_processes = []
            for process in alive_processes:
                if process.poll() is None:
                    # the process is still running
                    continue
                if process.returncode != 0:
                    last_return_code = process.returncode  # for sigkill_handler
                    sigkill_handler(signal.SIGTERM, None)  # not coming back
                # exited cleanly
                finished_processes.append(process)
            alive_processes -= set(finished_processes)
            if alive_processes:
                time.sleep(1)
    finally:
        # close open file descriptors
        for handle in log_handles:
            handle.close()
# Names of the loggers that setup_logger() has already configured.
logger_initialized = {}

log_colors_config = {
    'DEBUG': 'cyan',
    'INFO': 'white',
    'WARNING': 'yellow',
    'ERROR': 'red',
    'CRITICAL': 'red',
}


def get_root_logger(name=None, cfg=None, cfg_name=None, log_level=logging.INFO):
    """Forward to :func:`get_logger`.

    ``log_level`` is passed by keyword: the fourth positional parameter of
    ``get_logger`` is ``phase``, so passing it positionally used the numeric
    level as the phase suffix of the log file name.
    """
    return get_logger(name, cfg, cfg_name, log_level=log_level)


# TODO: Depre
# the same as "get_root_logger"
def create_logger(cfg=None, cfg_name=None, dist_print=0, log_level=logging.INFO):
    """Deprecated alias of :func:`get_root_logger` with ``name=None``.

    ``dist_print`` is accepted for backward compatibility and is not used.
    """
    return get_logger(None, cfg, cfg_name, log_level=log_level)


@functools.lru_cache()  # so that calling setup_logger multiple times won't add many handlers
def setup_logger(name, final_log_file, color=True):
    """Configure file logging on the root logger and a coloured console handler on ``name``.

    Args:
        name: Logger name (``None`` addresses the root logger).
        final_log_file: Path handed to ``logging.basicConfig(filename=...)``;
            backslashes are replaced with forward slashes first.
        color: Unused; kept for interface compatibility.

    Returns:
        logging.Logger: The configured logger. ``name`` is recorded in
        ``logger_initialized``.
    """
    logging.basicConfig(filename=str(final_log_file).replace('\\', '/'),
                        format='%(message)s', level=logging.INFO)
    logger = logging.getLogger(name)

    # Quieten plain StreamHandlers on the root logger. The check is an exact
    # type match, so subclasses such as FileHandler are left alone.
    for handler in logger.root.handlers:
        if type(handler) is logging.StreamHandler:
            handler.setLevel(logging.ERROR)

    # The original built a handler list and appended this same handler a
    # second time on rank 0. Logger.addHandler ignores a handler it already
    # holds, so attaching it once on every rank is equivalent.
    console = colorlog.StreamHandler()
    formatter = colorlog.ColoredFormatter(
        '%(log_color)s- %(message)s',
        log_colors=log_colors_config)  # log output format
    console.setFormatter(formatter)
    console.setLevel(logging.INFO)
    logger.addHandler(console)

    logger_initialized[name] = True
    return logger


def get_logger(name=None, cfg=None, cfg_name=None, phase='train', log_level=logging.INFO, file_mode='w'):
    """Create the output directories described by ``cfg`` and initialise a logger.

    Args:
        name (str | None): Logger name.
        cfg: Configuration object. The attributes read here are ``out_dir``,
            ``dataset``, ``arch``, ``eval``, ``resume`` (when ``eval`` is
            true), ``use_log`` and ``log_dir`` (when ``use_log`` is true).
            When ``None`` there is nothing to build, and the plain
            ``logging.getLogger(name)`` is returned.
        cfg_name (str): Path of the config file; its base name without
            extension becomes a directory component. Required when ``cfg``
            is given.
        phase (str): Suffix of the log file name.
        log_level (int): Accepted for interface compatibility; the handler
            level is fixed to INFO inside ``setup_logger``.
        file_mode (str): Accepted for interface compatibility; unused.

    Returns:
        One of:
        - ``logging.Logger`` when ``cfg`` is ``None``, or when ``name`` is a
          child of an initialised logger and ``cfg.use_log`` is true;
        - ``None`` when ``cfg`` is given and ``name`` is already initialised,
          or is a child of an initialised logger with ``cfg.use_log`` false;
        - otherwise the tuple ``(logger, final_output_dir, model_save_dir,
          tensorboard_log_dir)``, the last three as ``str``. ``logger`` is
          ``None`` and ``tensorboard_log_dir`` is ``'None'`` when
          ``cfg.use_log`` is false.
    """
    if cfg is None:
        # Without a config the original crashed with AttributeError (which
        # also broke print_log(msg, "some_name")); hand back a plain logger.
        return logging.getLogger(name)

    if name in logger_initialized:
        return None

    # handle hierarchical names
    # e.g., logger "a" is initialized, then logger "a.b" will skip the
    # initialization since it is a child of "a".
    for logger_name in logger_initialized:
        # create_logger() passes name=None, and None may itself be a
        # registered key, so guard before calling str.startswith.
        if isinstance(name, str) and isinstance(logger_name, str) and name.startswith(logger_name):
            return logging.getLogger(name) if cfg.use_log else None

    logger = None
    tensorboard_log_dir = None

    root_output_dir = Path(cfg.out_dir)
    # set up logger in root_path
    if not root_output_dir.exists():
        print('=> creating {}'.format(root_output_dir))
        root_output_dir.mkdir(parents=True, exist_ok=True)

    dataset = cfg.dataset
    model = cfg.arch
    cfg_name = os.path.basename(cfg_name).split('.')[0]
    time_str = time.strftime('%Y-%m-%d-%H-%M-%S')

    # store all output except tb_log file
    final_output_dir = root_output_dir / dataset / model / cfg_name
    if cfg.eval:
        # Name of the directory holding the checkpoint. os.path handles both
        # separators on Windows, where splitting on '/' alone did not.
        model_save_tmp = os.path.basename(os.path.dirname(cfg.resume))
    else:
        model_save_tmp = "model_{}".format(time_str)

    model_save_dir = final_output_dir / model_save_tmp

    final_output_dir.mkdir(parents=True, exist_ok=True)
    model_save_dir.mkdir(parents=True, exist_ok=True)

    if cfg.use_log:
        cfg_name = '{}_{}'.format(cfg_name, time_str)
        # a logger to save results
        log_file = '{}_{}.log'.format(cfg_name, phase)
        final_log_file = (model_save_dir if cfg.eval else final_output_dir) / log_file

        # tensorboard_log
        tensorboard_log_dir = root_output_dir / Path(cfg.log_dir) / dataset / model / cfg_name
        print('=> creating tfb logs {}'.format(tensorboard_log_dir))
        tensorboard_log_dir.mkdir(parents=True, exist_ok=True)

        logger = setup_logger(name, final_log_file)

    return logger, str(final_output_dir), str(model_save_dir), str(tensorboard_log_dir)


def print_log(msg, logger=None, level=logging.INFO):
    """Print a log message.

    Args:
        msg (str): The message to be logged.
        logger (logging.Logger | str | None): The logger to be used.
            Some special loggers are:
            - "silent": no message will be printed.
            - other str: the logger obtained with `get_logger(logger)`.
            - None: The `print()` method will be used to print log messages.
        level (int): Logging level. Only available when `logger` is a Logger
            object or a logger name.

    Raises:
        TypeError: If ``logger`` is none of the accepted kinds.
    """
    if logger is None:
        print(msg)
    elif isinstance(logger, logging.Logger):
        logger.log(level, msg)
    elif logger == 'silent':
        pass
    elif isinstance(logger, str):
        # With cfg=None get_logger always returns a logging.Logger.
        _logger = get_logger(logger)
        _logger.log(level, msg)
    else:
        raise TypeError(
            'logger should be either a logging.Logger object, str, '
            f'"silent" or None, but got {type(logger)}')


def load_json_log(json_log):
    """load and convert json_logs to log_dicts.

    Args:
        json_log (str): The path of the json log file, one JSON object per
            line. Blank lines and records without an ``epoch`` field are
            skipped.

    Returns:
        dict[int, dict[str, list]]:
            Key is the epoch, value is a sub dict. The keys in each sub dict
            are the remaining fields of the records, and the value is a list
            of the corresponding values in file order.

            .. code-block:: python

                # An example output
                {
                    1: {'iter': [100, 200, 300], 'loss': [6.94, 6.73, 6.53]},
                    2: {'iter': [100, 200, 300], 'loss': [6.33, 6.20, 6.07]},
                    ...
                }
    """
    log_dict = dict()
    with open(json_log, 'r') as log_file:
        for line in log_file:
            line = line.strip()
            if not line:
                # tolerate blank/trailing lines instead of failing in json.loads
                continue
            log = json.loads(line)
            # skip lines without `epoch` field
            if 'epoch' not in log:
                continue
            epoch = log.pop('epoch')
            if epoch not in log_dict:
                log_dict[epoch] = defaultdict(list)
            for k, v in log.items():
                log_dict[epoch][k].append(v)
    return log_dict
def rgb2ycbcr(img, y_only=True):
    """Convert an RGB array (channels last) to ITU-R BT.601 YCbCr.

    The coefficients are those of the original Y-only formula, which divides
    by 255, i.e. they are scaled for inputs in the 0-255 range.

    Args:
        img: Array-like whose last axis holds the R, G, B channels.
        y_only (bool): Return only the luma channel (default), otherwise the
            full Y, Cb, Cr triple on the last axis.

    Returns:
        numpy.ndarray: float64 array; the last axis is dropped when
        ``y_only`` is true.
    """
    # The original called img.astype(np.float32) and discarded the result.
    # Converting to float64 here reproduces the promotion np.dot applied
    # anyway, and additionally accepts plain sequences.
    img = np.asarray(img, dtype=np.float64)
    if y_only:
        return np.dot(img, [65.481, 128.553, 24.966]) / 255.0 + 16.0
    # Previously this path raised UnboundLocalError.
    return np.matmul(img, [[65.481, -37.797, 112.0],
                           [128.553, -74.203, -93.786],
                           [24.966, 112.0, -18.214]]) / 255.0 + [16.0, 128.0, 128.0]


def quantize(img, rgb_range):
    """Snap a tensor to the 256-level grid spanning ``[0, rgb_range]`` (clamping outliers)."""
    pixel_range = 255.0 / rgb_range
    return img.mul(pixel_range).clamp(0, 255).round().div(pixel_range)


def calc_psnr(sr, hr, scale, rgb_range):
    """PSNR of the weighted-luma difference between ``sr`` and ``hr`` (NumPy).

    Args:
        sr, hr: Arrays broadcastable against shape ``(1, 3, 1, 1)``, i.e.
            channel on axis 1.
        scale (int): When not 1, ``scale`` pixels are cropped from every
            border of the last two axes before averaging.
        rgb_range: Dynamic range used to normalise the difference.

    Returns:
        ``0`` when ``hr`` holds a single element, ``100`` when the images are
        identical, otherwise ``-10 * log10(mse)`` as a float.
    """
    hr = np.float32(hr)
    sr = np.float32(sr)
    if hr.size == 1:
        return 0

    diff = (sr - hr) / rgb_range
    gray_coeffs = np.array([65.738, 129.057, 25.064]).reshape((1, 3, 1, 1)) / 256
    diff = np.multiply(diff, gray_coeffs).sum(1)

    # The original also computed ``shave = scale + 6`` for scale == 1 but never
    # used it: no border is removed in that case.
    valid = diff if scale == 1 else diff[..., scale:-scale, scale:-scale]

    mse = np.mean(pow(valid, 2))
    if mse == 0:
        return 100
    # The former try/except printed mse and then hit an unbound ``psnr``;
    # any error from log10 now propagates directly.
    return -10 * math.log10(mse)


class PSNR_ycbcr(nn.Module):
    """Torch counterpart of :func:`calc_psnr`; ``sr`` is quantised first."""

    def __init__(self):
        super().__init__()
        self.gray_coeffs = torch.tensor([65.738, 129.057, 25.064],
                                        requires_grad=False).reshape((1, 3, 1, 1)) / 256

    def quantize(self, img, rgb_range):
        """Snap ``img`` to the 256-level grid spanning ``[0, rgb_range]``."""
        pixel_range = 255 / rgb_range
        img = torch.multiply(img, pixel_range)
        img = torch.clip(img, 0, 255)
        img = torch.round(img) / pixel_range
        return img

    @torch.no_grad()
    def forward(self, sr, hr, scale, rgb_range):
        """Return ``-10 * log10(mse)`` as a tensor, or ``0`` for a one-element ``hr``.

        Unlike ``calc_psnr`` there is no cap for identical inputs: a zero mse
        yields ``inf``.
        """
        # ``hr.size`` is a bound method on tensors, so the original comparison
        # ``hr.size == 1`` could never be true; numel() is the element count.
        if hr.numel() == 1:
            return 0

        sr = self.quantize(sr, rgb_range)
        gray_coeffs = self.gray_coeffs.to(sr.device)

        hr = hr.float()
        sr = sr.float()
        diff = (sr - hr) / rgb_range
        diff = torch.multiply(diff, gray_coeffs).sum(1)

        valid = diff if scale == 1 else diff[..., scale:-scale, scale:-scale]
        mse = torch.mean(torch.pow(valid, 2))
        return -10 * torch.log10(mse)


def sub_mean(x):
    """Subtract fixed per-channel RGB means from ``x``.

    ``x`` is indexed as ``x[:, c, :, :]`` for c in 0..2. The input is not
    modified: the multiplication creates a new tensor before the in-place
    subtraction. The result is scaled back by 1/255.
    """
    x = x * 255.0
    for channel, mean in enumerate((0.4488, 0.4371, 0.4040)):
        x[:, channel, :, :] -= mean * 255
    return x / 255.0


def add_mean(x):
    """Add the same per-channel RGB means to ``x * 255``.

    NOTE(review): unlike ``sub_mean`` the result is NOT divided by 255, so
    this is not the exact inverse of ``sub_mean`` - confirm with callers
    whether a 0-255 output is intended.
    """
    x = x * 255.0
    for channel, mean in enumerate((0.4488, 0.4371, 0.4040)):
        x[:, channel, :, :] += mean * 255
    return x
class lr_scheduler(object):
    """Drive either a built-in PyTorch scheduler or the hand-written schedule below.

    Call :meth:`set_optimizer` first; :meth:`step` then dispatches to the
    selected PyTorch scheduler, or to :meth:`adjust_1_learning_rate` when
    none is selected.
    """

    def __init__(self, lr, epochs):
        self.epochs = epochs
        self.lr = lr
        self.lr_scheduler = None

    # Six learning-rate adjustment strategies, lr = lr * gamma
    #
    # ReduceLROnPlateau:
    #   mode (str)      - 'min' or 'max'. 'min' reacts when the monitored
    #                     metric stops decreasing (e.g. loss), 'max' when it
    #                     stops increasing (e.g. accuracy).
    #   factor (float)  - multiplier applied to the lr (the `gamma` of the
    #                     other schedulers): lr = lr * factor.
    #   patience (int)  - number of steps without improvement that are
    #                     tolerated before the lr is reduced.
    #   verbose (bool)  - whether to print a message on every reduction.
    #   threshold_mode (str) - 'rel' or 'abs', how "improvement" is measured:
    #       rel & max: dynamic_threshold = best * (1 + threshold)
    #       rel & min: dynamic_threshold = best * (1 - threshold)
    #       abs & max: dynamic_threshold = best + threshold
    #       abs & min: dynamic_threshold = best - threshold
    #   threshold (float) - used together with threshold_mode.
    #   cooldown (int)  - steps to wait after a reduction before monitoring
    #                     resumes.
    #   min_lr (float or list) - lower bound of the lr; a list sets one bound
    #                     per parameter group.

    def set_optimizer(self, optimizer, lr_scheduler):
        """Attach ``optimizer`` and instantiate the requested scheduler.

        Args:
            optimizer: The optimizer whose ``param_groups`` will be driven.
            lr_scheduler: One of the ``torch.optim.lr_scheduler`` classes
                handled below. Anything else (including ``None``) prints a
                notice and selects the hand-written schedule.
        """
        self.optimizer = optimizer
        # Drop any scheduler selected by an earlier call. Previously an
        # unknown/None request silently kept the old scheduler active.
        self.lr_scheduler = None
        sched = optim.lr_scheduler

        if lr_scheduler == sched.StepLR:
            # decay at equal intervals
            self.lr_scheduler = sched.StepLR(optimizer, step_size=1, gamma=0.1)
        elif lr_scheduler == sched.ReduceLROnPlateau:
            # Reduce learning rate when validation accuracy plateaus.
            self.lr_scheduler = sched.ReduceLROnPlateau(optimizer, mode='max', patience=5, verbose=True)
        elif lr_scheduler == sched.MultiStepLR:
            # milestones=[epoch1, epoch2, ...]: staged decay
            self.lr_scheduler = sched.MultiStepLR(optimizer, [100, 200, 300], gamma=0.5)
        elif lr_scheduler == sched.ExponentialLR:
            # exponential decay: x, 0.1x, 0.01x, ...
            self.lr_scheduler = sched.ExponentialLR(optimizer=optimizer, gamma=0.1)
        elif lr_scheduler == sched.CosineAnnealingLR:
            # Cosine annealing learning rate.
            self.lr_scheduler = sched.CosineAnnealingLR(optimizer, T_max=30, eta_min=1e-7)
        elif lr_scheduler == sched.CyclicLR:
            # NOTE(review): base_lr (1e-3) is larger than max_lr (1e-4) -
            # confirm the inverted cycle is intentional.
            self.lr_scheduler = sched.CyclicLR(optimizer, base_lr=1e-3, max_lr=1e-4, step_size_down=30,
                                               step_size_up=150, cycle_momentum=False)
        elif lr_scheduler == sched.LambdaLR:
            # lr = initial lr * lr_lambda(last_epoch)
            # NOTE(review): the factor is 0 for the first 30 epochs - confirm.
            curves = lambda epoch: epoch // 30
            # A single callable is applied to every parameter group; the
            # former one-element list raised ValueError for optimizers with
            # more than one group.
            self.lr_scheduler = sched.LambdaLR(optimizer, lr_lambda=curves)
        elif lr_scheduler == sched.CosineAnnealingWarmRestarts:
            # T_0: length of the first cycle
            # T_mult: factor applied to the cycle length after each restart
            self.lr_scheduler = sched.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=0)
        else:
            print("self.lr_scheduler not in pytorch")

    def adjust_2_learning_rate(self, epoch):
        """Combination of two decay rules.

        For ``epoch <= 5`` only the first parameter group is scaled by 0.9;
        afterwards every group is scaled by 0.9 whenever ``epoch`` is a
        multiple of 5.
        """
        param_groups = self.optimizer.param_groups
        if epoch <= 5:
            param_groups[0]['lr'] = param_groups[0]['lr'] * 0.9
        elif epoch % 5 == 0:
            for param_group in param_groups:
                param_group['lr'] *= 0.9

    def adjust_1_learning_rate(self, epoch, mini_lr=1e-6):
        """Hand-written piecewise schedule, applied to every parameter group.

        - If the first group's lr is already below ``mini_lr``: lr = 1e-5.
        - ``epoch <= 40``: ``self.lr * 0.1 ** (epoch // 20)``.
        - ``41 <= epoch <= 80``: lr = 1e-5.
        - ``epoch >= 81``: ``self.lr * 0.9 ** (epoch // 20)``, where
          ``self.lr`` is re-based to the current lr at ``epoch == 81``.
        """
        param_groups = self.optimizer.param_groups
        if param_groups[0]["lr"] < mini_lr:
            lr = 1e-5
        elif epoch <= 40:
            lr = self.lr * (0.1 ** (epoch // 20))
        elif epoch >= 81:
            if epoch == 81:
                # restart the decay from whatever lr is currently active
                self.lr = param_groups[0]["lr"]
            lr = self.lr * (0.9 ** (epoch // 20))
        else:
            lr = 1e-5
        for param_group in param_groups:
            param_group['lr'] = lr

    def step(self, epoch):
        """Advance the schedule to ``epoch``."""
        if self.lr_scheduler is None:
            self.adjust_1_learning_rate(epoch)
        else:
            self.lr_scheduler.step(epoch)

    # preprint lr_map
    def get_lr_map(self, title, out_file=None, viz=False):
        """Plot the learning rate over ``self.epochs`` epochs.

        The schedule is stepped through all epochs to collect the curve, then
        every parameter group's lr and ``self.lr`` are restored. A PyTorch
        scheduler's own internal state is not rewound.

        Args:
            title (str): Plot title.
            out_file: If given, the figure is saved to this path.
            viz (bool): Show the figure interactively.
        """
        plt.figure()
        lr = []
        print("preprint lr_scheduler")
        saved = [group['lr'] for group in self.optimizer.param_groups]
        for epoch in range(self.epochs):
            self.step(epoch)
            # TODO: plot per layer
            if self.lr_scheduler is None:
                lr.append(self.optimizer.param_groups[0]['lr'])
                continue
            try:
                lr.append(self.lr_scheduler.get_last_lr())
            except Exception:
                # fall back when the scheduler does not provide get_last_lr()
                lr.append(self.optimizer.param_groups[0]['lr'])
        plt.plot(list(range(self.epochs)), lr)
        plt.xlabel("epoch")
        plt.ylabel("learning rate")
        plt.title(title)
        if out_file is not None:
            plt.savefig(out_file)
        if viz:
            plt.show()
        # Restore every group; only the first one was restored before.
        for group, value in zip(self.optimizer.param_groups, saved):
            group['lr'] = value
        self.lr = saved[0]


def tune_param():
    """Placeholder; not implemented."""
    ...
def partial_train(model, layers: list):
    """Freeze a model except for selected layers, then attach a new ``fc`` head.

    Every parameter whose qualified name (as reported by
    ``model.named_parameters()``) contains one of the strings in ``layers``
    keeps its current ``requires_grad`` flag; all other parameters are frozen.
    Finally ``model.fc`` is replaced by a fresh ``nn.Linear(512, 100)``.

    Args:
        model (nn.Module): network to modify in place.
        layers (list[str] | str | None): name fragments of the parameters that
            must stay trainable. ``None`` or an empty list freezes everything.

    Returns:
        nn.Module: the same ``model`` object.
    """
    if layers is None:
        keep = ()
    elif isinstance(layers, str):
        keep = (layers,)
    else:
        keep = tuple(layers)

    # Freeze layers. The previous `layers in param` compared a list with a
    # tensor and raised for any non-None `layers`.
    for name, param in model.named_parameters():
        if any(fragment in name for fragment in keep):
            continue
        param.requires_grad = False

    # Replace the last fc layer
    model.fc = nn.Linear(512, 100)
    return model


if __name__ == "__main__":
    from torchvision.models import AlexNet
    import matplotlib.pyplot as plt

    model = AlexNet(num_classes=2)

    class LinearRegression(nn.Module):
        def __init__(self):
            super(LinearRegression, self).__init__()
            self.linear1 = nn.Linear(1, 5)  # input and output is 1 dimension
            self.linear2 = nn.Linear(5, 1)

        def forward(self, x):
            out = self.linear1(x)
            out = self.linear2(out)
            return out

    glm = LinearRegression()
    optimizer = optim.SGD(params=glm.parameters(), lr=0.1)
    epochs = 450
    # Build an optimizer that warms up from a small learning rate to the
    # nominal value and then decays on the regular schedule.
    lrs = lr_scheduler(0.1, epochs)
    # lrs.set_optimizer(optimizer, optim.lr_scheduler.MultiStepLR)
    # lrs.get_lr_map("MultiStepLR")
    # lrs.set_optimizer(optimizer, optim.lr_scheduler.ExponentialLR)
    # lrs.get_lr_map("ExponentialLR")
    # lrs.set_optimizer(optimizer, optim.lr_scheduler.StepLR)
    # lrs.get_lr_map("StepLR")
    # lrs.set_optimizer(optimizer, optim.lr_scheduler.CyclicLR)
    # lrs.get_lr_map("CyclicLR")
    # # lrs.set_optimizer(optimizer, optim.lr_scheduler.ReduceLROnPlateau)
    # # lrs.get_lr_map("ReduceLROnPlateau")
    lrs.set_optimizer(optimizer, None)
    lrs.get_lr_map("LambdaLR")
    # lrs.set_optimizer(optimizer, optim.lr_scheduler.CosineAnnealingLR)
    # lrs.get_lr_map("CosineAnnealingLR")
    # lrs.set_optimizer(optimizer, optim.lr_scheduler.CosineAnnealingWarmRestarts)
    # lrs.get_lr_map("CosineAnnealingWarmRestarts")
def common_cfg():
    """Parse the command line shared by all tasks and wrap it in a ``Config``.

    Besides the parsed flags, a set of fixed runtime attributes (global rank,
    checkpoint policy, start epoch, ...) is attached before the namespace is
    handed to ``Config``.

    Returns:
        Config: configuration built from the parsed arguments.
    """

    def _str2bool(value):
        # argparse's `type=bool` maps every non-empty string (including
        # "False" and "0") to True, so boolean flags could never be disabled
        # from the command line. Parse the usual spellings explicitly.
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')

    parser = argparse.ArgumentParser(description='PyTorch Training')
    # * Logger
    parser.add_argument('--use-log', default=True, type=_str2bool)
    parser.add_argument('--log-dir', metavar='DIR', default='logs',
                        help='path to save log')
    parser.add_argument('--tfb-dir', metavar='DIR', default=None,
                        help='useless in this script.')
    parser.add_argument('--use-tfb', default=False, type=_str2bool)

    # * DDP
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', default=0, type=int,
                        help="host rank must be 0 and python -m torch.distributed.launch main.py need args.local_rank")
    parser.add_argument('--backend', default='nccl', type=str,  # gloo
                        help='distributed backend')
    parser.add_argument('--dist-url', default='env://',
                        type=str,  # 'tcp://224.66.41.62:23456'
                        help='url used to set up distributed training')

    # * AMP
    parser.add_argument('--amp', default=None, type=_str2bool,
                        help="False is apex, besides True is  torch1.6+, which has supports amp ops to reduce gpu memory and speed up training")
    parser.add_argument('--amp-opt-level', type=str, default='O1', choices=['O0', 'O1', 'O2'],
                        help='mixed precision opt level, if O0, no amp is used')

    # * Training
    parser.add_argument('--accumulated-step', default=1, type=int)
    parser.add_argument('--clip_max_norm', default=0, type=float,
                        help='gradient clipping max norm')

    # * extra
    parser.add_argument('--seed', default=10, type=int,
                        help='seed for initializing training. ')
    parser.add_argument('--device', default='cuda',
                        help='device to use for training / testing')
    parser.add_argument('--reg', type=_str2bool, default=True,
                        help='loss with l2 reguliarization for nn.Conv2D, '
                             'which is very important for classical panshrapening!!!')
    parser.add_argument('--crop_batch_size', type=int, default=128,
                        help='input batch size for-'
                             ' training')
    parser.add_argument('--rgb_range', type=int, default=255,
                        help='maximum value of RGB')
    parser.add_argument('--model_style', type=str, default=None,
                        help='model_style is used to recursive/cascade or GAN training')
    parser.add_argument('--mode', type=str, default=None,
                        help='dataset file extension')
    parser.add_argument('--task', type=str, default=None,
                        help='dataset file extension')
    parser.add_argument('--arch', type=str, default='',
                        help='arch')

    args = parser.parse_args()
    args.global_rank = 0
    args.once_epoch = False
    args.reset_lr = False
    # Without an explicit --amp value, mixed precision is switched off.
    args.amp_opt_level = 'O0' if args.amp is None else args.amp_opt_level
    args.save_top_k = 5
    args.save_print_freq = 10
    args.start_epoch = 1
    assert args.accumulated_step > 0
    args.load_model_strict = True
    args.resume_mode = 'best'
    args.validate = False
    args.gpu_ids = [0]
    # args.workflow = []

    return Config(args)


class panshaprening_cfg(TaskDispatcher, name='pansharpening'):
    """Task-level configuration registered under the name ``'pansharpening'``."""

    def __init__(self, cfg=None, arch=None):
        """Fill in pansharpening defaults and merge them into this object.

        Args:
            cfg: base configuration; built with ``common_cfg()`` when None.
            arch: when not None, the architecture-specific configuration is
                requested through ``self.new`` (using ``cfg.arch`` as key).
        """
        super(panshaprening_cfg, self).__init__()

        # Imported for their side effects.
        # NOTE(review): presumably these register the arch configs/models.
        import UDL.pansharpening.configs
        import UDL.pansharpening.models

        if cfg is None:
            cfg = common_cfg()

        cfg.scale = [1]
        # NOTE(review): data_dir is only set on Linux and Windows.
        if platform.system() == 'Linux':
            cfg.data_dir = '/Datasets/pansharpening/DLPan'
        if platform.system() == "Windows":
            cfg.data_dir = 'D:/Datasets/pansharpening/DLPan'

        cfg.best_prec1 = 10000
        cfg.best_prec5 = 10000
        cfg.metrics = 'loss'
        cfg.task = "pansharpening"
        cfg.save_fmt = "mat"  # fmt is mat or not mat
        cfg.taskhead = "pansharpening"

        # * Importantly
        warning = f"Note: FusionNet, DiCNN, PNN don't have high-pass filter"
        warnings.warn(warning)

        if arch is not None:
            cfg = self.new(cfg=cfg, arch=cfg.arch)
        self.merge_from_dict(cfg)


def nni_cfg(args):
    """Merge the next NNI tuner parameter set into ``args`` when mode is 'nni'.

    Args:
        args: configuration exposing ``mode`` and ``merge_from_dict``.

    Returns:
        The same ``args`` object.
    """
    if args.mode == 'nni':
        import nni
        tuner_params = nni.get_next_parameter()
        print("launcher: nni is running. \n", tuner_params)
        args.merge_from_dict(tuner_params)
    return args


class get_cfg(TaskDispatcher, name='entrypoint'):
    """Entry point that assembles the full configuration for a task/arch pair."""

    def __init__(self, task=None, arch=None):
        """Build the common config, dispatch to the task config, merge the result.

        Args:
            task: task name looked up in the ``TaskDispatcher`` registry.
            arch: optional architecture name overriding the ``--arch`` flag.

        Raises:
            ValueError: when the parsed args carry no ``task`` attribute and
                ``task`` is not a registered task name.
        """
        super(get_cfg, self).__init__()

        args = common_cfg()
        # args.mode = 'nni'
        if arch is not None:
            args.arch = arch
        if args.mode == 'nni':
            args = nni_cfg(args)
        # args.__delattr__('workflow')

        if hasattr(args, 'task'):
            cfg = TaskDispatcher.new(cfg=args, task=task, arch=args.arch)
            cfg.merge_from_dict(args)
        elif task in TaskDispatcher._task.keys():
            cfg = TaskDispatcher.new(cfg=args, task=task, arch=args.arch)
            cfg.merge_from_dict(args)
        else:
            raise ValueError(f"nni starter don't have task={task} but expected"
                             f"one of {super()._task.keys()} in TaskDispatcher")
        # cfg.setdefault('workflow', [])
        cfg = data_cfg(cfg)
        print(cfg.pretty_text)

        self.merge_from_dict(cfg)


def data_cfg(cfg):
    """Resolve optional file/data settings on ``cfg`` and return it.

    * ``cfg.config``: when set, the file must exist and is merged in.
    * ``cfg.data``: when callable, it is popped, called with ``cfg.data_dir``
      and its result merged in.
    * ``cfg.workflow`` defaults to an empty list.
    * ``cfg.norm_cfg`` is forced to ``'BN'`` when it is set and the launcher
      is ``'none'``.

    Raises:
        IOError: when ``cfg.config`` is set but is not an existing file.
    """
    if cfg.get('config', None) is not None:
        if not os.path.isfile(cfg.config):
            raise IOError(f"reading {cfg.config} failed")
        cfg.merge_from_dict(cfg.fromfile(cfg.config))

    if cfg.get('data', None) is not None and callable(cfg.data):
        data_func = cfg.pop('data')
        cfg.merge_from_dict(Config(data_func(cfg.data_dir)))

    cfg.workflow = cfg.get('workflow', [])

    if cfg.get('norm_cfg', None) is not None and cfg.launcher == 'none':
        cfg.norm_cfg = 'BN'

    return cfg
def format_np_output(np_arr):
    """Bring an array into an ``H x W x 3`` layout that PIL can save.

    Steps, applied in order: a 2-D array gets a leading axis; a leading axis
    of size 1 is repeated three times; a leading axis of size 3 is moved to
    the end; an array whose maximum is ``<= 1`` is scaled by 255 and cast to
    ``uint8``.

    Args:
        np_arr (np.ndarray): array of shape 1xWxH, WxH or 3xWxH.

    Returns:
        np.ndarray: the converted array.
    """
    # Case 1: only two dimensions -> add a channel axis in front
    if len(np_arr.shape) == 2:
        np_arr = np.expand_dims(np_arr, axis=0)
    # Case 2: single channel (first dim) -> repeat to three channels
    if np_arr.shape[0] == 1:
        np_arr = np.repeat(np_arr, 3, axis=0)
    # Case 3: channels first -> channels last, as PIL expects
    if np_arr.shape[0] == 3:
        np_arr = np_arr.transpose(1, 2, 0)
    # Case 4: values normalised to 0..1 -> 0..255 uint8
    if np.max(np_arr) <= 1:
        np_arr = (np_arr * 255).astype(np.uint8)
    return np_arr


def save_image(im, path):
    """Save a numpy array or a PIL image to ``path``.

    Args:
        im (np.ndarray | PIL.Image.Image): arrays go through
            ``format_np_output`` and ``Image.fromarray`` first.
        path (str): destination file.
    """
    if isinstance(im, (np.ndarray, np.generic)):
        im = format_np_output(im)
        im = Image.fromarray(im)
    im.save(path)


def norm_image(image, factor=255.):
    """Min-max normalise an image without touching the input array.

    The minimum is subtracted and the result divided by its maximum. With
    ``factor == 255`` the result is scaled to 0..255 and returned as
    ``uint8``; for any other ``factor`` the 0..1 float image is returned
    unscaled.

    Args:
        image (np.ndarray): image data, any numeric dtype.
        factor (float): 255 selects the uint8 output, anything else the
            normalised float output.

    Returns:
        np.ndarray: normalised copy of ``image``.
    """
    src = np.asarray(image)
    # Work on a floating-point copy: the in-place division used to fail for
    # integer arrays.
    image = np.array(src, dtype=np.result_type(src.dtype, np.float32))
    image -= image.min()
    peak = image.max()
    if peak > 0:  # constant image: leave zeros instead of producing NaN
        image /= peak
    if factor == 255:
        image *= factor
        return np.uint8(image)
    return image


def convert_to_grayscale(im_as_arr):
    """Collapse the last axis of an image into one normalised channel.

    The absolute values are summed over the last axis, then rescaled to 0..1
    between the minimum and the 99th percentile (values above are clipped).

    Args:
        im_as_arr (np.ndarray): image whose last axis is the channel axis.

    Returns:
        np.ndarray: array with the last axis reduced to size 1.
    """
    grayscale_im = np.sum(np.abs(im_as_arr), axis=-1)
    im_max = np.percentile(grayscale_im, 99)
    im_min = np.min(grayscale_im)
    span = im_max - im_min
    if span > 0:
        grayscale_im = np.clip((grayscale_im - im_min) / span, 0, 1)
    else:
        # flat input: avoid 0/0
        grayscale_im = np.zeros_like(grayscale_im, dtype=np.result_type(grayscale_im.dtype, np.float32))
    grayscale_im = np.expand_dims(grayscale_im, axis=-1)
    return grayscale_im


def apply_gradient_images(gradient, file_name, is_save=False):
    """Normalise a gradient image to 0..1 and optionally save it as JPEG.

    The directory ``../results`` is created if missing.

    Args:
        gradient (np.ndarray): gradient array.
        file_name (str): base name (without extension) used when saving.
        is_save (bool): save to ``../results/<file_name>.jpg`` and return
            None when True; return the normalised array when False.
    """
    os.makedirs('../results', exist_ok=True)
    # Normalize
    gradient = gradient - gradient.min()
    peak = gradient.max()
    if peak > 0:
        gradient = gradient / peak
    # Save image
    if is_save:
        path_to_file = os.path.join('../results', file_name + '.jpg')
        save_image(gradient, path_to_file)
        return None
    return gradient


def gen_colormap(input_image, feature, gradient, factor=255):
    """Compute a Grad-CAM style activation map resized to the input image.

    Channel weights are the spatial means of ``gradient``; the weighted
    feature maps are summed, passed through ReLU, normalised to 0..1 and
    resized.

    Args:
        input_image: tensor whose ``shape[2]``/``shape[3]`` give the target
            size (assumed N x C x H x W - TODO confirm with callers).
        feature (torch.Tensor): feature maps, [C,H,W] or [1,C,H,W].
        gradient (torch.Tensor): gradients with the same layout.
        factor (int): quantisation scale used before resizing.

    Returns:
        np.ndarray: activation map in 0..1.
    """
    if feature.size(0) == 1:
        feature = feature.squeeze(0)
    if gradient.size(0) == 1:
        gradient = gradient.squeeze(0)
    gradient = gradient.cpu().data.numpy()  # [C,H,W]
    weight = np.mean(gradient, axis=(1, 2))  # [C]

    feature = feature.cpu().data.numpy()  # [C,H,W]

    cam = feature * weight[:, np.newaxis, np.newaxis]  # [C,H,W]
    cam = np.sum(cam, axis=0)  # [H,W]
    cam = np.maximum(cam, 0)  # ReLU

    # Normalise between 0-1; an all-equal map stays at zero instead of NaN
    span = np.max(cam) - np.min(cam)
    cam = (cam - np.min(cam)) / span if span > 0 else np.zeros_like(cam)
    cam = np.uint8(cam * factor)  # scale to visualise
    # PIL sizes are (width, height); ANTIALIAS was an alias of LANCZOS and
    # no longer exists in current Pillow.
    target_size = (input_image.shape[3], input_image.shape[2])
    cam = np.uint8(Image.fromarray(cam).resize(target_size, Image.LANCZOS)) / factor
    return cam


def apply_colormap_on_image(org_im, activation, colormap_name):
    """Overlay an activation heat-map on an image.

    Args:
        org_im (np.ndarray | torch.Tensor): image in 0..1. Tensors are read
            as ``org_im[0, :3]`` and moved to channels-last.
        activation (np.ndarray): grayscale activation map.
        colormap_name (str): matplotlib colormap name.

    Returns:
        tuple: ``(no_trans_heatmap, heatmap_on_image)`` as PIL images; the
        second is the original composited with the heat-map at alpha 0.4.
    """
    import matplotlib

    if not isinstance(org_im, np.ndarray):
        org_im = org_im[0, :3, ...].permute(1, 2, 0)
        org_im = org_im.cpu().numpy() * 255
    else:
        org_im = org_im * 255
    org_im = PIL.Image.fromarray(org_im.astype(np.uint8))

    # Get colormap. `matplotlib.cm.get_cmap` is absent from recent
    # Matplotlib, so prefer the colormap registry when it exists.
    registry = getattr(matplotlib, "colormaps", None)
    if registry is not None:
        color_map = registry[colormap_name]
    else:
        color_map = mpl_color_map.get_cmap(colormap_name)
    no_trans_heatmap = color_map(activation)
    # Change alpha channel in colormap to make sure original image is displayed
    heatmap = copy.copy(no_trans_heatmap)
    heatmap[..., 3] = 0.4
    heatmap = Image.fromarray((heatmap * 255).astype(np.uint8))
    heatmap = heatmap.resize(org_im.size, Image.LANCZOS)
    no_trans_heatmap = Image.fromarray((no_trans_heatmap * 255).astype(np.uint8))

    # Apply heatmap on image
    heatmap_on_image = Image.new("RGBA", org_im.size)
    heatmap_on_image = Image.alpha_composite(heatmap_on_image, org_im.convert('RGBA'))
    heatmap_on_image = Image.alpha_composite(heatmap_on_image, heatmap)
    return no_trans_heatmap, heatmap_on_image


def get_positive_negative_saliency(gradient):
    """Split a gradient into positive and negative saliency maps.

    Args:
        gradient (np.ndarray): gradient of the operation to visualise.

    Returns:
        tuple: ``(pos_saliency, neg_saliency)``; the positive part divided by
        the maximum and the negated negative part divided by ``-min``.
    """
    pos_saliency = (np.maximum(0, gradient) / gradient.max())
    neg_saliency = (np.maximum(0, -gradient) / -gradient.min())
    return pos_saliency, neg_saliency


# hook_test/Viz/main
def gen_grad_cam(image, feature, gradient):
    """Compute a Grad-CAM map with torch ops and return it as numpy.

    Channel weights are the spatial means of ``gradient``; weighted feature
    maps pass through ReLU, are summed over channels and min-max normalised
    (with a 1e-8 guard in the denominator).

    Args:
        image: unused; kept for interface compatibility.
        feature (torch.Tensor): [C,H,W] or [1,C,H,W].
        gradient (torch.Tensor): same layout as ``feature``.

    Returns:
        np.ndarray: [H,W] map in 0..1.
    """
    if feature.size(0) == 1:
        feature = feature.squeeze(0)
    if gradient.size(0) == 1:
        gradient = gradient.squeeze(0)
    weight = torch.mean(gradient, dim=(1, 2))  # [C]

    cam = feature * weight[:, None, None]  # [C,H,W]
    cam = torch.maximum(cam, torch.zeros_like(cam))  # ReLU
    cam = torch.sum(cam, dim=0)  # [H,W]

    # Normalise values
    cam = cam - torch.min(cam)
    cam = cam / (torch.max(cam) + 1e-8)

    return cam.cpu().data.numpy()


def apply_heatmap(image, mask, factor=255):
    """Blend a JET heat-map of ``mask`` onto ``image``.

    Args:
        image (np.ndarray | torch.Tensor): [H,W,C] array, or a tensor that is
            read as ``image[0, :3]`` and moved to channels-last.
        mask (np.ndarray | torch.Tensor): [H,W] map in 0..1.
        factor: unused; kept for interface compatibility.

    Returns:
        tuple: ``(cam, heatmap)`` - the blended image normalised to 0..1 by
        ``norm_image`` and the uint8 RGB heat-map.
    """
    if not isinstance(image, np.ndarray):
        image = image[0, :3, ...].permute(1, 2, 0)
        image = image.cpu().numpy()
    if not isinstance(mask, np.ndarray):
        mask = mask.cpu().numpy()

    # mask -> heatmap
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    heatmap = np.float32(heatmap) / 255
    heatmap = np.ascontiguousarray(heatmap[..., ::-1])  # bgr to rgb

    # Merge the heatmap into the original image. cv2.resize takes
    # dsize=(width, height), i.e. the reverse of image.shape[:2].
    height, width = image.shape[:2]
    cam = cv2.resize(heatmap, (width, height)) + np.float32(image)
    return norm_image(cam, 2048), (heatmap * 255).astype(np.uint8)
def showimage8(images, unnormlize=2047.0, first_channel=False):
    """Turn a multi-band tensor into a stretched 3-band image for display.

    Bands 0, 2 and 4 of the last axis are selected, multiplied by
    ``unnormlize`` (forced to 1.0 when the data already exceeds 1), clamped
    to 0..2047, linearly stretched per band and returned with the band order
    reversed.

    Args:
        images (torch.Tensor): [H,W,C] tensor, or [C,H,W] when
            ``first_channel`` is True; needs at least 5 bands.
        unnormlize (float): scale applied to data stored in 0..1.
        first_channel (bool): permute [C,H,W] to [H,W,C] first.

    Returns:
        np.ndarray: [H,W,3] array in 0..1.
    """
    assert images.shape[1] >= 3, "input images format is not suitable"

    if isinstance(images, torch.Tensor):
        # `np.float` no longer exists in NumPy; the builtin float is equivalent.
        if float(torch.max(images)) > 1.0:
            unnormlize = 1.0
    if first_channel:
        images = images.permute(1, 2, 0)
    output = images[..., [0, 2, 4]] * torch.tensor(unnormlize)
    output = torch.clamp(output, 0, 2047)
    output = output.cpu().detach().numpy()
    stretched = linstretch(output)
    return stretched[:, :, ::-1]


def linstretch(images, tol=None):
    """Percentile-based linear contrast stretch, applied per band.

    Python port of the MATLAB routine below; for each band the cumulative
    histogram of the rounded values picks a lower and an upper cut, values
    are clipped to the cuts and rescaled to 0..1::

        NM = N*M;
        for i=1:3
            b = reshape(double(uint16(ImageToView(:,:,i))),NM,1);
            [hb,levelb] = hist(b,max(b)-min(b));
            chb = cumsum(hb);
            t(1)=ceil(levelb(find(chb>NM*tol(i,1), 1 )));
            t(2)=ceil(levelb(find(chb<NM*tol(i,2), 1, 'last' )));
            b(b<t(1))=t(1);
            b(b>t(2))=t(2);
            b = (b-t(1))/(t(2)-t(1));
            ImageToView(:,:,i) = reshape(b,N,M);
        end

    The input is never modified; a floating-point copy is stretched. A band
    with no value spread is returned as zeros, and when the percentile cuts
    collapse the band's plain min/max are used instead.

    Args:
        images (np.ndarray): [H,W] or [H,W,C] array.
        tol (list | None): lower/upper cumulative fractions,
            default ``[0.01, 0.995]``.

    Returns:
        np.ndarray: stretched array, squeezed of singleton axes.
    """
    if tol is None:
        tol = [0.01, 0.995]
    src = np.asarray(images)
    # Float copy: keeps the caller's data intact and stops integer inputs
    # from truncating the 0..1 result.
    images = np.array(src, dtype=np.result_type(src.dtype, np.float32))
    if images.ndim != 3:
        images = np.expand_dims(images, axis=-1)
    h, w, channels = images.shape
    N = h * w
    for c in range(channels):
        band = np.float32(np.round(images[:, :, c])).reshape(N, 1)
        span = float(band.max() - band.min())
        if not span > 0:
            # constant (or NaN) band: a histogram with zero bins is invalid
            images[..., c] = 0
            continue
        hb, levelb = np.histogram(band, bins=max(1, math.ceil(span)))
        chb = np.cumsum(hb, 0)
        centers = levelb[:-1] + (levelb[1] - levelb[0]) / 2
        low = centers[chb > N * tol[0]]
        high = centers[chb < N * tol[1]]
        if low.size and high.size and high[-1] > low[0]:
            lo, hi = low[0], high[-1]
        else:
            lo, hi = band.min(), band.max()
        band = np.clip(band, a_min=lo, a_max=hi)
        band = (band - lo) / (hi - lo)
        images[..., c] = np.reshape(band, (h, w))

    images = np.squeeze(images)
    return images


def make_grid(
        tensor: Union[torch.Tensor, List[torch.Tensor]],
        mode: str = "grey",
        nrow: int = 8,
        padding: int = 2,
        normalize: bool = False,
        range: Optional[Tuple[int, int]] = None,
        scale_each: bool = False,
        pad_value: int = 0,
) -> torch.Tensor:
    """Make a grid of images.

    Args:
        tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
            or a list of images all of the same size.
        mode (str, optional): channel mode chosen by the caller; with
            ``"RGB"`` a single-channel batch is replicated to 3 channels,
            otherwise it is left as is. Default: ``"grey"``.
        nrow (int, optional): Number of images displayed in each row of the grid.
            The final grid size is ``(B / nrow, nrow)``. Default: ``8``.
        padding (int, optional): amount of padding. Default: ``2``.
        normalize (bool, optional): If True, shift the image to the range (0, 1),
            by the min and max values specified by :attr:`range`. Default: ``False``.
        range (tuple, optional): tuple (min, max) where min and max are numbers,
            then these numbers are used to normalize the image. By default, min and max
            are computed from the tensor.
        scale_each (bool, optional): If ``True``, scale each image in the batch of
            images separately rather than the (min, max) over all images. Default: ``False``.
        pad_value (float, optional): Value for the padded pixels. Default: ``0``.

    Returns:
        Tensor: the grid (C x H' x W'); a batch of one is returned as the
        single image without padding.

    Raises:
        TypeError: if ``tensor`` is neither a tensor nor a list of tensors.
    """
    if not (torch.is_tensor(tensor) or
            (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))):
        raise TypeError('tensor or list of tensors expected, got {}'.format(type(tensor)))

    # if list of tensors, convert to a 4D mini-batch Tensor
    if isinstance(tensor, list):
        tensor = torch.stack(tensor, dim=0)

    if tensor.dim() == 2:  # single image H x W
        tensor = tensor.unsqueeze(0)
    if tensor.dim() == 3:  # single image
        if tensor.size(0) == 1:  # if single-channel, convert to 3-channel
            tensor = torch.cat((tensor, tensor, tensor), 0)
        tensor = tensor.unsqueeze(0)

    if tensor.dim() == 4 and tensor.size(1) == 1:  # single-channel images
        if mode == "RGB":
            tensor = torch.cat((tensor, tensor, tensor), 1)

    if normalize is True:
        tensor = tensor.clone()  # avoid modifying tensor in-place
        if range is not None:
            assert isinstance(range, tuple), \
                "range has to be a tuple (min, max) if specified. min and max are numbers"

        def norm_ip(img, min, max):
            img.clamp_(min=min, max=max)
            img.add_(-min).div_(max - min + 1e-5)

        def norm_range(t, range):
            if range is not None:
                norm_ip(t, range[0], range[1])
            else:
                norm_ip(t, float(t.min()), float(t.max()))

        if scale_each is True:
            for t in tensor:  # loop over mini-batch dimension
                norm_range(t, range)
        else:
            norm_range(tensor, range)

    if tensor.size(0) == 1:
        return tensor.squeeze(0)

    # make the mini-batch of images into a grid
    nmaps = tensor.size(0)
    xmaps = min(nrow, nmaps)
    ymaps = int(math.ceil(float(nmaps) / xmaps))
    height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding)
    num_channels = tensor.size(1)
    grid = tensor.new_full((num_channels, height * ymaps + padding, width * xmaps + padding), pad_value)
    # The `range` parameter shadows the builtin here, so the cell position is
    # derived from the running image index instead of nested range() loops.
    for k, img in enumerate(tensor):
        y, x = divmod(k, xmaps)
        grid.narrow(1, y * height + padding, height - padding).narrow(  # type: ignore[attr-defined]
            2, x * width + padding, width - padding
        ).copy_(img)
    return grid


def tensor_save_image(
        tensor: Union[torch.Tensor, List[torch.Tensor]],
        fp: Union[Text, pathlib.Path, BinaryIO],
        nrow: int = 8,
        padding: int = 2,
        normalize: bool = False,
        range: Optional[Tuple[int, int]] = None,
        scale_each: bool = False,
        pad_value: int = 0,
        format: Optional[str] = None,
) -> None:
    """Save a given Tensor into an image file.

    Args:
        tensor (Tensor or list): Image to be saved. If given a mini-batch tensor,
            saves the tensor as a grid of images by calling ``make_grid``.
        fp (string or file object): A filename or a file object
        format(Optional):  If omitted, the format to use is determined from the filename extension.
            If a file object was used instead of a filename, this parameter should always be used.
        nrow, padding, normalize, range, scale_each, pad_value: forwarded to
            ``make_grid`` (``mode`` keeps its default there).
    """
    from PIL import Image
    grid = make_grid(tensor, nrow=nrow, padding=padding, pad_value=pad_value,
                     normalize=normalize, range=range, scale_each=scale_each)
    # Add 0.5 after unnormalizing to [0, 255] to round to nearest integer
    ndarr = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()
    im = Image.fromarray(ndarr)
    im.save(fp, format=format)


if __name__ == "__main__":
    a = np.random.randn(3, 3)
    linstretch(a)
class TaskDispatcher(Config):
    """``Config`` subclass that keeps a name -> class registry of its subclasses.

    Subclasses register themselves at class-creation time under the ``name``
    class keyword (or their class name when it is omitted). ``new`` looks a
    class up in that registry and instantiates it.
    """

    # Shared registry: every subclass writes into this same dict.
    _task = dict()

    def __init_subclass__(cls, name='', **kwargs):
        """Register ``cls`` under ``name`` (or ``cls.__name__`` when empty)."""
        super().__init_subclass__(**kwargs)
        key = name if name != '' else cls.__name__
        cls._task[key] = cls
        cls._name = key

    def __new__(cls, *args, **kwargs):
        """Create an instance; ``TaskDispatcher(task=...)`` builds the registered class.

        Raises:
            ValueError: if called on ``TaskDispatcher`` itself with a ``task``
                keyword that is not registered.
        """
        if cls is TaskDispatcher:
            task = kwargs.get('task')
            try:
                cls = cls._task[task]
            except KeyError:
                raise ValueError(f'Got task={task} but expected '
                                 f'one of {cls._task.keys()}')
        instance = super().__new__(cls)
        return instance

    @classmethod
    def new(cls, **kwargs):
        """Instantiate the registered class selected by mode, task or arch.

        This is reached both from the outer entry point and from task-level
        configs, which pass different keyword sets. The lookup key is the
        first available of ``mode``, ``task`` and ``arch``; ``task``/``arch``
        are removed from ``kwargs`` when they are used as the key, and
        ``mode`` is always removed before the class is called.

        Returns:
            An instance of the selected class built from the remaining
            keyword arguments, or an empty ``Config`` (after a warning) when
            the key is not registered.
        """
        key = 'mode'
        value = kwargs.setdefault('mode', None)
        if value is None:
            # second / third call level lands here
            key = 'task'
            if kwargs.get('task', None):
                # second level: dispatch on the task name
                value = kwargs.pop('task')
            elif kwargs.get('arch', None):
                # third level: dispatch on the architecture name
                key = 'arch'
                value = kwargs.pop('arch')
            else:
                key = 'arch'

        kwargs.pop('mode')

        try:
            cls = cls._task[value]
        except KeyError:
            warning = f'Got {key}={value} but expected ' \
                      f'one of {cls._task.keys()}'
            warnings.warn(warning)
            return Config()

        return cls(**kwargs)
class ModelDispatcher(object):
    """Name -> class registry used to build task models."""

    # Shared registry of task-level model wrappers.
    _task = dict()

    def __init_subclass__(cls, name='', **kwargs):
        """Register ``cls`` under ``name`` (or ``cls.__name__`` when empty)."""
        super().__init_subclass__(**kwargs)
        key = name if name != '' else cls.__name__
        cls._task[key] = cls
        cls._name = key

    def __new__(cls, *args, **kwargs):
        """Create an instance; ``ModelDispatcher(task=...)`` builds the registered class.

        Raises:
            ValueError: if called on ``ModelDispatcher`` itself with an
                unregistered ``task`` keyword.
        """
        if cls is ModelDispatcher:
            task = kwargs.get('task')
            try:
                cls = cls._task[task]
            except KeyError:
                raise ValueError(f'Got task={task} but expected'
                                 f'one of {cls._task.keys()}')
        instance = super().__new__(cls)
        return instance

    @classmethod
    def build_model(cls, cfg):
        """Build model, criterion, optimizer and scheduler from ``cfg``.

        ``cfg.task`` selects the task wrapper, ``cfg.arch`` selects the
        architecture builder from that wrapper's ``_models`` registry, and
        ``cfg.model_style`` (falling back to ``cfg.task``) selects the class
        that finally wraps the built model together with its criterion.

        Returns:
            tuple: ``(model, criterion, optimizer, scheduler)``.

        Raises:
            ValueError: when task, arch or model_style is not registered.
        """
        arch = cfg.arch
        task = cfg.task
        model_style = cfg.model_style

        # Only the registry look-ups are guarded, so a KeyError raised inside
        # a constructor is no longer misreported as an unknown name.
        try:
            task_cls = cls._task[task]
        except KeyError:
            raise ValueError(f'Got task={task} but expected '
                             f'one of {cls._task.keys()} in {cls}')
        # task-level dispatcher (e.g. the pansharpening wrapper)
        dispatcher = task_cls(None, None)

        try:
            arch_cls = dispatcher._models[arch]
        except KeyError:
            raise ValueError(f'Got arch={arch} but expected '
                             f'one of {dispatcher._models.keys()} in {dispatcher}')
        # concrete architecture builder
        model, criterion, optimizer, scheduler = arch_cls()(cfg)

        if model_style is None:
            # default: wrap with the task class itself (model + head)
            model_style = task

        if model_style is not None:
            try:
                wrapper_cls = dispatcher._task[model_style]
            except KeyError:
                raise ValueError(f'Got model_style={model_style} but expected '
                                 f'one of {dispatcher._models.keys()} (merged in _models) in {dispatcher}')
            model = wrapper_cls(model, criterion)

        return model, criterion, optimizer, scheduler


class PanSharpeningModel(ModelDispatcher, name='pansharpening'):
    """Task wrapper for pansharpening; its subclasses register as architectures."""

    # Registry of architecture builders (filled by subclasses).
    _models = {}

    def __init__(self, model=None, criterion=None):
        super(PanSharpeningModel, self).__init__()
        self.model = model
        self.criterion = criterion
        # L2 regularisation is enabled only when the wrapped model says so.
        self.reg = False
        if hasattr(self.model, 'reg'):
            self.reg = self.model.reg

    def __init_subclass__(cls, name='', **kwargs):
        """Register an architecture under ``name`` in ``_models`` (not ``_task``)."""
        key = name if name != '' else cls.__name__
        cls._models[key] = cls
        cls._name = key

    def l2_regularization(self, loss_dict, weight_decay=1e-5, flag=False):
        """Add an L2 penalty over convolution weights to the loss.

        Every parameter whose name contains both ``'conv'`` and ``'weight'``
        contributes ``weight_decay * sum(w ** 2) / 2``. The penalty is
        computed on the parameters themselves so that it takes part in
        back-propagation (computed on ``.data`` it would be a constant and
        never influence the weights).

        Args:
            loss_dict (dict | Tensor): a dict with ``'loss'`` and
                ``'log_vars'`` entries, or the loss itself.
            weight_decay (float): penalty coefficient.
            flag (bool): print each parameter's penalty.

        Returns:
            The updated dict (same object) or the new loss value.
        """
        regularizations = []
        for k, v in self.model.named_parameters():
            if 'conv' in k and 'weight' in k:
                penality = weight_decay * ((v ** 2).sum() / 2)
                regularizations.append(penality)
                if flag:
                    print("{} : {}".format(k, penality))
        if isinstance(loss_dict, dict):
            loss_dict['loss'] = loss_dict['loss'] + sum(regularizations)
            # NOTE(review): this records the regularised total under
            # 'reg_loss', not the penalty alone - confirm that is intended.
            loss_dict['log_vars'].update(reg_loss=loss_dict['loss'])
        else:
            loss_dict = loss_dict + sum(regularizations)
        return loss_dict

    def train_step(self, *args, **kwargs):
        """Run the wrapped model's train step, adding the L2 penalty when enabled."""
        loss_dict = self.model.train_step(args[0], **kwargs)
        if self.reg:
            return self.l2_regularization(loss_dict)
        return loss_dict

    def val_step(self, *args, **kwargs):
        """Run validation on one sample, compute metrics and optionally save.

        Reads ``img_range``, ``save_fmt``, ``idx``, ``save_dir`` and
        ``filename`` from ``kwargs``. The ground truth is moved to CUDA
        before the metrics are computed.

        Returns:
            dict: ``{'log_vars': metrics}``.
        """
        sr, gt = self.model.val_step(*args, **kwargs)
        result_our = torch.squeeze(sr).permute(1, 2, 0)
        result_our = torch.clip(result_our, 0, 1)
        metrics = analysis_accu(gt.cuda().squeeze(0), result_our, 4)
        result_our = result_our * kwargs['img_range']

        if kwargs['save_fmt'] is not None:
            save_results(kwargs['idx'], kwargs['save_dir'], kwargs['filename'], kwargs['save_fmt'], result_our)
        return {'log_vars': metrics}
================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/slurm_train.sh ================================================ #!/usr/bin/env bash set -x #cd projects/derain PARTITION=defq JOB_NAME=task #CONFIG=$3 #WORK_DIR=$4 GPUS=${GPUS:-16} GPUS_PER_NODE=${GPUS_PER_NODE:-8} CPUS_PER_TASK=${CPUS_PER_TASK:-8} SRUN_ARGS=${SRUN_ARGS:-""} #NNODE=${NNODE:-'node[004]'} #PY_ARGS=${@:5} #PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ srun -p ${PARTITION} \ --job-name=${JOB_NAME} \ --gres=gpu:0 \ --ntasks=${GPUS} \ --ntasks-per-node=${GPUS_PER_NODE} \ --cpus-per-task=${CPUS_PER_TASK} \ --kill-on-bad-exit=1 \ --nodelist=node[004-005] \ ${SRUN_ARGS} \ python -u main.py --launcher="slurm" #${PY_ARGS} #srun -p defq -J test -n 2 --nodelist=node[004-005] --ntasks-per-node=2 --export=cuda_home python -u test_slurm.py #srun --partition=defq --job-name=rain -n 1 --nodelist=node004 --gres=gpu:8 --ntasks-per-node=8 python -u derain_main.py --launcher="slurm #sed -i "s/\r//" slurm_train.sh # srun -p defq -J test -n 1 --nodelist=node[004] --ntasks-per-node=1 python -u derain_main.py --launcher slurm #srun -p defq -J test -n 2 --nodelist=node[004-005] --ntasks-per-node=1 python -u test_slurm.py ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/snmn_d.sh ================================================ python -m launch --nproc_per_node 8 --master_port 27890 main.py ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/variance_sacling_initializer.py ================================================ # GPL License # Copyright (C) 2021 , UESTC # All Rights Reserved # @Author : Xiao Wu, LiangJian Deng # @reference: import torch import torch.nn as nn import math def truncated_normal_(tensor, mean=0.0, std=1.0): with torch.no_grad(): size = tensor.shape tmp = tensor.new_empty(size + (4,)).normal_() valid = (tmp < 2) & (tmp > -2) ind = valid.max(-1, keepdim=True)[1] tensor.data.copy_(tmp.gather(-1, 
def truncated_normal_(tensor, mean=0.0, std=1.0):
    """Fill ``tensor`` in place with samples from an approximately truncated normal.

    Four standard-normal candidates are drawn per element and the first one
    inside (-2, 2) is kept (candidate 0 is used if none qualifies); the
    result is then scaled by ``std`` and shifted by ``mean``.

    Args:
        tensor (torch.Tensor): tensor to overwrite.
        mean (float): mean of the target distribution.
        std (float): standard deviation before truncation.

    Returns:
        torch.Tensor: the same ``tensor``.
    """
    with torch.no_grad():
        size = tensor.shape
        tmp = tensor.new_empty(size + (4,)).normal_()
        valid = (tmp < 2) & (tmp > -2)
        # index of the first in-range candidate along the last axis
        ind = valid.max(-1, keepdim=True)[1]
        tensor.data.copy_(tmp.gather(-1, ind).squeeze(-1))
        tensor.data.mul_(std).add_(mean)
        return tensor


def variance_scaling_initializer(tensor):
    """Initialise a 4-D weight tensor in place with variance scaling.

    The tensor is viewed with its axes reversed, fan-in is computed on that
    view, and the values are drawn with ``truncated_normal_`` using
    ``std = sqrt(1.3 * 2.0 / fan_in)``.

    Args:
        tensor (torch.Tensor): 4-D weight tensor (the axes are permuted with
            a fixed 4-element order).

    Returns:
        torch.Tensor: the same ``tensor``, now initialised.
    """

    def calculate_fan(shape, factor=2.0, mode='FAN_IN', uniform=False):
        # Shapes arrive with reversed axes, e.g.
        # 64 9 3 3 -> 3 3 9 64
        # 64 64 3 3 -> 3 3 64 64
        if shape:
            fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
            fan_out = float(shape[-1])
        else:
            fan_in = 1.0
            fan_out = 1.0
        for dim in shape[:-2]:
            fan_in *= float(dim)
            fan_out *= float(dim)
        if mode == 'FAN_IN':
            # Count only number of input connections.
            n = fan_in
        elif mode == 'FAN_OUT':
            # Count only number of output connections.
            n = fan_out
        elif mode == 'FAN_AVG':
            # Average number of inputs and output connections.
            n = (fan_in + fan_out) / 2.0
        else:
            raise ValueError(f"unknown mode {mode!r}; expected 'FAN_IN', 'FAN_OUT' or 'FAN_AVG'")
        if uniform:
            # `NotImplemented` is a constant, not an exception class.
            raise NotImplementedError("uniform variance scaling is not supported")
        # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
        trunc_stddev = math.sqrt(1.3 * factor / n)
        return fan_in, fan_out, trunc_stddev

    def variance_scaling(x, scale=1.0, mode="fan_in", distribution="truncated_normal", seed=None):
        # Reverse the axes so the fan computation sees (k, k, in, out); the
        # permuted view shares storage, so the fill below writes into `x`.
        x = x.permute(3, 2, 1, 0)
        fan_in, fan_out, trunc_stddev = calculate_fan(x.shape)
        truncated_normal_(x, 0.0, trunc_stddev)
        x = x.permute(3, 2, 0, 1)
        return x

    variance_scaling(tensor)

    return tensor
``` ### Code style #### Python We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style. We use the following tools for linting and formatting: - [flake8](http://flake8.pycqa.org/en/latest/): A wrapper around some linter tools. - [yapf](https://github.com/google/yapf): A formatter for Python files. - [isort](https://github.com/timothycrosley/isort): A Python utility to sort imports. - [markdownlint](https://github.com/markdownlint/markdownlint): A linter to check markdown files and flag style issues. - [docformatter](https://github.com/myint/docformatter): A formatter to format docstrings. Style configurations of yapf and isort can be found in [setup.cfg](./setup.cfg). We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`, `markdown files`, fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma`, `mixed-line-ending`, sorts `requirements.txt` automatically on every commit. The config for a pre-commit hook is stored in [.pre-commit-config](./.pre-commit-config.yaml). After you clone the repository, you will need to install and initialize the pre-commit hook. ```shell pip install -U pre-commit ``` From the repository folder ```shell pre-commit install ``` Try the following steps to install ruby if you encounter an issue when installing markdownlint ```shell # install rvm curl -L https://get.rvm.io | bash -s -- --autolibs=read-fail [[ -s "$HOME/.rvm/scripts/rvm" ]] && source "$HOME/.rvm/scripts/rvm" rvm autolibs disable # install ruby rvm install 2.7.1 ``` Or refer to [this repo](https://github.com/innerlee/setup) and take [`zzruby.sh`](https://github.com/innerlee/setup/blob/master/zzruby.sh) according to its instructions. After this, code linters and formatters will be enforced on every commit. >Before you create a PR, make sure that your code lints and is formatted by yapf.
#### C++ and CUDA We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/Dockerfile ================================================ FROM python:3.7 WORKDIR /mmcv COPY . /mmcv RUN pip install -e . ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/Jenkinsfile ================================================
// Parallel lists: entry i of each list belongs to the same CI configuration
// (they are indexed together in the node block at the bottom of this file).
def docker_images = ["registry.cn-hangzhou.aliyuncs.com/sensetime/openmmlab:cuda10.1-cudnn7-devel-ubuntu18.04-py37-pt1.3", "registry.cn-hangzhou.aliyuncs.com/sensetime/openmmlab:cuda10.2-cudnn7-devel-ubuntu18.04-py37-pt1.5"]
def torch_versions = ["1.3.0", "1.5.0"]
def torchvision_versions = ["0.4.2", "0.6.0"]

// Build and return a closure that runs the full CI sequence
// (before_install -> dependencies -> build -> test) inside `docker_image`.
// `folder` is appended to the workspace path so each configuration works on
// its own copy of the checkout.
def get_stages(docker_image, folder) {
    def pip_mirror = "-i https://mirrors.aliyun.com/pypi/simple"
    stages = {
        docker.image(docker_image).inside('-u root --gpus all --net host') {
            // Start from a clean per-configuration copy of the workspace.
            sh "rm -rf ${env.WORKSPACE}-${folder} ${env.WORKSPACE}-${folder}@tmp"
            sh "cp -r ${env.WORKSPACE} ${env.WORKSPACE}-${folder}"
            try {
                dir("${env.WORKSPACE}-${folder}") {
                    stage("before_install") {
                        sh "apt-get update && apt-get install -y ninja-build"
                    }
                    stage("dependencies") {
                        // torch and torchvision are pre-installed in dockers
                        sh "pip list | grep torch"
                        sh "apt-get install -y ffmpeg libturbojpeg"
                        sh "pip install pytest coverage lmdb PyTurboJPEG Cython ${pip_mirror}"
                    }
                    stage("build") {
                        sh "MMCV_WITH_OPS=1 pip install -e . ${pip_mirror}"
                    }
                    stage("test") {
                        sh "coverage run --branch --source=mmcv -m pytest tests/"
                        sh "coverage xml"
                        sh "coverage report -m"
                    }
                }
            } finally {
                // Always remove the copied workspace, even when a stage fails.
                sh "rm -rf ${env.WORKSPACE}-${folder} ${env.WORKSPACE}-${folder}@tmp"
            }
        }
    }
    return stages
}

// Entry point: check out the sources once, build one stage closure per
// docker image, then hand the whole map to `parallel`.
node('master') {
    // fetch latest change from SCM (Source Control Management)
    checkout scm
    def stages = [:]
    for (int i = 0; i < docker_images.size(); i++) {
        def docker_image = docker_images[i]
        def torch = torch_versions[i]
        def torchvision = torchvision_versions[i]
        // The tag is only used as the key (branch name) in the parallel map.
        def tag = docker_image + '_' + torch + '_' + torchvision
        def folder = "${i}"
        stages[tag] = get_stages(docker_image, folder)
    }
    parallel stages
}
================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/LICENSE ================================================ Copyright (c) OpenMMLab. All rights reserved Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2018-2020 Open-MMLab. All rights reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/LICENSES.md ================================================ # Licenses for special operations In this file, we list the operations with other licenses instead of Apache 2.0. Users should be careful about adopting these operations in any commercial matters. | Operation | Files | License | | :--------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :------------: | | upfirdn2d | [mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu](https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu) | NVIDIA License | | fused_leaky_relu | [mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.cu](https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.cu) | NVIDIA License | ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/MANIFEST.in ================================================ include requirements/runtime.txt include mmcv/model_zoo/open_mmlab.json mmcv/model_zoo/deprecated.json mmcv/model_zoo/mmcls.json include mmcv/ops/csrc/common/cuda/*.cuh mmcv/ops/csrc/common/cuda/*.hpp mmcv/ops/csrc/common/*.hpp include mmcv/ops/csrc/pytorch/*.cpp mmcv/ops/csrc/pytorch/cuda/*.cu mmcv/ops/csrc/pytorch/cuda/*.cpp mmcv/ops/csrc/pytorch/cpu/*.cpp include 
mmcv/ops/csrc/parrots/*.h mmcv/ops/csrc/parrots/*.cpp ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/README_zh-CN.md ================================================
 
OpenMMLab 官网 HOT      OpenMMLab 开放平台 TRY IT OUT
 
[![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmcv.readthedocs.io/zh_CN/latest/) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mmcv)](https://pypi.org/project/mmcv/) [![PyPI](https://img.shields.io/pypi/v/mmcv)](https://pypi.org/project/mmcv) [![badge](https://github.com/open-mmlab/mmcv/workflows/build/badge.svg)](https://github.com/open-mmlab/mmcv/actions) [![codecov](https://codecov.io/gh/open-mmlab/mmcv/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmcv) [![license](https://img.shields.io/github/license/open-mmlab/mmcv.svg)](https://github.com/open-mmlab/mmcv/blob/master/LICENSE) [English](README.md) | 简体中文 ## 简介 MMCV 是一个面向计算机视觉的基础库,它支持了很多开源项目,例如: - [MIM](https://github.com/open-mmlab/mim): OpenMMLab 项目、算法、模型的统一入口 - [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图像分类工具箱与测试基准 - [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 检测工具箱与测试基准 - [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用3D目标检测平台 - [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱与测试基准 - [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频理解工具箱与测试基准 - [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab 一体化视频目标感知平台 - [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab 姿态估计工具箱与测试基准 - [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab 图像视频编辑工具箱 - [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab 全流程文字检测识别理解工具包 - [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab 新一代生成模型工具箱 - [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab 光流估计工具箱与测试基准 - [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab 少样本学习工具箱与测试基准 - [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 人体参数化模型工具箱与测试基准 - [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab 自监督学习工具箱与测试基准 - [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab 模型压缩工具箱与测试基准 - 
[MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab 模型部署框架 MMCV 提供了如下众多功能: - 通用的 IO 接口 - 图像和视频处理 - 图像和标注结果可视化 - 常用小工具(进度条,计时器等) - 基于 PyTorch 的通用训练框架 - 多种 CNN 网络结构 - 高质量实现的常见 CUDA 算子 如想了解更多特性和使用,请参考[文档](http://mmcv.readthedocs.io/zh_CN/latest)。 提示: MMCV 需要 Python 3.6 以上版本。 ## 安装 MMCV 有两个版本: - **mmcv-full**: 完整版,包含所有的特性以及丰富的开箱即用的 CUDA 算子。注意完整版本可能需要更长时间来编译。 - **mmcv**: 精简版,不包含 CUDA 算子但包含其余所有特性和功能,类似 MMCV 1.0 之前的版本。如果你不需要使用 CUDA 算子的话,精简版可以作为一个考虑选项。 **注意**: 请不要在同一个环境中安装两个版本,否则可能会遇到类似 `ModuleNotFound` 的错误。在安装一个版本之前,需要先卸载另一个。`如果CUDA可用,强烈推荐安装mmcv-full`。 a. 安装完整版 在安装 mmcv-full 之前,请确保 PyTorch 已经成功安装在环境中,可以参考 PyTorch [官方文档](https://pytorch.org/)。 我们提供了不同 PyTorch 和 CUDA 版本的 mmcv-full 预编译包,可以大大简化用户安装编译过程。强烈推荐通过预编译包来安装。另外,安装完成后可以运行 [check_installation.py](.dev_scripts/check_installation.py) 脚本检查 mmcv-full 是否安装成功。 i. 安装最新版本 如下是安装最新版 ``mmcv-full`` 的命令 ```shell pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html ``` 请将链接中的 ``{cu_version}`` 和 ``{torch_version}`` 根据自身需求替换成实际的版本号,例如想安装和 ``CUDA 11.1``、``PyTorch 1.9.0`` 兼容的最新版 ``mmcv-full``,使用如下替换过的命令 ```shell pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html ``` **注意**: PyTorch 在 1.x.0 和 1.x.1 之间通常是兼容的,故 mmcv-full 只提供 1.x.0 的编译包。如果你的 PyTorch 版本是 1.x.1,你可以放心地安装在 1.x.0 版本编译的 mmcv-full。例如,如果你的 PyTorch 版本是 1.8.1、CUDA 版本是 11.1,你可以使用以下命令安装 mmcv-full。 ```shell pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html ``` 如果想知道更多 CUDA 和 PyTorch 版本的命令,可以参考下面的表格,将链接中的 ``=={mmcv_version}`` 删去即可。 ii. 
安装特定的版本 如下是安装特定版本 ``mmcv-full`` 的命令 ```shell pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html ``` 首先请参考版本发布信息找到想要安装的版本号,将 ``{mmcv_version}`` 替换成该版本号,例如 ``1.3.9``。 然后将链接中的 ``{cu_version}`` 和 ``{torch_version}`` 根据自身需求替换成实际的版本号,例如想安装和 ``CUDA 11.1``、``PyTorch 1.9.0`` 兼容的 ``mmcv-full`` 1.3.9 版本,使用如下替换过的命令 ```shell pip install mmcv-full==1.3.9 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html ``` 对于更多的 PyTorch 和 CUDA 版本组合,请参考下表:
CUDA torch 1.10 torch 1.9 torch 1.8 torch 1.7 torch 1.6 torch 1.5
11.3
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html
11.1
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html
11.0
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html
10.2
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.10.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.7.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.6.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.5.0/index.html
10.1
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.8.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.7.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.5.0/index.html
9.2
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.7.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.6.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.5.0/index.html
cpu
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.10.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.9.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.7.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.6.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.5.0/index.html
**注意**:以上提供的预编译包并不囊括所有的 mmcv-full 版本,你可以点击对应链接查看支持的版本。例如,点击 [cu102-torch1.8.0](https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html),可以看到 `cu102-torch1.8.0` 只提供了 1.3.0 及以上的 mmcv-full 版本。另外,从 `mmcv v1.3.17` 开始,我们不再提供`PyTorch 1.3 & 1.4` 对应的 mmcv-full 预编译包。你可以在 [这](./docs/zh_cn/get_started/previous_versions.md) 找到 `PyTorch 1.3 & 1.4` 对应的预编包。虽然我们不再提供 `PyTorch 1.3 & 1.4` 对应的预编译包,但是我们依然在 CI 中保证对它们的兼容持续到下一年。 除了使用预编译包之外,另一种方式是在本地进行编译,直接运行下述命令 ```python pip install mmcv-full ``` 但注意本地编译可能会耗时 10 分钟以上。 b. 安装精简版 ```python pip install mmcv ``` c. 安装完整版并且编译 onnxruntime 的自定义算子 - 详细的指南请查看[这里](docs/zh_cn/deployment/onnxruntime_op.md)。 如果想从源码编译 MMCV,请参考[该文档](https://mmcv.readthedocs.io/zh_CN/latest/get_started/build.html)。 ## FAQ 如果你遇到了安装问题,CUDA 相关的问题或者 RuntimeErrors,可以首先参考[问题解决页面](https://mmcv.readthedocs.io/zh_CN/latest/faq.html) 看是否已经有解决方案。 ## 贡献指南 我们感谢所有的贡献者为改进和提升 MMCV 所作出的努力。请参考[贡献指南](CONTRIBUTING.md)来了解参与项目贡献的相关指引。 ## 许可证 `MMCV` 目前以 Apache 2.0 的许可证发布,但是其中有一部分功能并不是使用的 Apache2.0 许可证,我们在 [许可证](LICENSES.md) 中详细地列出了这些功能以及他们对应的许可证,如果您正在从事盈利性活动,请谨慎参考此文档。 ## 欢迎加入 OpenMMLab 社区 扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab),加入 OpenMMLab 团队的 [官方交流 QQ 群](https://jq.qq.com/?_wv=1027&k=GJP18SjI)
我们会在 OpenMMLab 社区为大家 - 📢 分享 AI 框架的前沿核心技术 - 💻 解读 PyTorch 常用模块源码 - 📰 发布 OpenMMLab 的相关新闻 - 🚀 介绍 OpenMMLab 开发的前沿算法 - 🏃 获取更高效的问题答疑和意见反馈 - 🔥 提供与各行各业开发者充分交流的平台 干货满满 📘,等你来撩 💗,OpenMMLab 社区期待您的加入 👬 ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/TERMINOLOGY.md ================================================ # English-Chinese terminology comparison (英汉术语对照) This document is used as a reference for English-Chinese terminology translation. 该文档用作中英文翻译对照参考。 | English | 中文 | | :-----: | :---:| | annotation | 标注 | | backbone | 主干网络 | | benchmark | 基准测试 | | checkpoint | 模型权重文件 | | classifier | 分类器 | | cls_head | 分类头 | | decoder | 解码器 | | detector | 检测器 | | encoder | 编码器 | | finetune | 微调 | | ground truth | 真实标签 | | hook | 钩子 | | localizer | 定位器 | | neck | 模型颈部 | | pipeline | 流水线 | | recognizer | 识别器 | | register | 注册器 | | schedule | 调整 | | scheduler | 调度器 | | segmentor | 分割器 | | tensor | 张量 | | training schedule | 训练策略 | ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/_static/css/readthedocs.css ================================================ .header-logo { background-image: url("../image/mmcv-logo.png"); background-size: 85px 40px; height: 40px; width: 85px; } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/api.rst ================================================ fileio ------- .. automodule:: mmcv.fileio :members: image ------ .. automodule:: mmcv.image :members: video ------ .. automodule:: mmcv.video :members: arraymisc --------- .. automodule:: mmcv.arraymisc :members: visualization -------------- .. automodule:: mmcv.visualization :members: utils ----- .. automodule:: mmcv.utils :members: cnn ---- .. automodule:: mmcv.cnn :members: runner ------ .. automodule:: mmcv.runner :members: engine ------ .. automodule:: mmcv.engine :members: ops ------ .. automodule:: mmcv.ops :members: ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/community/pr.md ================================================ ## Pull Request (PR) ### What is PR `PR` is the abbreviation of `Pull Request`. Here's the definition of `PR` in the [official document](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests) of Github. ``` Pull requests let you tell others about changes you have pushed to a branch in a repository on GitHub. Once a pull request is opened, you can discuss and review the potential changes with collaborators and add follow-up commits before your changes are merged into the base branch. ``` ### Basic Workflow 1. Get the most recent codebase 2. Checkout a new branch from the master branch 3. Commit your changes 4. Push your changes and create a PR 5. Discuss and review your code 6. 
Merge your branch to the master branch ### Procedures in detail #### 1. Get the most recent codebase + When you work on your first PR Fork the OpenMMLab repository: click the **fork** button at the top right corner of Github page ![avatar](../_static/community/1.png) Clone forked repository to local ```bash git clone git@github.com:XXX/mmcv.git ``` Add source repository to upstream ```bash git remote add upstream git@github.com:open-mmlab/mmcv ``` + After your first PR Checkout master branch of the local repository and pull the latest master branch of the source repository ```bash git checkout master git pull upstream master ``` #### 2. Checkout a new branch from the master branch ```bash git checkout -b branchname ``` ```{tip} To make commit history clear, we strongly recommend you checkout the master branch before create a new branch. ``` #### 3. Commit your changes ```bash # coding git add [files] git commit -m 'messages' ``` #### 4. Push your changes to the forked repository and create a PR + Push the branch to your forked remote repository ```bash git push origin branchname ``` + Create a PR ![avatar](../_static/community/2.png) + Revise PR message template to describe your motivation and modifications made in this PR. You can also link the related issue to the PR manually in the PR message (For more information, checkout the [official guidance](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue)). #### 5. Discuss and review your code + After creating a pull request, you can ask a specific person to review the changes you've proposed ![avatar](../_static/community/3.png) + Modify your codes according to reviewers' suggestions and then push your changes #### 6. Merge your branch to the master branch and delete the branch ```bash git branch -d branchname # delete local branch git push origin --delete branchname # delete remote branch ``` ### PR Specs 1. 
Use [pre-commit](https://pre-commit.com) hook to avoid issues of code style 2. One short-time branch should be matched with only one PR 3. Accomplish a detailed change in one PR. Avoid large PR + Bad: Support Faster R-CNN + Acceptable: Add a box head to Faster R-CNN + Good: Add a parameter to box head to support custom conv-layer number 4. Provide clear and significant commit message 5. Provide clear and meaningful PR description + Task name should be clarified in title. The general format is: [Prefix] Short description of the PR (Suffix) + Prefix: add new feature [Feature], fix bug [Fix], related to documents [Docs], in developing [WIP] (which will not be reviewed temporarily) + Introduce main changes, results and influences on other modules in short description + Associate related issues and pull requests with a milestone ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/compatibility.md ================================================ ### v1.3.18 Some ops have different implementations on different devices. Lots of macros and type checks are scattered in several files, which makes the code hard to maintain. For example: ```c++ if (input.device().is_cuda()) { #ifdef MMCV_WITH_CUDA CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(rois); CHECK_CUDA_INPUT(output); CHECK_CUDA_INPUT(argmax_y); CHECK_CUDA_INPUT(argmax_x); roi_align_forward_cuda(input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); #else AT_ERROR("RoIAlign is not compiled with GPU support"); #endif } else { CHECK_CPU_INPUT(input); CHECK_CPU_INPUT(rois); CHECK_CPU_INPUT(output); CHECK_CPU_INPUT(argmax_y); CHECK_CPU_INPUT(argmax_x); roi_align_forward_cpu(input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } ``` Registry and dispatcher are added to manage these implementations. 
```c++ void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned); void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { ROIAlignForwardCUDAKernelLauncher( input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } // register cuda implementation void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned); REGISTER_DEVICE_IMPL(roi_align_forward_impl, CUDA, roi_align_forward_cuda); // roi_align.cpp // use the dispatcher to invoke different implementation depending on device type of input tensors. void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { DISPATCH_DEVICE_IMPL(roi_align_forward_impl, input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } ``` ### v1.3.11 In order to flexibly support more backends and hardwares like `NVIDIA GPUs` and `AMD GPUs`, the directory of `mmcv/ops/csrc` is refactored. Note that this refactoring will not affect the usage in API. For related information, please refer to [PR1206](https://github.com/open-mmlab/mmcv/pull/1206). The original directory was organized as follows. ``` . 
├── common_cuda_helper.hpp ├── ops_cuda_kernel.cuh ├── pytorch_cpp_helper.hpp ├── pytorch_cuda_helper.hpp ├── parrots_cpp_helper.hpp ├── parrots_cuda_helper.hpp ├── parrots_cudawarpfunction.cuh ├── onnxruntime │   ├── onnxruntime_register.h │   ├── onnxruntime_session_options_config_keys.h │   ├── ort_mmcv_utils.h │   ├── ... │   ├── onnx_ops.h │   └── cpu │ ├── onnxruntime_register.cpp │      ├── ... │      └── onnx_ops_impl.cpp ├── parrots │   ├── ... │   ├── ops.cpp │   ├── ops_cuda.cu │   ├── ops_parrots.cpp │   └── ops_pytorch.h ├── pytorch │   ├── ... │   ├── ops.cpp │   ├── ops_cuda.cu │   ├── pybind.cpp └── tensorrt ├── trt_cuda_helper.cuh ├── trt_plugin_helper.hpp ├── trt_plugin.hpp ├── trt_serialize.hpp ├── ... ├── trt_ops.hpp └── plugins    ├── trt_cuda_helper.cu    ├── trt_plugin.cpp    ├── ...    ├── trt_ops.cpp    └── trt_ops_kernel.cu ``` After refactored, it is organized as follows. ``` . ├── common │ ├── box_iou_rotated_utils.hpp │ ├── parrots_cpp_helper.hpp │ ├── parrots_cuda_helper.hpp │ ├── pytorch_cpp_helper.hpp │ ├── pytorch_cuda_helper.hpp │   └── cuda │   ├── common_cuda_helper.hpp │   ├── parrots_cudawarpfunction.cuh │   ├── ... │   └── ops_cuda_kernel.cuh ├── onnxruntime │   ├── onnxruntime_register.h │   ├── onnxruntime_session_options_config_keys.h │   ├── ort_mmcv_utils.h │   ├── ... │   ├── onnx_ops.h │   └── cpu │ ├── onnxruntime_register.cpp │      ├── ... │      └── onnx_ops_impl.cpp ├── parrots │   ├── ... │   ├── ops.cpp │   ├── ops_parrots.cpp │   └── ops_pytorch.h ├── pytorch │   ├── info.cpp │   ├── pybind.cpp │   ├── ... │   ├── ops.cpp │   └── cuda │      ├── ... │      └── ops_cuda.cu └── tensorrt ├── trt_cuda_helper.cuh ├── trt_plugin_helper.hpp ├── trt_plugin.hpp ├── trt_serialize.hpp ├── ... ├── trt_ops.hpp └── plugins    ├── trt_cuda_helper.cu    ├── trt_plugin.cpp    ├── ...    
├── trt_ops.cpp    └── trt_ops_kernel.cu ``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/conf.py ================================================ # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/master/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys import pytorch_sphinx_theme from sphinx.builders.html import StandaloneHTMLBuilder sys.path.insert(0, os.path.abspath('../..')) version_file = '../../mmcv/version.py' with open(version_file, 'r') as f: exec(compile(f.read(), version_file, 'exec')) __version__ = locals()['__version__'] # -- Project information ----------------------------------------------------- project = 'mmcv' copyright = '2018-2021, OpenMMLab' author = 'MMCV Authors' # The short X.Y version version = __version__ # The full version, including alpha/beta/rc tags release = __version__ # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', 'sphinx_markdown_tables', 'myst_parser', 'sphinx_copybutton', ] # yapf: disable autodoc_mock_imports = ['mmcv._ext', 'mmcv.utils.ext_loader', 'torchvision'] # Add any paths that contain templates here, relative to this directory. 
templates_path = ['_templates']

# The suffix(es) of source filenames.
# Maps each accepted file extension to the parser that handles it.
source_suffix = {
    '.rst': 'restructuredtext',
    '.md': 'markdown',
}

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# Set explicitly to English: Sphinx >= 5 rejects ``None`` with an
# "Invalid configuration value" warning and falls back to 'en' anyway,
# so this keeps the same output while silencing the warning.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
# html_theme = 'sphinx_rtd_theme'
html_theme = 'pytorch_sphinx_theme'
html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {
    'menu': [
        {
            'name': 'GitHub',
            'url': 'https://github.com/open-mmlab/mmcv'
        },
    ],
    # Specify the language of shared menu
    'menu_lang': 'en',
}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_css_files = ['css/readthedocs.css']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'mmcvdoc'

# -- Options for LaTeX output ------------------------------------------------

# All entries are left commented out, so the LaTeX builder uses its defaults.
latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'mmcv.tex', 'mmcv Documentation', 'MMCV Contributors',
     'manual'),
]

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, 'mmcv', 'mmcv Documentation', [author], 1)]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'mmcv', 'mmcv Documentation', author, 'mmcv',
     'One line description of project.', 'Miscellaneous'),
]

# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html'] # set priority when building html StandaloneHTMLBuilder.supported_image_types = [ 'image/svg+xml', 'image/gif', 'image/png', 'image/jpeg' ] # -- Extension configuration ------------------------------------------------- # Ignore >>> when copying code copybutton_prompt_text = r'>>> |\.\.\. ' copybutton_prompt_is_regexp = True ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/deployment/mmcv_ops_definition.md ================================================ # MMCV Operators To make custom operators in MMCV more standard, precise definitions of each operator are listed in this document. - [MMCV Operators](#mmcv-operators) - [MMCVBorderAlign](#mmcvborderalign) - [Description](#description) - [Parameters](#parameters) - [Inputs](#inputs) - [Outputs](#outputs) - [Type Constraints](#type-constraints) - [MMCVCARAFE](#mmcvcarafe) - [Description](#description-1) - [Parameters](#parameters-1) - [Inputs](#inputs-1) - [Outputs](#outputs-1) - [Type Constraints](#type-constraints-1) - [MMCVCAWeight](#mmcvcaweight) - [Description](#description-2) - [Parameters](#parameters-2) - [Inputs](#inputs-2) - [Outputs](#outputs-2) - [Type Constraints](#type-constraints-2) - [MMCVCAMap](#mmcvcamap) - [Description](#description-3) - [Parameters](#parameters-3) - [Inputs](#inputs-3) - [Outputs](#outputs-3) - [Type Constraints](#type-constraints-3) - [MMCVCornerPool](#mmcvcornerpool) - [Description](#description-4) - [Parameters](#parameters-4) - [Inputs](#inputs-4) - [Outputs](#outputs-4) - [Type Constraints](#type-constraints-4) - [MMCVDeformConv2d](#mmcvdeformconv2d) - [Description](#description-5) - [Parameters](#parameters-5) - [Inputs](#inputs-5) - [Outputs](#outputs-5) - [Type Constraints](#type-constraints-5) - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d) - [Description](#description-6) - [Parameters](#parameters-6) - [Inputs](#inputs-6) - [Outputs](#outputs-6) - [Type 
Constraints](#type-constraints-6) - [MMCVDeformRoIPool](#mmcvdeformroipool) - [Description](#description-7) - [Parameters](#parameters-7) - [Inputs](#inputs-7) - [Outputs](#outputs-7) - [Type Constraints](#type-constraints-7) - [MMCVMaskedConv2d](#mmcvmaskedconv2d) - [Description](#description-8) - [Parameters](#parameters-8) - [Inputs](#inputs-8) - [Outputs](#outputs-8) - [Type Constraints](#type-constraints-8) - [MMCVPSAMask](#mmcvpsamask) - [Description](#description-9) - [Parameters](#parameters-9) - [Inputs](#inputs-9) - [Outputs](#outputs-9) - [Type Constraints](#type-constraints-9) - [NonMaxSuppression](#nonmaxsuppression) - [Description](#description-10) - [Parameters](#parameters-10) - [Inputs](#inputs-10) - [Outputs](#outputs-10) - [Type Constraints](#type-constraints-10) - [MMCVRoIAlign](#mmcvroialign) - [Description](#description-11) - [Parameters](#parameters-11) - [Inputs](#inputs-11) - [Outputs](#outputs-11) - [Type Constraints](#type-constraints-11) - [MMCVRoIAlignRotated](#mmcvroialignrotated) - [Description](#description-12) - [Parameters](#parameters-12) - [Inputs](#inputs-12) - [Outputs](#outputs-12) - [Type Constraints](#type-constraints-12) - [grid_sampler*](#grid_sampler) - [Description](#description-13) - [Parameters](#parameters-13) - [Inputs](#inputs-13) - [Outputs](#outputs-13) - [Type Constraints](#type-constraints-13) - [cummax*](#cummax) - [Description](#description-14) - [Parameters](#parameters-14) - [Inputs](#inputs-14) - [Outputs](#outputs-14) - [Type Constraints](#type-constraints-14) - [cummin*](#cummin) - [Description](#description-15) - [Parameters](#parameters-15) - [Inputs](#inputs-15) - [Outputs](#outputs-15) - [Type Constraints](#type-constraints-15) - [Reminders](#reminders) ## MMCVBorderAlign ### Description Applies `border_align` over the input feature based on predicted bboxes. For each border line (e.g. 
top, left, bottom or right) of each box, border_align does the following: - uniformly samples `pool_size`+1 positions on this line, including the start and end points. - the corresponding features on these points are computed by bilinear interpolation. - max pooling over all the `pool_size`+1 positions is used for computing the pooled feature. Read [BorderDet: Border Feature for Dense Object Detection](https://arxiv.org/abs/2007.11056) for more detailed information. ### Parameters | Type | Parameter | Description | |-------|-------------|-------------------------------------------------------------------------------------| | `int` | `pool_size` | number of positions sampled over the boxes' borders(e.g. top, bottom, left, right). | ### Inputs
input: T
Features with shape [N,4C,H,W]. Channels ranged in [0,C), [C,2C), [2C,3C), [3C,4C) represent the top, left, bottom, right features respectively
boxes: T
Boxes with shape [N,H*W,4]. Coordinate format (x1,y1,x2,y2).
### Outputs
output: T
Pooled features with shape [N,C,H*W,4]. The order is (top, left, bottom, right) for the last dimension.
### Type Constraints - T:tensor(float32) ## MMCVCARAFE ### Description CARAFE operator performs feature upsampling. Read [CARAFE: Content-Aware ReAssembly of FEatures](https://arxiv.org/abs/1905.02188) for more detailed information. ### Parameters | Type | Parameter | Description | |---------|----------------|-----------------------------------------------| | `int` | `kernel_size` | reassemble kernel size, should be odd integer | | `int` | `group_size` | reassemble group size | | `float` | `scale_factor` | upsample ratio(>=1) | ### Inputs
features: T
Input features. 4-D tensor of shape (N, C, H, W). N is the batch size.
masks: T
The input mask
### Outputs
output: T
The upsampled features. 4-D tensor of shape (N, C, H * scale_factor, W * scale_factor). N is the batch size.
### Type Constraints - T:tensor(float32) ## MMCVCAWeight ### Description Operator for Criss-Cross Attention. Read [CCNet: Criss-Cross Attention for Semantic Segmentation](https://arxiv.org/pdf/1811.11721.pdf) for more detailed information. ### Parameters None ### Inputs
t: T
The query matrix of shape (N, C', H, W).
f: T
The key matrix of shape (N, C', H, W).
### Outputs
weight: T
The attention map of shape (N, H+W-1, H, W).
### Type Constraints - T:tensor(float32) ## MMCVCAMap ### Description Operator for Criss-Cross Attention. Read [CCNet: Criss-Cross Attention for Semantic Segmentation](https://arxiv.org/pdf/1811.11721.pdf) for more detailed information. ### Parameters None ### Inputs
weight: T
Output from the operator MMCVCAWeight.
value: T
The value matrix of shape (N, C, H, W).
### Outputs
output: T
Output tensor of aggregated contextual information
### Type Constraints - T:tensor(float32) ## MMCVCornerPool ### Description Perform CornerPool on `input` features. Read [CornerNet -- Detecting Objects as Paired Keypoints](https://arxiv.org/abs/1808.01244) for more details. ### Parameters | Type | Parameter | Description | |-------|-----------|------------------------------------------------------------------| | `int` | `mode` | corner pool mode, (0: `top`, 1: `bottom`, 2: `left`, 3: `right`) | ### Inputs
input: T
Input features. 4-D tensor of shape (N, C, H, W). N is the batch size.
### Outputs
output: T
The pooled features. 4-D tensor of shape (N, C, H, W).
### Type Constraints - T:tensor(float32) ## MMCVDeformConv2d ### Description Applies a deformable 2D convolution over an input signal composed of several input planes. Read [Deformable Convolutional Networks](https://arxiv.org/pdf/1703.06211.pdf) for detail. ### Parameters | Type | Parameter | Description | |----------------|---------------------|-------------------------------------------------------------------------------------------------------------------| | `list of ints` | `stride` | The stride of the convolving kernel, (sH, sW). Defaults to `(1, 1)`. | | `list of ints` | `padding` | Paddings on both sides of the input, (padH, padW). Defaults to `(0, 0)`. | | `list of ints` | `dilation` | The spacing between kernel elements (dH, dW). Defaults to `(1, 1)`. | | `int` | `groups` | Split input into groups. `input_channel` should be divisible by the number of groups. Defaults to `1`. | | `int` | `deformable_groups` | Groups of deformable offset. Defaults to `1`. | | `int` | `bias` | Whether to add a learnable bias to the output. `0` stands for `False` and `1` stands for `True`. Defaults to `0`. | | `int` | `im2col_step` | Number of samples processed per im2col step; the batch is split into chunks of this size to limit the memory used by the column buffer. Defaults to `32`. | ### Inputs
input: T
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
offset: T
Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.
weight: T
Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).
### Outputs
output: T
Output feature; 4-D tensor of shape (N, output_channel, outH, outW).
### Type Constraints - T:tensor(float32, Linear) ## MMCVModulatedDeformConv2d ### Description Perform Modulated Deformable Convolution on input feature, read [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline) for detail. ### Parameters | Type | Parameter | Description | |----------------|---------------------|---------------------------------------------------------------------------------------| | `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW) | | `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW) | | `list of ints` | `dilation` | The spacing between kernel elements. (dH, dW) | | `int` | `deformable_groups` | Groups of deformable offset. | | `int` | `groups` | Split input into groups. `input_channel` should be divisible by the number of groups. | ### Inputs
feature: T
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
offset: T
Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.
mask: T
Input mask; 4-D tensor of shape (N, deformable_group* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.
weight: T
Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).
bias: T, optional
Input bias; 1-D tensor of shape (output_channel).
### Outputs
output: T
Output feature; 4-D tensor of shape (N, output_channel, outH, outW).
### Type Constraints - T:tensor(float32, Linear) ## MMCVDeformRoIPool ### Description Deformable roi pooling layer ### Parameters | Type | Parameter | Description | |---------|------------------|---------------------------------------------------------------------------------------------------------------| | `int` | `output_height` | height of output roi | | `int` | `output_width` | width of output roi | | `float` | `spatial_scale` | used to scale the input boxes | | `int` | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. | | `float` | `gamma` | gamma | ### Inputs
input: T
Input feature map; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the numbers of channels, H and W are the height and width of the data.
rois: T
RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...]. The RoIs' coordinates are the coordinate system of input.
offset: T
offset of height and width. Defaults to a tensor of zero
### Outputs
feat: T
RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element feat[r-1] is a pooled feature map corresponding to the r-th RoI RoIs[r-1].
### Type Constraints - T:tensor(float32) ## MMCVMaskedConv2d ### Description Performs a masked 2D convolution from PixelRNN Read [Pixel Recurrent Neural Networks](https://arxiv.org/abs/1601.06759) for more detailed information. ### Parameters | Type | Parameter | Description | |----------------|-----------|----------------------------------------------------------------------------------| | `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW). **Only support stride=1 in mmcv** | | `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW). Defaults to `(0, 0)`. | ### Inputs
features: T
Input features; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the numbers of channels, H and W are the height and width of the data.
mask: T
Input mask; 3D tensor of shape (N, H, W)
weight: T
The learnable weights of the module
bias: T
The learnable bias of the module
### Outputs
output: T
The output convolved feature
### Type Constraints - T:tensor(float32) ## MMCVPSAMask ### Description An operator from PSANet. Read [PSANet: Point-wise Spatial Attention Network for Scene Parsing](https://hszhao.github.io/papers/eccv18_psanet.pdf) for more detailed information. ### Parameters | Type | Parameter | Description | |----------------|-------------|----------------------------------------------| | `int` | `psa_type` | `0` means collect and `1` means `distribute` | | `list of ints` | `mask_size` | The size of mask | ### Inputs
input: T
Input feature; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the numbers of channels, H and W are the height and width of the data.
### Outputs
output: T
Output tensor of shape (N, H * W, H, W)
### Type Constraints - T:tensor(float32) ## NonMaxSuppression ### Description Filter out boxes that have high IoU overlap with previously selected boxes or a low score. Output the indices of valid boxes. Note that this definition is slightly different from [onnx: NonMaxSuppression](https://github.com/onnx/onnx/blob/master/docs/Operators.md#nonmaxsuppression) ### Parameters | Type | Parameter | Description | |---------|------------------------------|--------------------------------------------------------------------------------------------------------------------------------------| | `int` | `center_point_box` | 0 - the box data is supplied as [y1, x1, y2, x2], 1-the box data is supplied as [x_center, y_center, width, height]. | | `int` | `max_output_boxes_per_class` | The maximum number of boxes to be selected per batch per class. Default to 0, number of output boxes equal to number of input boxes. | | `float` | `iou_threshold` | The threshold for deciding whether boxes overlap too much with respect to IoU. Value range [0, 1]. Default to 0. | | `float` | `score_threshold` | The threshold for deciding when to remove boxes based on score. | | `int` | `offset` | 0 or 1, boxes' width or height is (x2 - x1 + offset). | ### Inputs
boxes: T
Input boxes. 3-D tensor of shape (num_batches, spatial_dimension, 4).
scores: T
Input scores. 3-D tensor of shape (num_batches, num_classes, spatial_dimension).
### Outputs
indices: tensor(int32, Linear)
Selected indices. 2-D tensor of shape (num_selected_indices, 3) as [[batch_index, class_index, box_index], ...].
num_selected_indices=num_batches* num_classes* min(max_output_boxes_per_class, spatial_dimension).
All invalid indices will be filled with -1.
### Type Constraints - T:tensor(float32, Linear) ## MMCVRoIAlign ### Description Perform RoIAlign on output feature, used in bbox_head of most two-stage detectors. ### Parameters | Type | Parameter | Description | |---------|------------------|---------------------------------------------------------------------------------------------------------------| | `int` | `output_height` | height of output roi | | `int` | `output_width` | width of output roi | | `float` | `spatial_scale` | used to scale the input boxes | | `int` | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. | | `str` | `mode` | pooling mode in each bin. `avg` or `max` | | `int` | `aligned` | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly. | ### Inputs
input: T
Input feature map; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the numbers of channels, H and W are the height and width of the data.
rois: T
RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...]. The RoIs' coordinates are the coordinate system of input.
### Outputs
feat: T
RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element feat[r-1] is a pooled feature map corresponding to the r-th RoI RoIs[r-1].
### Type Constraints - T:tensor(float32) ## MMCVRoIAlignRotated ### Description Perform RoI align pooling for rotated proposals ### Parameters | Type | Parameter | Description | |---------|------------------|---------------------------------------------------------------------------------------------------------------| | `int` | `output_height` | height of output roi | | `int` | `output_width` | width of output roi | | `float` | `spatial_scale` | used to scale the input boxes | | `int` | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. | | `str` | `mode` | pooling mode in each bin. `avg` or `max` | | `int` | `aligned` | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly. | | `int` | `clockwise` | If `clockwise=1`, the rotation angle of each RoI is measured clockwise in image space; otherwise it is measured counterclockwise. | ### Inputs
features: T
Input feature map; 4D tensor of shape (N, C, H, W)
rois: T
RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...]. The RoIs' coordinates are the coordinate system of input.
### Outputs
feat: T
RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element feat[r-1] is a pooled feature map corresponding to the r-th RoI RoIs[r-1].
### Type Constraints - T:tensor(float32) ## grid_sampler* ### Description Perform sample from `input` with pixel locations from `grid`. Check [torch.nn.functional.grid_sample](https://pytorch.org/docs/stable/generated/torch.nn.functional.grid_sample.html?highlight=grid_sample#torch.nn.functional.grid_sample) for more information. ### Parameters | Type | Parameter | Description | |-------|----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `int` | `interpolation_mode` | Interpolation mode to calculate output values. (0: `bilinear` , 1: `nearest`) | | `int` | `padding_mode` | Padding mode for outside grid values. (0: `zeros`, 1: `border`, 2: `reflection`) | | `int` | `align_corners` | If `align_corners=1`, the extrema (`-1` and `1`) are considered as referring to the center points of the input's corner pixels. If `align_corners=0`, they are instead considered as referring to the corner points of the input's corner pixels, making the sampling more resolution agnostic. | ### Inputs
input: T
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the numbers of channels, inH and inW are the height and width of the data.
grid: T
Input offset; 4-D tensor of shape (N, outH, outW, 2), where outH and outW are the height and width of offset and output.
### Outputs
output: T
Output feature; 4-D tensor of shape (N, C, outH, outW).
### Type Constraints - T:tensor(float32, Linear) ## cummax* ### Description Returns a tuple (`values`, `indices`) where `values` is the cumulative maximum elements of `input` in the dimension `dim`. And `indices` is the index location of each maximum value found in the dimension `dim`. Read [torch.cummax](https://pytorch.org/docs/stable/generated/torch.cummax.html) for more details. ### Parameters | Type | Parameter | Description | |-------|-----------|----------------------------------------| | `int` | `dim` | the dimension to do the operation over | ### Inputs
input: T
The input tensor with various shapes. Tensor with empty element is also supported.
### Outputs
output: T
Output the cumulative maximum elements of `input` in the dimension `dim`, with the same shape and dtype as `input`.
indices: tensor(int64)
Output the index location of each cumulative maximum value found in the dimension `dim`, with the same shape as `input`.
### Type Constraints - T:tensor(float32) ## cummin* ### Description Returns a tuple (`values`, `indices`) where `values` is the cumulative minimum elements of `input` in the dimension `dim`. And `indices` is the index location of each minimum value found in the dimension `dim`. Read [torch.cummin](https://pytorch.org/docs/stable/generated/torch.cummin.html) for more details. ### Parameters | Type | Parameter | Description | |-------|-----------|----------------------------------------| | `int` | `dim` | the dimension to do the operation over | ### Inputs
input: T
The input tensor with various shapes. Tensor with empty element is also supported.
### Outputs
output: T
Output the cumulative minimum elements of `input` in the dimension `dim`, with the same shape and dtype as `input`.
indices: tensor(int64)
Output the index location of each cumulative minimum value found in the dimension `dim`, with the same shape as `input`.
### Type Constraints - T:tensor(float32) ## Reminders - Operators ending with `*` are defined in Torch and are included here for the conversion to ONNX. ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/deployment/onnx.md ================================================ ## Introduction of mmcv.onnx module ### register_extra_symbolics Some extra symbolic functions need to be registered before exporting a PyTorch model to ONNX. #### Example ```python import mmcv from mmcv.onnx import register_extra_symbolics opset_version = 11 register_extra_symbolics(opset_version) ``` #### Reminder - *Please note that this feature is experimental and may change in the future.* #### FAQs - None ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/deployment/onnxruntime_custom_ops.md ================================================ ## ONNX Runtime Custom Ops - [ONNX Runtime Custom Ops](#onnx-runtime-custom-ops) - [SoftNMS](#softnms) - [Description](#description) - [Parameters](#parameters) - [Inputs](#inputs) - [Outputs](#outputs) - [Type Constraints](#type-constraints) - [RoIAlign](#roialign) - [Description](#description-1) - [Parameters](#parameters-1) - [Inputs](#inputs-1) - [Outputs](#outputs-1) - [Type Constraints](#type-constraints-1) - [NMS](#nms) - [Description](#description-2) - [Parameters](#parameters-2) - [Inputs](#inputs-2) - [Outputs](#outputs-2) - [Type Constraints](#type-constraints-2) - [grid_sampler](#grid_sampler) - [Description](#description-3) - [Parameters](#parameters-3) - [Inputs](#inputs-3) - [Outputs](#outputs-3) - [Type Constraints](#type-constraints-3) - [CornerPool](#cornerpool) - [Description](#description-4) - [Parameters](#parameters-4) - [Inputs](#inputs-4) - [Outputs](#outputs-4) - [Type Constraints](#type-constraints-4) - [cummax](#cummax) - [Description](#description-5) - [Parameters](#parameters-5) - [Inputs](#inputs-5) - [Outputs](#outputs-5) - [Type
Constraints](#type-constraints-5) - [cummin](#cummin) - [Description](#description-6) - [Parameters](#parameters-6) - [Inputs](#inputs-6) - [Outputs](#outputs-6) - [Type Constraints](#type-constraints-6) - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d) - [Description](#description-7) - [Parameters](#parameters-7) - [Inputs](#inputs-7) - [Outputs](#outputs-7) - [Type Constraints](#type-constraints-7) - [MMCVDeformConv2d](#mmcvdeformconv2d) - [Description](#description-8) - [Parameters](#parameters-8) - [Inputs](#inputs-8) - [Outputs](#outputs-8) - [Type Constraints](#type-constraints-8) ### SoftNMS #### Description Perform soft NMS on `boxes` with `scores`. Read [Soft-NMS -- Improving Object Detection With One Line of Code](https://arxiv.org/abs/1704.04503) for detail. #### Parameters | Type | Parameter | Description | |---------|-----------------|----------------------------------------------------------------| | `float` | `iou_threshold` | IoU threshold for NMS | | `float` | `sigma` | hyperparameter for gaussian method | | `float` | `min_score` | score filter threshold | | `int` | `method` | method to do the nms, (0: `naive`, 1: `linear`, 2: `gaussian`) | | `int` | `offset` | `boxes` width or height is (x2 - x1 + offset). (0 or 1) | #### Inputs
boxes: T
Input boxes. 2-D tensor of shape (N, 4). N is the number of boxes.
scores: T
Input scores. 1-D tensor of shape (N, ).
#### Outputs
dets: T
Output boxes and scores. 2-D tensor of shape (num_valid_boxes, 5), [[x1, y1, x2, y2, score], ...]. num_valid_boxes is the number of valid boxes.
indices: tensor(int64)
Output indices. 1-D tensor of shape (num_valid_boxes, ).
#### Type Constraints - T:tensor(float32) ### RoIAlign #### Description Perform RoIAlign on output feature, used in bbox_head of most two-stage detectors. #### Parameters | Type | Parameter | Description | |---------|------------------|---------------------------------------------------------------------------------------------------------------| | `int` | `output_height` | height of output roi | | `int` | `output_width` | width of output roi | | `float` | `spatial_scale` | used to scale the input boxes | | `int` | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. | | `str` | `mode` | pooling mode in each bin. `avg` or `max` | | `int` | `aligned` | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly. | #### Inputs
input: T
Input feature map; 4-D tensor of shape (N, C, H, W), where N is the batch size, C is the number of channels, H and W are the height and width of the data.
rois: T
RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...]. The RoIs' coordinates are in the coordinate system of input.
#### Outputs
feat: T
RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element feat[r-1] is a pooled feature map corresponding to the r-th RoI rois[r-1].
#### Type Constraints - T:tensor(float32) ### NMS #### Description Filter out boxes has high IoU overlap with previously selected boxes. #### Parameters | Type | Parameter | Description | |---------|-----------------|------------------------------------------------------------------------------------------------------------------| | `float` | `iou_threshold` | The threshold for deciding whether boxes overlap too much with respect to IoU. Value range [0, 1]. Default to 0. | | `int` | `offset` | 0 or 1, boxes' width or height is (x2 - x1 + offset). | #### Inputs
bboxes: T
Input boxes. 2-D tensor of shape (num_boxes, 4). num_boxes is the number of input boxes.
scores: T
Input scores. 1-D tensor of shape (num_boxes, ).
#### Outputs
indices: tensor(int32, Linear)
Selected indices. 1-D tensor of shape (num_valid_boxes, ). num_valid_boxes is the number of valid boxes.
#### Type Constraints - T:tensor(float32) ### grid_sampler #### Description Perform sample from `input` with pixel locations from `grid`. #### Parameters | Type | Parameter | Description | |-------|----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `int` | `interpolation_mode` | Interpolation mode to calculate output values. (0: `bilinear` , 1: `nearest`) | | `int` | `padding_mode` | Padding mode for outside grid values. (0: `zeros`, 1: `border`, 2: `reflection`) | | `int` | `align_corners` | If `align_corners=1`, the extrema (`-1` and `1`) are considered as referring to the center points of the input's corner pixels. If `align_corners=0`, they are instead considered as referring to the corner points of the input's corner pixels, making the sampling more resolution agnostic. | #### Inputs
input: T
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
grid: T
Input offset; 4-D tensor of shape (N, outH, outW, 2), where outH and outW are the height and width of the offset and output.
#### Outputs
output: T
Output feature; 4-D tensor of shape (N, C, outH, outW).
#### Type Constraints - T:tensor(float32, Linear) ### CornerPool #### Description Perform CornerPool on `input` features. Read [CornerNet -- Detecting Objects as Paired Keypoints](https://arxiv.org/abs/1808.01244) for more details. #### Parameters | Type | Parameter | Description | |-------|-----------|------------------------------------------------------------------| | `int` | `mode` | corner pool mode, (0: `top`, 1: `bottom`, 2: `left`, 3: `right`) | #### Inputs
input: T
Input features. 4-D tensor of shape (N, C, H, W). N is the batch size.
#### Outputs
output: T
Output the pooled features. 4-D tensor of shape (N, C, H, W).
#### Type Constraints - T:tensor(float32) ### cummax #### Description Returns a tuple (`values`, `indices`) where `values` is the cumulative maximum elements of `input` in the dimension `dim`. And `indices` is the index location of each maximum value found in the dimension `dim`. Read [torch.cummax](https://pytorch.org/docs/stable/generated/torch.cummax.html) for more details. #### Parameters | Type | Parameter | Description | |-------|-----------|----------------------------------------| | `int` | `dim` | the dimension to do the operation over | #### Inputs
input: T
The input tensor, which may have any shape. Tensors with no elements are also supported.
#### Outputs
output: T
Output the cumulative maximum elements of `input` in the dimension `dim`, with the same shape and dtype as `input`.
indices: tensor(int64)
Output the index location of each cumulative maximum value found in the dimension `dim`, with the same shape as `input`.
#### Type Constraints - T:tensor(float32) ### cummin #### Description Returns a tuple (`values`, `indices`) where `values` is the cumulative minimum elements of `input` in the dimension `dim`. And `indices` is the index location of each minimum value found in the dimension `dim`. Read [torch.cummin](https://pytorch.org/docs/stable/generated/torch.cummin.html) for more details. #### Parameters | Type | Parameter | Description | |-------|-----------|----------------------------------------| | `int` | `dim` | the dimension to do the operation over | #### Inputs
input: T
The input tensor, which may have any shape. Tensors with no elements are also supported.
#### Outputs
output: T
Output the cumulative minimum elements of `input` in the dimension `dim`, with the same shape and dtype as `input`.
indices: tensor(int64)
Output the index location of each cumulative minimum value found in the dimension `dim`, with the same shape as `input`.
#### Type Constraints - T:tensor(float32) ### MMCVModulatedDeformConv2d #### Description Perform Modulated Deformable Convolution on input feature, read [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline) for detail. #### Parameters | Type | Parameter | Description | |----------------|---------------------|---------------------------------------------------------------------------------------| | `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW) | | `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW) | | `list of ints` | `dilation` | The spacing between kernel elements. (dH, dW) | | `int` | `deformable_groups` | Groups of deformable offset. | | `int` | `groups` | Split input into groups. `input_channel` should be divisible by the number of groups. | #### Inputs
inputs[0]: T
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
inputs[1]: T
Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of the weight, and outH and outW are the height and width of the offset and output.
inputs[2]: T
Input mask; 4-D tensor of shape (N, deformable_group* kH* kW, outH, outW), where kH and kW are the height and width of the weight, and outH and outW are the height and width of the offset and output.
inputs[3]: T
Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).
inputs[4]: T, optional
Input bias; 1-D tensor of shape (output_channel).
#### Outputs
outputs[0]: T
Output feature; 4-D tensor of shape (N, output_channel, outH, outW).
#### Type Constraints - T:tensor(float32, Linear) ### MMCVDeformConv2d #### Description Perform Deformable Convolution on input feature, read [Deformable Convolutional Network](https://arxiv.org/abs/1703.06211) for detail. #### Parameters | Type | Parameter | Description | |----------------|--------------------|-----------------------------------------------------------------------------------------------------------------------------------| | `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW) | | `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW) | | `list of ints` | `dilation` | The spacing between kernel elements. (dH, dW) | | `int` | `deformable_group` | Groups of deformable offset. | | `int` | `group` | Split input into groups. `input_channel` should be divisible by the number of groups. | | `int` | `im2col_step` | DeformableConv2d use im2col to compute convolution. im2col_step is used to split input and offset, reduce memory usage of column. | #### Inputs
inputs[0]: T
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
inputs[1]: T
Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of the weight, and outH and outW are the height and width of the offset and output.
inputs[2]: T
Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).
#### Outputs
outputs[0]: T
Output feature; 4-D tensor of shape (N, output_channel, outH, outW).
#### Type Constraints - T:tensor(float32, Linear) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/deployment/onnxruntime_op.md ================================================ ## ONNX Runtime Deployment ### Introduction of ONNX Runtime **ONNX Runtime** is a cross-platform inferencing and training accelerator compatible with many popular ML/DNN frameworks. Check its [github](https://github.com/microsoft/onnxruntime) for more information. ### Introduction of ONNX **ONNX** stands for **Open Neural Network Exchange**, which acts as *Intermediate Representation(IR)* for ML/DNN models from many frameworks. Check its [github](https://github.com/onnx/onnx) for more information. ### Why include custom operators for ONNX Runtime in MMCV - To verify the correctness of exported ONNX models in ONNX Runtime. - To ease the deployment of ONNX models with custom operators from `mmcv.ops` in ONNX Runtime. ### List of operators for ONNX Runtime supported in MMCV | Operator | CPU | GPU | MMCV Releases | |:-------------------------------------------------------|:---:|:---:|:-------------:| | [SoftNMS](onnxruntime_custom_ops.md#softnms) | Y | N | 1.2.3 | | [RoIAlign](onnxruntime_custom_ops.md#roialign) | Y | N | 1.2.5 | | [NMS](onnxruntime_custom_ops.md#nms) | Y | N | 1.2.7 | | [grid_sampler](onnxruntime_custom_ops.md#grid_sampler) | Y | N | 1.3.1 | | [CornerPool](onnxruntime_custom_ops.md#cornerpool) | Y | N | 1.3.4 | | [cummax](onnxruntime_custom_ops.md#cummax) | Y | N | 1.3.4 | | [cummin](onnxruntime_custom_ops.md#cummin) | Y | N | 1.3.4 | ### How to build custom operators for ONNX Runtime *Please be noted that only **onnxruntime>=1.8.1** of CPU version on Linux platform is tested by now.* #### Prerequisite - Clone repository ```bash git clone https://github.com/open-mmlab/mmcv.git ``` - Download `onnxruntime-linux` from ONNX Runtime [releases](https://github.com/microsoft/onnxruntime/releases/tag/v1.8.1), extract it, expose 
`ONNXRUNTIME_DIR` and finally add the lib path to `LD_LIBRARY_PATH` as below: ```bash wget https://github.com/microsoft/onnxruntime/releases/download/v1.8.1/onnxruntime-linux-x64-1.8.1.tgz tar -zxvf onnxruntime-linux-x64-1.8.1.tgz cd onnxruntime-linux-x64-1.8.1 export ONNXRUNTIME_DIR=$(pwd) export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH ``` #### Build on Linux ```bash cd mmcv ## to MMCV root directory MMCV_WITH_OPS=1 MMCV_WITH_ORT=1 python setup.py develop ``` ### How to do inference using exported ONNX models with custom operators in ONNX Runtime in python Install ONNX Runtime with `pip` ```bash pip install onnxruntime==1.8.1 ``` Inference Demo ```python import os import numpy as np import onnxruntime as ort from mmcv.ops import get_onnxruntime_op_path ort_custom_op_path = get_onnxruntime_op_path() assert os.path.exists(ort_custom_op_path) session_options = ort.SessionOptions() session_options.register_custom_ops_library(ort_custom_op_path) ## exported ONNX model with custom operators onnx_file = 'sample.onnx' input_data = np.random.randn(1, 3, 224, 224).astype(np.float32) sess = ort.InferenceSession(onnx_file, session_options) onnx_results = sess.run(None, {'input' : input_data}) ``` ### How to add a new custom operator for ONNX Runtime in MMCV #### Reminder - *Please note that this feature is experimental and may change in the future. Strongly suggest users always try with the latest master branch.* - The custom operator is not included in [supported operator list](https://github.com/microsoft/onnxruntime/blob/master/docs/OperatorKernels.md) in ONNX Runtime. - The custom operator should be able to be exported to ONNX. #### Main procedures Take custom operator `soft_nms` for example. 1. Add header `soft_nms.h` to ONNX Runtime include directory `mmcv/ops/csrc/onnxruntime/` 2. Add source `soft_nms.cpp` to ONNX Runtime source directory `mmcv/ops/csrc/onnxruntime/cpu/` 3. 
Register `soft_nms` operator in [onnxruntime_register.cpp](../../mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp) ```c++ #include "soft_nms.h" SoftNmsOp c_SoftNmsOp; if (auto status = ortApi->CustomOpDomain_Add(domain, &c_SoftNmsOp)) { return status; } ``` 4. Add unit test into `tests/test_ops/test_onnx.py` Check [here](../../tests/test_ops/test_onnx.py) for examples. **Finally, you are welcome to send us a PR adding custom operators for ONNX Runtime in MMCV.** :nerd_face: ### Known Issues - "RuntimeError: tuple appears in op that does not forward tuples, unsupported kind: `prim::PythonOp`." 1. Note that, in general, `cummax` or `cummin` is exportable to ONNX as long as the torch version is >= 1.5.0, since `torch.cummax` is only supported with torch >= 1.5.0. However, when `cummax` or `cummin` serves as an intermediate component whose outputs are used as inputs to other modules, the torch version must be >= 1.7.0. Otherwise, the above error might arise when running the exported ONNX model with onnxruntime. 2. Solution: update the torch version to 1.7.0 or higher. 
### References - [How to export Pytorch model with custom op to ONNX and run it in ONNX Runtime](https://github.com/onnx/tutorials/blob/master/PyTorchCustomOperator/README.md) - [How to add a custom operator/kernel in ONNX Runtime](https://github.com/microsoft/onnxruntime/blob/master/docs/AddingCustomOp.md) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/deployment/tensorrt_custom_ops.md ================================================ ## TensorRT Custom Ops - [TensorRT Custom Ops](#tensorrt-custom-ops) - [MMCVRoIAlign](#mmcvroialign) - [Description](#description) - [Parameters](#parameters) - [Inputs](#inputs) - [Outputs](#outputs) - [Type Constraints](#type-constraints) - [ScatterND](#scatternd) - [Description](#description-1) - [Parameters](#parameters-1) - [Inputs](#inputs-1) - [Outputs](#outputs-1) - [Type Constraints](#type-constraints-1) - [NonMaxSuppression](#nonmaxsuppression) - [Description](#description-2) - [Parameters](#parameters-2) - [Inputs](#inputs-2) - [Outputs](#outputs-2) - [Type Constraints](#type-constraints-2) - [MMCVDeformConv2d](#mmcvdeformconv2d) - [Description](#description-3) - [Parameters](#parameters-3) - [Inputs](#inputs-3) - [Outputs](#outputs-3) - [Type Constraints](#type-constraints-3) - [grid_sampler](#grid_sampler) - [Description](#description-4) - [Parameters](#parameters-4) - [Inputs](#inputs-4) - [Outputs](#outputs-4) - [Type Constraints](#type-constraints-4) - [cummax](#cummax) - [Description](#description-5) - [Parameters](#parameters-5) - [Inputs](#inputs-5) - [Outputs](#outputs-5) - [Type Constraints](#type-constraints-5) - [cummin](#cummin) - [Description](#description-6) - [Parameters](#parameters-6) - [Inputs](#inputs-6) - [Outputs](#outputs-6) - [Type Constraints](#type-constraints-6) - [MMCVInstanceNormalization](#mmcvinstancenormalization) - [Description](#description-7) - [Parameters](#parameters-7) - [Inputs](#inputs-7) - [Outputs](#outputs-7) - [Type 
Constraints](#type-constraints-7) - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d) - [Description](#description-8) - [Parameters](#parameters-8) - [Inputs](#inputs-8) - [Outputs](#outputs-8) - [Type Constraints](#type-constraints-8) ### MMCVRoIAlign #### Description Perform RoIAlign on output feature, used in bbox_head of most two stage detectors. #### Parameters | Type | Parameter | Description | | ------- | ---------------- | ------------------------------------------------------------------------------------------------------------- | | `int` | `output_height` | height of output roi | | `int` | `output_width` | width of output roi | | `float` | `spatial_scale` | used to scale the input boxes | | `int` | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. | | `str` | `mode` | pooling mode in each bin. `avg` or `max` | | `int` | `aligned` | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly. | #### Inputs
inputs[0]: T
Input feature map; 4-D tensor of shape (N, C, H, W), where N is the batch size, C is the number of channels, H and W are the height and width of the data.
inputs[1]: T
RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...]. The RoIs' coordinates are in the coordinate system of inputs[0].
#### Outputs
outputs[0]: T
RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element outputs[0][r-1] is a pooled feature map corresponding to the r-th RoI inputs[1][r-1].
#### Type Constraints - T:tensor(float32, Linear) ### ScatterND #### Description ScatterND takes three inputs `data` tensor of rank r >= 1, `indices` tensor of rank q >= 1, and `updates` tensor of rank q + r - indices.shape[-1] - 1. The output of the operation is produced by creating a copy of the input `data`, and then updating its value to values specified by updates at specific index positions specified by `indices`. Its output shape is the same as the shape of `data`. Note that `indices` should not have duplicate entries. That is, two or more updates for the same index-location is not supported. The `output` is calculated via the following equation: ```python output = np.copy(data) update_indices = indices.shape[:-1] for idx in np.ndindex(update_indices): output[indices[idx]] = updates[idx] ``` #### Parameters None #### Inputs
inputs[0]: T
Tensor of rank r>=1.
inputs[1]: tensor(int32, Linear)
Tensor of rank q>=1.
inputs[2]: T
Tensor of rank q + r - indices.shape[-1] - 1.
#### Outputs
outputs[0]: T
Tensor of rank r >= 1.
#### Type Constraints - T:tensor(float32, Linear), tensor(int32, Linear) ### NonMaxSuppression #### Description Filter out boxes has high IoU overlap with previously selected boxes or low score. Output the indices of valid boxes. Indices of invalid boxes will be filled with -1. #### Parameters | Type | Parameter | Description | | ------- | ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------ | | `int` | `center_point_box` | 0 - the box data is supplied as [y1, x1, y2, x2], 1-the box data is supplied as [x_center, y_center, width, height]. | | `int` | `max_output_boxes_per_class` | The maximum number of boxes to be selected per batch per class. Default to 0, number of output boxes equal to number of input boxes. | | `float` | `iou_threshold` | The threshold for deciding whether boxes overlap too much with respect to IoU. Value range [0, 1]. Default to 0. | | `float` | `score_threshold` | The threshold for deciding when to remove boxes based on score. | | `int` | `offset` | 0 or 1, boxes' width or height is (x2 - x1 + offset). | #### Inputs
inputs[0]: T
Input boxes. 3-D tensor of shape (num_batches, spatial_dimension, 4).
inputs[1]: T
Input scores. 3-D tensor of shape (num_batches, num_classes, spatial_dimension).
#### Outputs
outputs[0]: tensor(int32, Linear)
Selected indices. 2-D tensor of shape (num_selected_indices, 3) as [[batch_index, class_index, box_index], ...].
num_selected_indices=num_batches* num_classes* min(max_output_boxes_per_class, spatial_dimension).
All invalid indices will be filled with -1.
#### Type Constraints - T:tensor(float32, Linear) ### MMCVDeformConv2d #### Description Perform Deformable Convolution on input feature, read [Deformable Convolutional Network](https://arxiv.org/abs/1703.06211) for detail. #### Parameters | Type | Parameter | Description | | -------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------- | | `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW) | | `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW) | | `list of ints` | `dilation` | The spacing between kernel elements. (dH, dW) | | `int` | `deformable_group` | Groups of deformable offset. | | `int` | `group` | Split input into groups. `input_channel` should be divisible by the number of groups. | | `int` | `im2col_step` | DeformableConv2d use im2col to compute convolution. im2col_step is used to split input and offset, reduce memory usage of column. | #### Inputs
inputs[0]: T
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
inputs[1]: T
Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of the weight, and outH and outW are the height and width of the offset and output.
inputs[2]: T
Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).
#### Outputs
outputs[0]: T
Output feature; 4-D tensor of shape (N, output_channel, outH, outW).
#### Type Constraints - T:tensor(float32, Linear) ### grid_sampler #### Description Perform sample from `input` with pixel locations from `grid`. #### Parameters | Type | Parameter | Description | | ----- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `int` | `interpolation_mode` | Interpolation mode to calculate output values. (0: `bilinear` , 1: `nearest`) | | `int` | `padding_mode` | Padding mode for outside grid values. (0: `zeros`, 1: `border`, 2: `reflection`) | | `int` | `align_corners` | If `align_corners=1`, the extrema (`-1` and `1`) are considered as referring to the center points of the input's corner pixels. If `align_corners=0`, they are instead considered as referring to the corner points of the input's corner pixels, making the sampling more resolution agnostic. | #### Inputs
inputs[0]: T
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
inputs[1]: T
Input offset; 4-D tensor of shape (N, outH, outW, 2), where outH and outW are the height and width of the offset and output.
#### Outputs
outputs[0]: T
Output feature; 4-D tensor of shape (N, C, outH, outW).
#### Type Constraints - T:tensor(float32, Linear) ### cummax #### Description Returns a namedtuple (`values`, `indices`) where `values` is the cumulative maximum of elements of `input` in the dimension `dim`. And `indices` is the index location of each maximum value found in the dimension `dim`. #### Parameters | Type | Parameter | Description | | ----- | --------- | --------------------------------------- | | `int` | `dim` | The dimension to do the operation over. | #### Inputs
inputs[0]: T
The input tensor.
#### Outputs
outputs[0]: T
Output values.
outputs[1]: tensor(int32, Linear)
Output indices.
#### Type Constraints - T:tensor(float32, Linear) ### cummin #### Description Returns a namedtuple (`values`, `indices`) where `values` is the cumulative minimum of elements of `input` in the dimension `dim`. And `indices` is the index location of each minimum value found in the dimension `dim`. #### Parameters | Type | Parameter | Description | | ----- | --------- | --------------------------------------- | | `int` | `dim` | The dimension to do the operation over. | #### Inputs
inputs[0]: T
The input tensor.
#### Outputs
outputs[0]: T
Output values.
outputs[1]: tensor(int32, Linear)
Output indices.
#### Type Constraints - T:tensor(float32, Linear) ### MMCVInstanceNormalization #### Description Carries out instance normalization as described in the paper https://arxiv.org/abs/1607.08022. y = scale * (x - mean) / sqrt(variance + epsilon) + B, where mean and variance are computed per instance per channel. #### Parameters | Type | Parameter | Description | | ------- | --------- | -------------------------------------------------------------------- | | `float` | `epsilon` | The epsilon value to use to avoid division by zero. Default is 1e-05 | #### Inputs
input: T
Input data tensor from the previous operator; dimensions for image case are (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and the width of the data. For non image case, the dimensions are in the form of (N x C x D1 x D2 ... Dn), where N is the batch size.
scale: T
The input 1-dimensional scale tensor of size C.
B: T
The input 1-dimensional bias tensor of size C.
#### Outputs
output: T
The output tensor of the same shape as input.
#### Type Constraints - T:tensor(float32, Linear) ### MMCVModulatedDeformConv2d #### Description Perform Modulated Deformable Convolution on input feature, read [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline) for detail. #### Parameters | Type | Parameter | Description | | -------------- | ------------------ | ------------------------------------------------------------------------------------- | | `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW) | | `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW) | | `list of ints` | `dilation` | The spacing between kernel elements. (dH, dW) | | `int` | `deformable_group` | Groups of deformable offset. | | `int` | `group` | Split input into groups. `input_channel` should be divisible by the number of groups. | #### Inputs
inputs[0]: T
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
inputs[1]: T
Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of the weight, and outH and outW are the height and width of the offset and output.
inputs[2]: T
Input mask; 4-D tensor of shape (N, deformable_group* kH* kW, outH, outW), where kH and kW are the height and width of the weight, and outH and outW are the height and width of the offset and output.
inputs[3]: T
Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).
inputs[4]: T, optional
Input bias; 1-D tensor of shape (output_channel).
#### Outputs
outputs[0]: T
Output feature; 4-D tensor of shape (N, output_channel, outH, outW).
#### Type Constraints - T:tensor(float32, Linear) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/deployment/tensorrt_plugin.md ================================================ ## TensorRT Deployment - [TensorRT Deployment](#tensorrt-deployment) - [Introduction](#introduction) - [List of TensorRT plugins supported in MMCV](#list-of-tensorrt-plugins-supported-in-mmcv) - [How to build TensorRT plugins in MMCV](#how-to-build-tensorrt-plugins-in-mmcv) - [Prerequisite](#prerequisite) - [Build on Linux](#build-on-linux) - [Create TensorRT engine and run inference in python](#create-tensorrt-engine-and-run-inference-in-python) - [How to add a TensorRT plugin for custom op in MMCV](#how-to-add-a-tensorrt-plugin-for-custom-op-in-mmcv) - [Main procedures](#main-procedures) - [Reminders](#reminders) - [Known Issues](#known-issues) - [References](#references) ### Introduction **NVIDIA TensorRT** is a software development kit(SDK) for high-performance inference of deep learning models. It includes a deep learning inference optimizer and runtime that delivers low latency and high-throughput for deep learning inference applications. Please check its [developer's website](https://developer.nvidia.com/tensorrt) for more information. To ease the deployment of trained models with custom operators from `mmcv.ops` using TensorRT, a series of TensorRT plugins are included in MMCV. 
### List of TensorRT plugins supported in MMCV | ONNX Operator | TensorRT Plugin | MMCV Releases | |:--------------------------|:--------------------------------------------------------------------------------|:-------------:| | MMCVRoiAlign | [MMCVRoiAlign](./tensorrt_custom_ops.md#mmcvroialign) | 1.2.6 | | ScatterND | [ScatterND](./tensorrt_custom_ops.md#scatternd) | 1.2.6 | | NonMaxSuppression | [NonMaxSuppression](./tensorrt_custom_ops.md#nonmaxsuppression) | 1.3.0 | | MMCVDeformConv2d | [MMCVDeformConv2d](./tensorrt_custom_ops.md#mmcvdeformconv2d) | 1.3.0 | | grid_sampler | [grid_sampler](./tensorrt_custom_ops.md#grid-sampler) | 1.3.1 | | cummax | [cummax](./tensorrt_custom_ops.md#cummax) | 1.3.5 | | cummin | [cummin](./tensorrt_custom_ops.md#cummin) | 1.3.5 | | MMCVInstanceNormalization | [MMCVInstanceNormalization](./tensorrt_custom_ops.md#mmcvinstancenormalization) | 1.3.5 | | MMCVModulatedDeformConv2d | [MMCVModulatedDeformConv2d](./tensorrt_custom_ops.md#mmcvmodulateddeformconv2d) | 1.3.8 | Notes - All plugins listed above are developed on TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0 ### How to build TensorRT plugins in MMCV #### Prerequisite - Clone repository ```bash git clone https://github.com/open-mmlab/mmcv.git ``` - Install TensorRT Download the corresponding TensorRT build from [NVIDIA Developer Zone](https://developer.nvidia.com/nvidia-tensorrt-download). For example, for Ubuntu 16.04 on x86-64 with cuda-10.2, the downloaded file is `TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0.tar.gz`. 
Then, install as below: ```bash cd ~/Downloads tar -xvzf TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0.tar.gz export TENSORRT_DIR=`pwd`/TensorRT-7.2.1.6 export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TENSORRT_DIR/lib ``` Install python packages: tensorrt, graphsurgeon, onnx-graphsurgeon ```bash pip install $TENSORRT_DIR/python/tensorrt-7.2.1.6-cp37-none-linux_x86_64.whl pip install $TENSORRT_DIR/onnx_graphsurgeon/onnx_graphsurgeon-0.2.6-py2.py3-none-any.whl pip install $TENSORRT_DIR/graphsurgeon/graphsurgeon-0.4.5-py2.py3-none-any.whl ``` For more detailed information of installing TensorRT using tar, please refer to [Nvidia' website](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-721/install-guide/index.html#installing-tar). #### Build on Linux ```bash cd mmcv ## to MMCV root directory MMCV_WITH_OPS=1 MMCV_WITH_TRT=1 pip install -e . ``` ### Create TensorRT engine and run inference in python Here is an example. ```python import torch import onnx from mmcv.tensorrt import (TRTWrapper, onnx2trt, save_trt_engine, is_tensorrt_plugin_loaded) assert is_tensorrt_plugin_loaded(), 'Requires to complie TensorRT plugins in mmcv' onnx_file = 'sample.onnx' trt_file = 'sample.trt' onnx_model = onnx.load(onnx_file) ## Model input inputs = torch.rand(1, 3, 224, 224).cuda() ## Model input shape info opt_shape_dict = { 'input': [list(inputs.shape), list(inputs.shape), list(inputs.shape)] } ## Create TensorRT engine max_workspace_size = 1 << 30 trt_engine = onnx2trt( onnx_model, opt_shape_dict, max_workspace_size=max_workspace_size) ## Save TensorRT engine save_trt_engine(trt_engine, trt_file) ## Run inference with TensorRT trt_model = TRTWrapper(trt_file, ['input'], ['output']) with torch.no_grad(): trt_outputs = trt_model({'input': inputs}) output = trt_outputs['output'] ``` ### How to add a TensorRT plugin for custom op in MMCV #### Main procedures Below are the main steps: 1. Add c++ header file 2. Add c++ source file 3. Add cuda kernel file 4. 
Register plugin in `trt_plugin.cpp` 5. Add unit test in `tests/test_ops/test_tensorrt.py` **Take RoIAlign plugin `roi_align` for example.** 1. Add header `trt_roi_align.hpp` to TensorRT include directory `mmcv/ops/csrc/tensorrt/` 2. Add source `trt_roi_align.cpp` to TensorRT source directory `mmcv/ops/csrc/tensorrt/plugins/` 3. Add cuda kernel `trt_roi_align_kernel.cu` to TensorRT source directory `mmcv/ops/csrc/tensorrt/plugins/` 4. Register `roi_align` plugin in [trt_plugin.cpp](https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/tensorrt/plugins/trt_plugin.cpp) ```c++ #include "trt_plugin.hpp" #include "trt_roi_align.hpp" REGISTER_TENSORRT_PLUGIN(RoIAlignPluginDynamicCreator); extern "C" { bool initLibMMCVInferPlugins() { return true; } } // extern "C" ``` 5. Add unit test into `tests/test_ops/test_tensorrt.py` Check [here](https://github.com/open-mmlab/mmcv/blob/master/tests/test_ops/test_tensorrt.py) for examples. #### Reminders - *Please note that this feature is experimental and may change in the future. Strongly suggest users always try with the latest master branch.* - Some of the [custom ops](https://mmcv.readthedocs.io/en/latest/ops.html) in `mmcv` have their cuda implementations, which could be referred. 
### Known Issues - None ### References - [Developer guide of Nvidia TensorRT](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html) - [TensorRT Open Source Software](https://github.com/NVIDIA/TensorRT) - [onnx-tensorrt](https://github.com/onnx/onnx-tensorrt) - [TensorRT python API](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/index.html) - [TensorRT c++ plugin API](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_plugin.html) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/faq.md ================================================ ## Frequently Asked Questions We list some common troubles faced by many users and their corresponding solutions here. Feel free to enrich the list if you find any frequent issues and have ways to help others to solve them. ### Installation - KeyError: "xxx: 'yyy is not in the zzz registry'" The registry mechanism will be triggered only when the file of the module is imported. So you need to import that file somewhere. More details can be found at https://github.com/open-mmlab/mmdetection/issues/5974. - "No module named 'mmcv.ops'"; "No module named 'mmcv._ext'" 1. Uninstall existing mmcv in the environment using `pip uninstall mmcv` 2. Install mmcv-full following the [installation instruction](https://mmcv.readthedocs.io/en/latest/get_started/installation.html) or [Build MMCV from source](https://mmcv.readthedocs.io/en/latest/get_started/build.html) - "invalid device function" or "no kernel image is available for execution" 1. Check the CUDA compute capability of your GPU 2. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built for the correct GPU architecture. You may need to set `TORCH_CUDA_ARCH_LIST` to reinstall MMCV. The compatibility issue could happen when using old GPUs, e.g., Tesla K80 (3.7) on colab. 3.
Check whether the running environment is the same as that when mmcv/mmdet is compiled. For example, you may compile mmcv using CUDA 10.0 but run it in a CUDA 9.0 environment - "undefined symbol" or "cannot open xxx.so" 1. If those symbols are CUDA/C++ symbols (e.g., libcudart.so or GLIBCXX), check whether the CUDA/GCC runtimes are the same as those used for compiling mmcv 2. If those symbols are Pytorch symbols (e.g., symbols containing caffe, aten, and TH), check whether the Pytorch version is the same as that used for compiling mmcv 3. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built by and running on the same environment - "RuntimeError: CUDA error: invalid configuration argument" This error may be caused by the poor performance of GPU. Try to decrease the value of [THREADS_PER_BLOCK](https://github.com/open-mmlab/mmcv/blob/cac22f8cf5a904477e3b5461b1cc36856c2793da/mmcv/ops/csrc/common_cuda_helper.hpp#L10) and recompile mmcv. - "RuntimeError: nms is not compiled with GPU support" This error is because your CUDA environment is not installed correctly. You may try to re-install your CUDA environment and then delete the build/ folder before re-compiling mmcv. - "Segmentation fault" 1. Check your GCC version and use GCC >= 5.4. This is usually caused by the incompatibility between PyTorch and the environment (e.g., GCC < 4.9 for PyTorch). We also recommend the users to avoid using GCC 5.5 because many feedbacks report that GCC 5.5 will cause "segmentation fault" and simply changing it to GCC 5.4 could solve the problem 2. Check whether PyTorch is correctly installed and could use CUDA op, e.g. type the following command in your terminal and see whether they could correctly output results ```shell python -c 'import torch; print(torch.cuda.is_available())' ``` 3. If PyTorch is correctly installed, check whether MMCV is correctly installed.
If MMCV is correctly installed, then there will be no issue of the command ```shell python -c 'import mmcv; import mmcv.ops' ``` 4. If MMCV and PyTorch are correctly installed, you can use `ipdb` to set breakpoints or directly add `print` to debug and see which part leads the `segmentation fault` - "libtorch_cuda_cu.so: cannot open shared object file" `mmcv-full` depends on the share object but it can not be found. We can check whether the object exists in `~/miniconda3/envs/{environment-name}/lib/python3.7/site-packages/torch/lib` or try to re-install the PyTorch. - "fatal error C1189: #error: -- unsupported Microsoft Visual Studio version!" If you are building mmcv-full on Windows and the version of CUDA is 9.2, you will probably encounter the error `"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include\crt/host_config.h(133): fatal error C1189: #error: -- unsupported Microsoft Visual Studio version! Only the versions 2012, 2013, 2015 and 2017 are supported!"`, in which case you can use a lower version of Microsoft Visual Studio like vs2017. - "error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized" If your version of PyTorch is 1.5.0 and you are building mmcv-full on Windows, you will probably encounter the error `- torch/csrc/jit/api/module.h(474): error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized`. The way to solve the error is to replace all the `static constexpr bool all_slots = false;` with `static bool all_slots = false;` at this file `https://github.com/pytorch/pytorch/blob/v1.5.0/torch/csrc/jit/api/module.h`. More details can be found at https://github.com/pytorch/pytorch/issues/39394. - "error: a member with an in-class initializer must be const" If your version of PyTorch is 1.6.0 and you are building mmcv-full on Windows, you will probably encounter the error `"- torch/include\torch/csrc/jit/api/module.h(483): error: a member with an in-class initializer must be const"`. 
The way to solve the error is to replace all the `CONSTEXPR_EXCEPT_WIN_CUDA` with `const` at `torch/include\torch/csrc/jit/api/module.h`. More details can be found at https://github.com/open-mmlab/mmcv/issues/575. - "error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized" If your version of PyTorch is 1.7.0 and you are building mmcv-full on Windows, you will probably encounter the error `torch/include\torch/csrc/jit/ir/ir.h(1347): error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized`. Solving the error requires modifying several local files of PyTorch: - delete `static constexpr Symbol Kind = ::c10::prim::profile;` and `static constexpr Symbol Kind = ::c10::prim::profile_optional;` at `torch/include\torch/csrc/jit/ir/ir.h` - replace `explicit operator type&() { return *(this->value); }` with `explicit operator type&() { return *((type*)this->value); }` at `torch\include\pybind11\cast.h` - replace all the `CONSTEXPR_EXCEPT_WIN_CUDA` with `const` at `torch/include\torch/csrc/jit/api/module.h` - Compatibility issue between MMCV and MMDetection; "ConvWS is already registered in conv layer" Please install the correct version of MMCV for the version of your MMDetection following the [installation instruction](https://mmdetection.readthedocs.io/en/latest/get_started.html#installation). More details can be found at https://github.com/pytorch/pytorch/pull/45956. ### Usage - "RuntimeError: Expected to have finished reduction in the prior iteration before starting a new one" 1. This error indicates that your module has parameters that were not used in producing loss. This phenomenon may be caused by running different branches in your code in DDP mode. More details at https://github.com/pytorch/pytorch/issues/55582 2.
You can set `find_unused_parameters = True` in the config to solve the above problems or find those unused parameters manually - "RuntimeError: Trying to backward through the graph a second time" Both `GradientCumulativeOptimizerHook` and `OptimizerHook` are set, which causes `loss.backward()` to be called twice, so a `RuntimeError` is raised. Only one of these hooks should be used. More details at https://github.com/open-mmlab/mmcv/issues/1379. ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/get_started/build.md ================================================ ## Build MMCV from source ### Build on Linux or macOS After cloning the repo with ```bash git clone https://github.com/open-mmlab/mmcv.git cd mmcv ``` It is recommended to install `ninja` to speed up the compilation ```bash pip install -r requirements/optional.txt ``` You can either - install the lite version ```bash pip install -e . ``` - install the full version ```bash MMCV_WITH_OPS=1 pip install -e . ``` If you are on macOS, add the following environment variables before the installing command. ```bash CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' ``` e.g., ```bash CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' MMCV_WITH_OPS=1 pip install -e . ``` ```{note} If you would like to use `opencv-python-headless` instead of `opencv-python`, e.g., in a minimum container environment or servers without GUI, you can first install it before installing MMCV to skip the installation of `opencv-python`. ``` ### Build on Windows Building MMCV on Windows is a bit more complicated than that on Linux. The following instructions show how to get this accomplished. #### Prerequisite The following software is required for building MMCV on Windows. Install them first. - [Git](https://git-scm.com/download/win) - During installation, tick **add git to Path**. - [Visual Studio Community 2019](https://visualstudio.microsoft.com) - A compiler for C++ and CUDA codes.
- [Miniconda](https://docs.conda.io/en/latest/miniconda.html) - Official distributions of Python should work too. - [CUDA 10.2](https://developer.nvidia.com/cuda-10.2-download-archive) - Not required for building CPU version. - Customize the installation if necessary. As a recommendation, skip the driver installation if a newer version is already installed. ```{note} You should know how to set up environment variables, especially `Path`, on Windows. The following instruction relies heavily on this skill. ``` #### Setup Python Environment 1. Launch Anaconda prompt from Windows Start menu Do not use raw `cmd.exe`, because this instruction is based on PowerShell syntax. 2. Create a new conda environment ```shell conda create --name mmcv python=3.7 # 3.6, 3.7, 3.8 should work too as tested conda activate mmcv # make sure to activate environment before any operation ``` 3. Install PyTorch. Choose a version based on your need. ```shell conda install pytorch torchvision cudatoolkit=10.2 -c pytorch ``` We only tested PyTorch version >= 1.6.0. 4. Prepare MMCV source code ```shell git clone https://github.com/open-mmlab/mmcv.git cd mmcv ``` 5. Install required Python packages ```shell pip3 install -r requirements/runtime.txt ``` 6. It is recommended to install `ninja` to speed up the compilation ```bash pip install -r requirements/optional.txt ``` #### Build and install MMCV MMCV can be built in three ways: 1. Lite version (without ops) In this way, no custom ops are compiled and mmcv is a pure python package. 2. Full version (CPU ops) Module `ops` will be compiled as a pytorch extension, but only x86 code will be compiled. The compiled ops can be executed on CPU only. 3. Full version (CUDA ops) Both x86 and CUDA codes of `ops` module will be compiled. The compiled version can be run on both CPU and CUDA-enabled GPU (if implemented). ##### Common steps 1.
Set up MSVC compiler Set Environment variable, add `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.27.29110\bin\Hostx86\x64` to `PATH`, so that `cl.exe` will be available in prompt, as shown below. ```none (base) PS C:\Users\xxx> cl Microsoft (R) C/C++ Optimizing Compiler Version 19.27.29111 for x64 Copyright (C) Microsoft Corporation. All rights reserved. usage: cl [ option... ] filename... [ / link linkoption... ] ``` For compatibility, we use the x86-hosted and x64-targeted compiler. note `Hostx86\x64` in the path. You may want to change the system language to English because pytorch will parse text output from `cl.exe` to check its version. However only utf-8 is recognized. Navigate to Control Panel -> Region -> Administrative -> Language for Non-Unicode programs and change it to English. ##### Option 1: Build MMCV (lite version) After finishing above common steps, launch Anaconda shell from Start menu and issue the following commands: ```shell # activate environment conda activate mmcv # change directory cd mmcv # install python setup.py develop # check pip list ``` ##### Option 2: Build MMCV (full version with CPU) 1. Finish above common steps 2. Set up environment variables ```shell $env:MMCV_WITH_OPS = 1 $env:MAX_JOBS = 8 # based on your available number of CPU cores and amount of memory ``` 3. Following build steps of the lite version ```shell # activate environment conda activate mmcv # change directory cd mmcv # build python setup.py build_ext # if success, cl will be launched to compile ops # install python setup.py develop # check pip list ``` ##### Option 3: Build MMCV (full version with CUDA) 1. Finish above common steps 2. Make sure `CUDA_PATH` or `CUDA_HOME` is already set in `envs` via `ls env:`, desired output is shown as below: ```none (base) PS C:\Users\WRH> ls env: Name Value ---- ----- <... 
omit some lines ...> CUDA_PATH C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2 CUDA_PATH_V10_1 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1 CUDA_PATH_V10_2 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2 <... omit some lines ...> ``` This should already be done by the CUDA installer. If not, or you have multiple versions of the CUDA toolkit installed, set it with ```shell $env:CUDA_HOME = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2" # OR $env:CUDA_HOME = $env:CUDA_PATH_V10_2 # if CUDA_PATH_V10_2 is in envs: ``` 3. Set CUDA target arch ```shell # Suppose you are using GTX 1080, which is of capability 6.1 $env:TORCH_CUDA_ARCH_LIST="6.1" # OR build all supported arch, will be slow $env:TORCH_CUDA_ARCH_LIST="3.5 3.7 5.0 5.2 6.0 6.1 7.0 7.5" ``` ```{note} Check the compute capability of your GPU from [here](https://developer.nvidia.com/cuda-gpus). ``` 4. Launch compiling the same way as CPU ```shell $env:MMCV_WITH_OPS = 1 $env:MAX_JOBS = 8 # based on available number of CPU cores and amount of memory # activate environment conda activate mmcv # change directory cd mmcv # build python setup.py build_ext # if success, cl will be launched to compile ops # install python setup.py develop # check pip list ``` ```{note} If you are compiling against PyTorch 1.6.0, you might meet some errors from PyTorch as described in [this issue](https://github.com/pytorch/pytorch/issues/42467). Follow [this pull request](https://github.com/pytorch/pytorch/pull/43380/files) to modify the source code in your local PyTorch installation. ``` If you meet issues when running or compiling mmcv, we list some common issues in [Frequently Asked Question](../faq.html).
================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/get_started/installation.md ================================================ ## Installation There are two versions of MMCV: - **mmcv-full**: comprehensive, with full features and various CUDA ops out of the box. It takes longer time to build. - **mmcv**: lite, without CUDA ops but all other features, similar to mmcv<1.0.0. It is useful when you do not need those CUDA ops. ```{warning} Do not install both versions in the same environment, otherwise you may encounter errors like `ModuleNotFound`. You need to uninstall one before installing the other. `Installing the full version is highly recommended if CUDA is available`. ``` a. Install the full version. Before installing mmcv-full, make sure that PyTorch has been successfully installed following the [official guide](https://pytorch.org/). We provide pre-built mmcv packages (recommended) with different PyTorch and CUDA versions to simplify the building. In addition, you can run [check_installation.py](.dev_scripts/check_installation.py) to check the installation of mmcv-full after running the installation commands. i. Install the latest version. The rule for installing the latest ``mmcv-full`` is as follows: ```shell pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html ``` Please replace ``{cu_version}`` and ``{torch_version}`` in the URL with your desired ones. For example, to install the latest ``mmcv-full`` with ``CUDA 11.1`` and ``PyTorch 1.9.0``, use the following command: ```shell pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html ``` For more details, please refer to the following tables and delete ``=={mmcv_version}``. ii. Install a specified version.
The rule for installing a specified ``mmcv-full`` is as follows: ```shell pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html ``` First of all, please refer to the Releases and replace ``{mmcv_version}`` with a specified one, e.g. ``1.3.9``. Then replace ``{cu_version}`` and ``{torch_version}`` in the URL with your desired versions. For example, to install ``mmcv-full==1.3.9`` with ``CUDA 11.1`` and ``PyTorch 1.9.0``, use the following command: ```shell pip install mmcv-full==1.3.9 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html ``` ```{note} mmcv-full is only compiled on PyTorch 1.x.0 because the compatibility usually holds between 1.x.0 and 1.x.1. If your PyTorch version is 1.x.1, you can install mmcv-full compiled with PyTorch 1.x.0 and it usually works well. For example, if your PyTorch version is 1.8.1 and CUDA version is 11.1, you can use the following command to install mmcv-full. `pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html` ``` For more details, please refer to the following tables.
CUDA torch 1.10 torch 1.9 torch 1.8 torch 1.7 torch 1.6 torch 1.5
11.3
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html
11.1
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html
11.0
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html
10.2
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.10.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.7.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.6.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.5.0/index.html
10.1
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.8.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.7.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.5.0/index.html
9.2
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.7.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.6.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.5.0/index.html
cpu
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.10.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.9.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.7.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.6.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.5.0/index.html
```{note} The pre-built packages provided above do not include all versions of mmcv-full, you can click on the corresponding links to see the supported versions. For example, if you click [cu102-torch1.8.0](https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html), you can see that `cu102-torch1.8.0` only provides 1.3.0 and above versions of mmcv-full. In addition, We no longer provide `mmcv-full` pre-built packages compiled with `PyTorch 1.3 & 1.4` since v1.3.17. You can find previous versions that compiled with PyTorch 1.3 & 1.4 [here](./previous_versions.md). The compatibility is still ensured in our CI, but we will discard the support of PyTorch 1.3 & 1.4 next year. ``` Another way is to compile locally by running ```python pip install mmcv-full ``` Note that the local compiling may take up to 10 mins. b. Install the lite version. ```python pip install mmcv ``` c. Install full version with custom operators for onnxruntime - Check [here](https://mmcv.readthedocs.io/en/latest/deployment/onnxruntime_custom_ops.html) for detailed instruction. If you would like to build MMCV from source, please refer to the [guide](https://mmcv.readthedocs.io/en/latest/get_started/build.html). ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/get_started/introduction.md ================================================ ## Introduction MMCV is a foundational library for computer vision research and supports many research projects as below: - [MIM](https://github.com/open-mmlab/mim): MIM Installs OpenMMLab Packages. - [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark. - [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark. - [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection. 
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark. - [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark. - [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark. - [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark. - [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox. - [MMOCR](https://github.com/open-mmlab/mmocr): A Comprehensive Toolbox for Text Detection, Recognition and Understanding. - [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox. - [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark. - [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab FewShot Learning Toolbox and Benchmark. - [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 3D human parametric model toolbox and benchmark. - [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab self-supervised learning Toolbox and Benchmark. - [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab Model Compression Toolbox and Benchmark. - [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab Model Deployment Framework. It provides the following functionalities. - Universal IO APIs - Image/Video processing - Image and annotation visualization - Useful utilities (progress bar, timer, ...) - PyTorch runner with hooking mechanism - Various CNN architectures - High-quality implementation of common CUDA ops ```{note} MMCV requires Python 3.6+. 
``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/get_started/previous_versions.md ================================================ ## OTHER VERSIONS OF PYTORCH BUILT FOR MMCV-FULL We no longer provide `mmcv-full` packages compiled under lower versions of `PyTorch`, but for your convenience, you can find them below. ### PyTorch 1.4 | 1.0.0 <= mmcv_version <= 1.2.1 #### CUDA 10.1 ```bash pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.4.0/index.html ``` #### CUDA 9.2 ```bash pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.4.0/index.html ``` #### CPU ```bash pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.4.0/index.html ``` ### PyTorch v1.3 | 1.0.0 <= mmcv_version <= 1.3.16 #### CUDA 10.1 ```bash pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.3.0/index.html ``` #### CUDA 9.2 ```bash pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.3.0/index.html ``` #### CPU ```bash pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.3.0/index.html ``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/index.rst ================================================ Welcome to MMCV's documentation! ================================ You can switch between Chinese and English documents in the lower-left corner of the layout. .. toctree:: :maxdepth: 2 :caption: Get Started get_started/introduction.md get_started/installation.md get_started/build.md ..
toctree:: :maxdepth: 2 :caption: Understand MMCV understand_mmcv/config.md understand_mmcv/registry.md understand_mmcv/runner.md understand_mmcv/io.md understand_mmcv/data_process.md understand_mmcv/visualization.md understand_mmcv/cnn.md understand_mmcv/ops.md understand_mmcv/utils.md .. toctree:: :maxdepth: 2 :caption: Deployment deployment/mmcv_ops_definition.md deployment/onnx.md deployment/onnxruntime_op.md deployment/tensorrt_plugin.md .. toctree:: :maxdepth: 2 :caption: Compatibility compatibility.md .. toctree:: :maxdepth: 2 :caption: FAQ faq.md .. toctree:: :maxdepth: 2 :caption: Community community/contributing.md community/pr.md .. toctree:: :maxdepth: 2 :caption: API Reference api.rst Indices and tables ================== * :ref:`genindex` * :ref:`search` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/make.bat ================================================ @ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/cnn.md ================================================ ## CNN We provide some building bricks for CNNs, including layer building, module bundles and weight initialization. 
### Layer building We may need to try different layers of the same type when running experiments, but do not want to modify the code from time to time. Here we provide some layer building methods to construct layers from a dict, which can be written in configs or specified via command line arguments. #### Usage A simplest example is ```python cfg = dict(type='Conv3d') layer = build_conv_layer(cfg, in_channels=3, out_channels=8, kernel_size=3) ``` - `build_conv_layer`: Supported types are Conv1d, Conv2d, Conv3d, Conv (alias for Conv2d). - `build_norm_layer`: Supported types are BN1d, BN2d, BN3d, BN (alias for BN2d), SyncBN, GN, LN, IN1d, IN2d, IN3d, IN (alias for IN2d). - `build_activation_layer`: Supported types are ReLU, LeakyReLU, PReLU, RReLU, ReLU6, ELU, Sigmoid, Tanh, GELU. - `build_upsample_layer`: Supported types are nearest, bilinear, deconv, pixel_shuffle. - `build_padding_layer`: Supported types are zero, reflect, replicate. #### Extension We also allow extending the building methods with custom layers and operators. 1. Write and register your own module. ```python from mmcv.cnn import UPSAMPLE_LAYERS @UPSAMPLE_LAYERS.register_module() class MyUpsample: def __init__(self, scale_factor): pass def forward(self, x): pass ``` 2. Import `MyUpsample` somewhere (e.g., in `__init__.py`) and then use it. ```python cfg = dict(type='MyUpsample', scale_factor=2) layer = build_upsample_layer(cfg) ``` ### Module bundles We also provide common module bundles to facilitate the network construction. `ConvModule` is a bundle of convolution, normalization and activation layers, please refer to the [api](api.html#mmcv.cnn.ConvModule) for details. 
```python # conv + bn + relu conv = ConvModule(3, 8, 2, norm_cfg=dict(type='BN')) # conv + gn + relu conv = ConvModule(3, 8, 2, norm_cfg=dict(type='GN', num_groups=2)) # conv + relu conv = ConvModule(3, 8, 2) # conv conv = ConvModule(3, 8, 2, act_cfg=None) # conv + leaky relu conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='LeakyReLU')) # bn + conv + relu conv = ConvModule( 3, 8, 2, norm_cfg=dict(type='BN'), order=('norm', 'conv', 'act')) ``` ### Weight initialization > Implementation details are available at [mmcv/cnn/utils/weight_init.py](../../mmcv/cnn/utils/weight_init.py) During training, a proper initialization strategy is beneficial to speed up the training or obtain a higher performance. In MMCV, we provide some commonly used methods for initializing modules like `nn.Conv2d`. Of course, we also provide high-level APIs for initializing models containing one or more modules. #### Initialization functions Initialize a `nn.Module` such as `nn.Conv2d`, `nn.Linear` in a functional way. We provide the following initialization methods. - constant_init Initialize module parameters with constant values. ```python >>> import torch.nn as nn >>> from mmcv.cnn import constant_init >>> conv1 = nn.Conv2d(3, 3, 1) >>> # constant_init(module, val, bias=0) >>> constant_init(conv1, 1, 0) >>> conv1.weight ``` - xavier_init Initialize module parameters with values according to the method described in [Understanding the difficulty of training deep feedforward neural networks - Glorot, X. & Bengio, Y. (2010)](http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf) ```python >>> import torch.nn as nn >>> from mmcv.cnn import xavier_init >>> conv1 = nn.Conv2d(3, 3, 1) >>> # xavier_init(module, gain=1, bias=0, distribution='normal') >>> xavier_init(conv1, distribution='normal') ``` - normal_init Initialize module parameters with the values drawn from a normal distribution. 
```python >>> import torch.nn as nn >>> from mmcv.cnn import normal_init >>> conv1 = nn.Conv2d(3, 3, 1) >>> # normal_init(module, mean=0, std=1, bias=0) >>> normal_init(conv1, std=0.01, bias=0) ``` - uniform_init Initialize module parameters with values drawn from a uniform distribution. ```python >>> import torch.nn as nn >>> from mmcv.cnn import uniform_init >>> conv1 = nn.Conv2d(3, 3, 1) >>> # uniform_init(module, a=0, b=1, bias=0) >>> uniform_init(conv1, a=0, b=1) ``` - kaiming_init Initialize module parameters with the values according to the method described in [Delving deep into rectifiers: Surpassing human-level performance on ImageNet classification - He, K. et al. (2015)](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf) ```python >>> import torch.nn as nn >>> from mmcv.cnn import kaiming_init >>> conv1 = nn.Conv2d(3, 3, 1) >>> # kaiming_init(module, a=0, mode='fan_out', nonlinearity='relu', bias=0, distribution='normal') >>> kaiming_init(conv1) ``` - caffe2_xavier_init The xavier initialization is implemented in caffe2, which corresponds to `kaiming_uniform_` in PyTorch. ```python >>> import torch.nn as nn >>> from mmcv.cnn import caffe2_xavier_init >>> conv1 = nn.Conv2d(3, 3, 1) >>> # caffe2_xavier_init(module, bias=0) >>> caffe2_xavier_init(conv1) ``` - bias_init_with_prob Initialize conv/fc bias value according to a given probability, as proposed in [Focal Loss for Dense Object Detection](https://arxiv.org/pdf/1708.02002.pdf). ```python >>> from mmcv.cnn import bias_init_with_prob >>> # bias_init_with_prob is proposed in Focal Loss >>> bias = bias_init_with_prob(0.01) >>> bias -4.59511985013459 ``` #### Initializers and configs On the basis of the initialization methods, we define the corresponding initialization classes and register them to `INITIALIZERS`, so we can use the configuration to initialize the model. We provide the following initialization classes. 
- ConstantInit - XavierInit - NormalInit - UniformInit - KaimingInit - Caffe2XavierInit - PretrainedInit Let us introduce the usage of `initialize` in detail. 1. Initialize model by `layer` key If we only define `layer`, it just initialize the layer in `layer` key. NOTE: Value of `layer` key is the class name with attributes weights and bias of Pytorch, so `MultiheadAttention layer` is not supported. - Define `layer` key for initializing module with same configuration. ```python import torch.nn as nn from mmcv.cnn import initialize class FooNet(nn.Module): def __init__(self): super().__init__() self.feat = nn.Conv1d(3, 1, 3) self.reg = nn.Conv2d(3, 3, 3) self.cls = nn.Linear(1, 2) model = FooNet() init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d', 'Linear'], val=1) # initialize whole module with same configuration initialize(model, init_cfg) # model.feat.weight # Parameter containing: # tensor([[[1., 1., 1.], # [1., 1., 1.], # [1., 1., 1.]]], requires_grad=True) ``` - Define `layer` key for initializing layer with different configurations. ```python import torch.nn as nn from mmcv.cnn.utils import initialize class FooNet(nn.Module): def __init__(self): super().__init__() self.feat = nn.Conv1d(3, 1, 3) self.reg = nn.Conv2d(3, 3, 3) self.cls = nn.Linear(1,2) model = FooNet() init_cfg = [dict(type='Constant', layer='Conv1d', val=1), dict(type='Constant', layer='Conv2d', val=2), dict(type='Constant', layer='Linear', val=3)] # nn.Conv1d will be initialized with dict(type='Constant', val=1) # nn.Conv2d will be initialized with dict(type='Constant', val=2) # nn.Linear will be initialized with dict(type='Constant', val=3) initialize(model, init_cfg) # model.reg.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) ``` 2. 
Initialize model by `override` key - When initializing some specific part with its attribute name, we can use `override` key, and the value in `override` will take precedence over the value in init_cfg. ```python import torch.nn as nn from mmcv.cnn import initialize class FooNet(nn.Module): def __init__(self): super().__init__() self.feat = nn.Conv1d(3, 1, 3) self.reg = nn.Conv2d(3, 3, 3) self.cls = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2)) # if we would like to initialize model's weights as 1 and bias as 2 # but weight in `reg` as 3 and bias 4, we can use override key model = FooNet() init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], val=1, bias=2, override=dict(type='Constant', name='reg', val=3, bias=4)) # self.feat and self.cls will be initialized with dict(type='Constant', val=1, bias=2) # The module called 'reg' will be initialized with dict(type='Constant', val=3, bias=4) initialize(model, init_cfg) # model.reg.weight # Parameter containing: # tensor([[[[3., 3., 3.], # [3., 3., 3.], # [3., 3., 3.]], # ..., # [[3., 3., 3.], # [3., 3., 3.], # [3., 3., 3.]]]], requires_grad=True) ``` - If `layer` is None in init_cfg, only sub-module with the name in override will be initialized, and type and other args in override can be omitted. ```python model = FooNet() init_cfg = dict(type='Constant', val=1, bias=2, override=dict(name='reg')) # self.feat and self.cls will be initialized by Pytorch # The module called 'reg' will be initialized with dict(type='Constant', val=1, bias=2) initialize(model, init_cfg) # model.reg.weight # Parameter containing: # tensor([[[[1., 1., 1.], # [1., 1., 1.], # [1., 1., 1.]], # ..., # [[1., 1., 1.], # [1., 1., 1.], # [1., 1., 1.]]]], requires_grad=True) ``` - If we don't define `layer` key or `override` key, it will not initialize anything.
- Invalid usage ```python # It is invalid that override doesn't have a name key init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], val=1, bias=2, override=dict(type='Constant', val=3, bias=4)) # It is also invalid that override has name and other args except type init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], val=1, bias=2, override=dict(name='reg', val=3, bias=4)) ``` 3. Initialize model with the pretrained model ```python import torch.nn as nn import torchvision.models as models from mmcv.cnn import initialize # initialize model with pretrained model model = models.resnet50() # model.conv1.weight # Parameter containing: # tensor([[[[-6.7435e-03, -2.3531e-02, -9.0143e-03, ..., -2.1245e-03, # -1.8077e-03, 3.0338e-03], # [-1.2603e-02, -2.7831e-02, 2.3187e-02, ..., -1.5793e-02, # 1.1655e-02, 4.5889e-03], # [-3.7916e-02, 1.2014e-02, 1.3815e-02, ..., -4.2651e-03, # 1.7314e-02, -9.9998e-03], # ..., init_cfg = dict(type='Pretrained', checkpoint='torchvision://resnet50') initialize(model, init_cfg) # model.conv1.weight # Parameter containing: # tensor([[[[ 1.3335e-02, 1.4664e-02, -1.5351e-02, ..., -4.0896e-02, # -4.3034e-02, -7.0755e-02], # [ 4.1205e-03, 5.8477e-03, 1.4948e-02, ..., 2.2060e-03, # -2.0912e-02, -3.8517e-02], # [ 2.2331e-02, 2.3595e-02, 1.6120e-02, ..., 1.0281e-01, # 6.2641e-02, 5.1977e-02], # ..., # initialize weights of a sub-module with the specific part of a pretrained model by using 'prefix' model = models.resnet50() url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\ 'retinanet_r50_fpn_1x_coco/'\ 'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' init_cfg = dict(type='Pretrained', checkpoint=url, prefix='backbone.') initialize(model, init_cfg) ``` 4. Initialize model inherited from BaseModule, Sequential, ModuleList, ModuleDict `BaseModule` is inherited from `torch.nn.Module`, and the only difference between them is that `BaseModule` implements `init_weights`.
`Sequential` is inherited from `BaseModule` and `torch.nn.Sequential`. `ModuleList` is inherited from `BaseModule` and `torch.nn.ModuleList`. `ModuleDict` is inherited from `BaseModule` and `torch.nn.ModuleDict`. `````python import torch.nn as nn from mmcv.runner import BaseModule, Sequential, ModuleList, ModuleDict class FooConv1d(BaseModule): def __init__(self, init_cfg=None): super().__init__(init_cfg) self.conv1d = nn.Conv1d(4, 1, 4) def forward(self, x): return self.conv1d(x) class FooConv2d(BaseModule): def __init__(self, init_cfg=None): super().__init__(init_cfg) self.conv2d = nn.Conv2d(3, 1, 3) def forward(self, x): return self.conv2d(x) # BaseModule init_cfg = dict(type='Constant', layer='Conv1d', val=0., bias=1.) model = FooConv1d(init_cfg) model.init_weights() # model.conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # Sequential init_cfg1 = dict(type='Constant', layer='Conv1d', val=0., bias=1.) init_cfg2 = dict(type='Constant', layer='Conv2d', val=2., bias=3.) model1 = FooConv1d(init_cfg1) model2 = FooConv2d(init_cfg2) seq_model = Sequential(model1, model2) seq_model.init_weights() # seq_model[0].conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # seq_model[1].conv2d.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) # inner init_cfg has higher priority model1 = FooConv1d(init_cfg1) model2 = FooConv2d(init_cfg2) init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.) 
seq_model = Sequential(model1, model2, init_cfg=init_cfg) seq_model.init_weights() # seq_model[0].conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # seq_model[1].conv2d.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) # ModuleList model1 = FooConv1d(init_cfg1) model2 = FooConv2d(init_cfg2) modellist = ModuleList([model1, model2]) modellist.init_weights() # modellist[0].conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # modellist[1].conv2d.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) # inner init_cfg has higher priority model1 = FooConv1d(init_cfg1) model2 = FooConv2d(init_cfg2) init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.) 
modellist = ModuleList([model1, model2], init_cfg=init_cfg) modellist.init_weights() # modellist[0].conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # modellist[1].conv2d.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) # ModuleDict model1 = FooConv1d(init_cfg1) model2 = FooConv2d(init_cfg2) modeldict = ModuleDict(dict(model1=model1, model2=model2)) modeldict.init_weights() # modeldict['model1'].conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # modeldict['model2'].conv2d.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) # inner init_cfg has higher priority model1 = FooConv1d(init_cfg1) model2 = FooConv2d(init_cfg2) init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.) modeldict = ModuleDict(dict(model1=model1, model2=model2), init_cfg=init_cfg) modeldict.init_weights() # modeldict['model1'].conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # modeldict['model2'].conv2d.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) ````` ### Model Zoo Besides torchvision pre-trained models, we also provide pre-trained models of following CNN: - VGG Caffe - ResNet Caffe - ResNeXt - ResNet with Group Normalization - ResNet with Group Normalization and Weight Standardization - HRNetV2 - Res2Net - RegNet #### Model URLs in JSON The model zoo links in MMCV are managed by JSON files. 
The json file consists of key-value pairs of model name and its url or path. An example json file could be like: ```json { "model_a": "https://example.com/models/model_a_9e5bac.pth", "model_b": "pretrain/model_b_ab3ef2c.pth" } ``` The default links of the pre-trained models hosted on OpenMMLab AWS could be found [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/model_zoo/open_mmlab.json). You may override default links by putting `open-mmlab.json` under `MMCV_HOME`. If `MMCV_HOME` is not found in the environment, `~/.cache/mmcv` will be used by default. You may `export MMCV_HOME=/your/path` to use your own path. The external json files will be merged into default one. If the same key is present in both external json and default json, the external one will be used. #### Load Checkpoint The following types are supported for `filename` argument of `mmcv.load_checkpoint()`. - filepath: The filepath of the checkpoint. - `http://xxx` and `https://xxx`: The link to download the checkpoint. The `SHA256` postfix should be contained in the filename. - `torchvision://xxx`: The model links in `torchvision.models`. Please refer to [torchvision](https://pytorch.org/docs/stable/torchvision/models.html) for details. - `open-mmlab://xxx`: The model links or filepath provided in default and additional json files. ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/config.md ================================================ ## Config `Config` class is used for manipulating config and config files. It supports loading configs from multiple file formats including **python**, **json** and **yaml**. It provides dict-like apis to get and set values. Here is an example of the config file `test.py`. ```python a = 1 b = dict(b1=[0, 1, 2], b2=None) c = (1, 2) d = 'string' ``` To load and use configs ```python >>> cfg = Config.fromfile('test.py') >>> print(cfg) >>> dict(a=1, ... b=dict(b1=[0, 1, 2], b2=None), ... c=(1, 2), ... 
d='string') ``` For all format configs, some predefined variables are supported. It will convert the variable in `{{ var }}` with its real value. Currently, it supports four predefined variables: `{{ fileDirname }}` - the current opened file's dirname, e.g. /home/your-username/your-project/folder `{{ fileBasename }}` - the current opened file's basename, e.g. file.ext `{{ fileBasenameNoExtension }}` - the current opened file's basename with no file extension, e.g. file `{{ fileExtname }}` - the current opened file's extension, e.g. .ext These variable names are referred from [VS Code](https://code.visualstudio.com/docs/editor/variables-reference). Here is an example of a config with predefined variables. `config_a.py` ```python a = 1 b = './work_dir/{{ fileBasenameNoExtension }}' c = '{{ fileExtname }}' ``` ```python >>> cfg = Config.fromfile('./config_a.py') >>> print(cfg) >>> dict(a=1, ... b='./work_dir/config_a', ... c='.py') ``` For all format configs, inheritance is supported. To reuse fields in other config files, specify `_base_='./config_a.py'` or a list of configs `_base_=['./config_a.py', './config_b.py']`. Here are 4 examples of config inheritance. `config_a.py` ```python a = 1 b = dict(b1=[0, 1, 2], b2=None) ``` ### Inherit from base config without overlapped keys `config_b.py` ```python _base_ = './config_a.py' c = (1, 2) d = 'string' ``` ```python >>> cfg = Config.fromfile('./config_b.py') >>> print(cfg) >>> dict(a=1, ... b=dict(b1=[0, 1, 2], b2=None), ... c=(1, 2), ... d='string') ``` New fields in `config_b.py` are combined with old fields in `config_a.py` ### Inherit from base config with overlapped keys `config_c.py` ```python _base_ = './config_a.py' b = dict(b2=1) c = (1, 2) ``` ```python >>> cfg = Config.fromfile('./config_c.py') >>> print(cfg) >>> dict(a=1, ... b=dict(b1=[0, 1, 2], b2=1), ... c=(1, 2)) ``` `b.b2=None` in `config_a` is replaced with `b.b2=1` in `config_c.py`. 
### Inherit from base config with ignored fields `config_d.py` ```python _base_ = './config_a.py' b = dict(_delete_=True, b2=None, b3=0.1) c = (1, 2) ``` ```python >>> cfg = Config.fromfile('./config_d.py') >>> print(cfg) >>> dict(a=1, ... b=dict(b2=None, b3=0.1), ... c=(1, 2)) ``` You may also set `_delete_=True` to ignore some fields in base configs. All old keys `b1, b2` in `b` are replaced with new keys `b2, b3`. ### Inherit from multiple base configs (the base configs should not contain the same keys) `config_e.py` ```python c = (1, 2) d = 'string' ``` `config_f.py` ```python _base_ = ['./config_a.py', './config_e.py'] ``` ```python >>> cfg = Config.fromfile('./config_f.py') >>> print(cfg) >>> dict(a=1, ... b=dict(b1=[0, 1, 2], b2=None), ... c=(1, 2), ... d='string') ``` ### Reference variables from base You can reference variables defined in base using the following grammar. `base.py` ```python item1 = 'a' item2 = dict(item3 = 'b') ``` `config_g.py` ```python _base_ = ['./base.py'] item = dict(a = {{ _base_.item1 }}, b = {{ _base_.item2.item3 }}) ``` ```python >>> cfg = Config.fromfile('./config_g.py') >>> print(cfg.pretty_text) item1 = 'a' item2 = dict(item3='b') item = dict(a='a', b='b') ``` ### Add deprecation information in configs Deprecation information can be added in a config file, which will trigger a `UserWarning` when this config file is loaded. `deprecated_cfg.py` ```python _base_ = 'expected_cfg.py' _deprecation_ = dict( expected = 'expected_cfg.py', # optional to show expected config path in the warning information reference = 'url to related PR' # optional to show reference link in the warning information ) ``` ```python >>> cfg = Config.fromfile('./deprecated_cfg.py') UserWarning: The config file deprecated_cfg.py will be deprecated in the future. Please use expected_cfg.py instead. 
More information can be found at https://github.com/open-mmlab/mmcv/pull/1275 ``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/data_process.md ================================================ ## Data Process ### Image This module provides some image processing methods, which requires `opencv` to be installed. #### Read/Write/Show To read or write images files, use `imread` or `imwrite`. ```python import mmcv img = mmcv.imread('test.jpg') img = mmcv.imread('test.jpg', flag='grayscale') img_ = mmcv.imread(img) # nothing will happen, img_ = img mmcv.imwrite(img, 'out.jpg') ``` To read images from bytes ```python with open('test.jpg', 'rb') as f: data = f.read() img = mmcv.imfrombytes(data) ``` To show an image file or a loaded image ```python mmcv.imshow('tests/data/color.jpg') # this is equivalent to for i in range(10): img = np.random.randint(256, size=(100, 100, 3), dtype=np.uint8) mmcv.imshow(img, win_name='test image', wait_time=200) ``` #### Color space conversion Supported conversion methods: - bgr2gray - gray2bgr - bgr2rgb - rgb2bgr - bgr2hsv - hsv2bgr ```python img = mmcv.imread('tests/data/color.jpg') img1 = mmcv.bgr2rgb(img) img2 = mmcv.rgb2gray(img1) img3 = mmcv.bgr2hsv(img) ``` #### Resize There are three resize methods. All `imresize_*` methods have an argument `return_scale`, if this argument is `False`, then the return value is merely the resized image, otherwise is a tuple `(resized_img, scale)`. ```python # resize to a given size mmcv.imresize(img, (1000, 600), return_scale=True) # resize to the same size of another image mmcv.imresize_like(img, dst_img, return_scale=False) # resize by a ratio mmcv.imrescale(img, 0.5) # resize so that the max edge no longer than 1000, short edge no longer than 800 # without changing the aspect ratio mmcv.imrescale(img, (1000, 800)) ``` #### Rotate To rotate an image by some angle, use `imrotate`. 
The center can be specified, which is the center of original image by default. There are two modes of rotating, one is to keep the image size unchanged so that some parts of the image will be cropped after rotating, the other is to extend the image size to fit the rotated image. ```python img = mmcv.imread('tests/data/color.jpg') # rotate the image clockwise by 30 degrees. img_ = mmcv.imrotate(img, 30) # rotate the image counterclockwise by 90 degrees. img_ = mmcv.imrotate(img, -90) # rotate the image clockwise by 30 degrees, and rescale it by 1.5x at the same time. img_ = mmcv.imrotate(img, 30, scale=1.5) # rotate the image clockwise by 30 degrees, with (100, 100) as the center. img_ = mmcv.imrotate(img, 30, center=(100, 100)) # rotate the image clockwise by 30 degrees, and extend the image size. img_ = mmcv.imrotate(img, 30, auto_bound=True) ``` #### Flip To flip an image, use `imflip`. ```python img = mmcv.imread('tests/data/color.jpg') # flip the image horizontally mmcv.imflip(img) # flip the image vertically mmcv.imflip(img, direction='vertical') ``` #### Crop `imcrop` can crop the image with one or some regions, represented as (x1, y1, x2, y2). ```python import mmcv import numpy as np img = mmcv.imread('tests/data/color.jpg') # crop the region (10, 10, 100, 120) bboxes = np.array([10, 10, 100, 120]) patch = mmcv.imcrop(img, bboxes) # crop two regions (10, 10, 100, 120) and (0, 0, 50, 50) bboxes = np.array([[10, 10, 100, 120], [0, 0, 50, 50]]) patches = mmcv.imcrop(img, bboxes) # crop two regions, and rescale the patches by 1.2x patches = mmcv.imcrop(img, bboxes, scale_ratio=1.2) ``` #### Padding There are two methods `impad` and `impad_to_multiple` to pad an image to the specific size with given values. ```python img = mmcv.imread('tests/data/color.jpg') # pad the image to (1000, 1200) with all zeros img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=0) # pad the image to (1000, 1200) with different values for three channels. 
img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=[100, 50, 200]) # pad the image on left, right, top, bottom borders with all zeros img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=0) # pad the image on left, right, top, bottom borders with different values # for three channels. img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=[100, 50, 200]) # pad an image so that each edge is a multiple of some value. img_ = mmcv.impad_to_multiple(img, 32) ``` ### Video This module provides the following functionalities. - A `VideoReader` class with friendly apis to read and convert videos. - Some methods for editing (cut, concat, resize) videos. - Optical flow read/write/warp. #### VideoReader The `VideoReader` class provides sequence like apis to access video frames. It will internally cache the frames which have been visited. ```python video = mmcv.VideoReader('test.mp4') # obtain basic information print(len(video)) print(video.width, video.height, video.resolution, video.fps) # iterate over all frames for frame in video: print(frame.shape) # read the next frame img = video.read() # read a frame by index img = video[100] # read some frames img = video[5:10] ``` To convert a video to images or generate a video from a image directory. ```python # split a video into frames and save to a folder video = mmcv.VideoReader('test.mp4') video.cvt2frames('out_dir') # generate video from frames mmcv.frames2video('out_dir', 'test.avi') ``` #### Editing utils There are also some methods for editing videos, which wraps the commands of ffmpeg. 
```python # cut a video clip mmcv.cut_video('test.mp4', 'clip1.mp4', start=3, end=10, vcodec='h264') # join a list of video clips mmcv.concat_video(['clip1.mp4', 'clip2.mp4'], 'joined.mp4', log_level='quiet') # resize a video with the specified size mmcv.resize_video('test.mp4', 'resized1.mp4', (360, 240)) # resize a video with a scaling ratio of 2 mmcv.resize_video('test.mp4', 'resized2.mp4', ratio=2) ``` #### Optical flow `mmcv` provides the following methods to operate on optical flows. - IO - Visualization - Flow warping We provide two options to dump optical flow files: uncompressed and compressed. The uncompressed way just dumps the floating numbers to a binary file. It is lossless but the dumped file has a larger size. The compressed way quantizes the optical flow to 0-255 and dumps it as a jpeg image. The flow of x-dim and y-dim will be concatenated into a single image. 1. IO ```python flow = np.random.rand(800, 600, 2).astype(np.float32) # dump the flow to a flo file (~3.7M) mmcv.flowwrite(flow, 'uncompressed.flo') # dump the flow to a jpeg file (~230K) # the shape of the dumped image is (800, 1200) mmcv.flowwrite(flow, 'compressed.jpg', quantize=True, concat_axis=1) # read the flow file, the shape of loaded flow is (800, 600, 2) for both ways flow = mmcv.flowread('uncompressed.flo') flow = mmcv.flowread('compressed.jpg', quantize=True, concat_axis=1) ``` 2. Visualization It is possible to visualize optical flows with `mmcv.flowshow()`. ```python mmcv.flowshow(flow) ``` ![progress](../_static/flow_visualization.png) 3. 
Flow warping ```python img1 = mmcv.imread('img1.jpg') flow = mmcv.flowread('flow.flo') warpped_img2 = mmcv.flow_warp(img1, flow) ``` img1 (left) and img2 (right) ![raw images](../_static/flow_raw_images.png) optical flow (img2 -> img1) ![optical flow](../_static/flow_img2toimg1.png) warped image and difference with ground truth ![warpped image](../_static/flow_warp_diff.png) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/io.md ================================================ ## File IO This module provides two universal APIs to load and dump files of different formats. ```{note} Since v1.3.16, the IO modules support loading (dumping) data from (to) different backends, respectively. More details are in PR [#1330](https://github.com/open-mmlab/mmcv/pull/1330). ``` ### Load and dump data `mmcv` provides a universal api for loading and dumping data, currently supported formats are json, yaml and pickle. #### Load from disk or dump to disk ```python import mmcv # load data from a file data = mmcv.load('test.json') data = mmcv.load('test.yaml') data = mmcv.load('test.pkl') # load data from a file-like object with open('test.json', 'r') as f: data = mmcv.load(f, file_format='json') # dump data to a string json_str = mmcv.dump(data, file_format='json') # dump data to a file with a filename (infer format from file extension) mmcv.dump(data, 'out.pkl') # dump data to a file with a file-like object with open('test.yaml', 'w') as f: mmcv.dump(data, f, file_format='yaml') ``` #### Load from other backends or dump to other backends ```python import mmcv # load data from a file data = mmcv.load('s3://bucket-name/test.json') data = mmcv.load('s3://bucket-name/test.yaml') data = mmcv.load('s3://bucket-name/test.pkl') # dump data to a file with a filename (infer format from file extension) mmcv.dump(data, 's3://bucket-name/out.pkl') ``` It is also very convenient to extend the api to support more file formats. 
All you need to do is to write a file handler inherited from `BaseFileHandler` and register it with one or several file formats. You need to implement at least 3 methods. ```python import mmcv # To register multiple file formats, a list can be used as the argument. # @mmcv.register_handler(['txt', 'log']) @mmcv.register_handler('txt') class TxtHandler1(mmcv.BaseFileHandler): def load_from_fileobj(self, file): return file.read() def dump_to_fileobj(self, obj, file): file.write(str(obj)) def dump_to_str(self, obj, **kwargs): return str(obj) ``` Here is an example of `PickleHandler`. ```python import pickle class PickleHandler(mmcv.BaseFileHandler): def load_from_fileobj(self, file, **kwargs): return pickle.load(file, **kwargs) def load_from_path(self, filepath, **kwargs): return super(PickleHandler, self).load_from_path( filepath, mode='rb', **kwargs) def dump_to_str(self, obj, **kwargs): kwargs.setdefault('protocol', 2) return pickle.dumps(obj, **kwargs) def dump_to_fileobj(self, obj, file, **kwargs): kwargs.setdefault('protocol', 2) pickle.dump(obj, file, **kwargs) def dump_to_path(self, obj, filepath, **kwargs): super(PickleHandler, self).dump_to_path( obj, filepath, mode='wb', **kwargs) ``` ### Load a text file as a list or dict For example `a.txt` is a text file with 5 lines. ``` a b c d e ``` #### Load from disk Use `list_from_file` to load the list from a.txt. ```python >>> mmcv.list_from_file('a.txt') ['a', 'b', 'c', 'd', 'e'] >>> mmcv.list_from_file('a.txt', offset=2) ['c', 'd', 'e'] >>> mmcv.list_from_file('a.txt', max_num=2) ['a', 'b'] >>> mmcv.list_from_file('a.txt', prefix='/mnt/') ['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e'] ``` For example `b.txt` is a text file with 3 lines. ``` 1 cat 2 dog cow 3 panda ``` Then use `dict_from_file` to load the dict from `b.txt`. 
```python >>> mmcv.dict_from_file('b.txt') {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} >>> mmcv.dict_from_file('b.txt', key_type=int) {1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} ``` #### Load from other backends Use `list_from_file` to load the list from `s3://bucket-name/a.txt`. ```python >>> mmcv.list_from_file('s3://bucket-name/a.txt') ['a', 'b', 'c', 'd', 'e'] >>> mmcv.list_from_file('s3://bucket-name/a.txt', offset=2) ['c', 'd', 'e'] >>> mmcv.list_from_file('s3://bucket-name/a.txt', max_num=2) ['a', 'b'] >>> mmcv.list_from_file('s3://bucket-name/a.txt', prefix='/mnt/') ['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e'] ``` Use `dict_from_file` to load the dict from `s3://bucket-name/b.txt`. ```python >>> mmcv.dict_from_file('s3://bucket-name/b.txt') {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} >>> mmcv.dict_from_file('s3://bucket-name/b.txt', key_type=int) {1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} ``` ### Load and dump checkpoints #### Load checkpoints from disk or save to disk We can read the checkpoints from disk or save to disk in the following way. ```python import torch filepath1 = '/path/of/your/checkpoint1.pth' filepath2 = '/path/of/your/checkpoint2.pth' # read from filepath1 checkpoint = torch.load(filepath1) # save to filepath2 torch.save(checkpoint, filepath2) ``` MMCV provides many backends. `HardDiskBackend` is one of them and we can use it to read or save checkpoints. ```python import io from mmcv.fileio.file_client import HardDiskBackend disk_backend = HardDiskBackend() with io.BytesIO(disk_backend.get(filepath1)) as buffer: checkpoint = torch.load(buffer) with io.BytesIO() as buffer: torch.save(checkpoint, buffer) disk_backend.put(buffer.getvalue(), filepath2) ``` If we want to implement an interface which automatically selects the corresponding backend based on the file path, we can use the `FileClient`. 
For example, we want to implement two methods for reading checkpoints as well as saving checkpoints, which need to support different types of file paths, either disk paths, network paths or other paths. ```python from mmcv.fileio.file_client import FileClient def load_checkpoint(path): file_client = FileClient.infer_client(uri=path) with io.BytesIO(file_client.get(path)) as buffer: checkpoint = torch.load(buffer) return checkpoint def save_checkpoint(checkpoint, path): with io.BytesIO() as buffer: torch.save(checkpoint, buffer) file_client.put(buffer.getvalue(), path) file_client = FileClient.infer_client(uri=filepath1) checkpoint = load_checkpoint(filepath1) save_checkpoint(checkpoint, filepath2) ``` #### Load checkpoints from the Internet ```{note} Currently, it only supports reading checkpoints from the Internet, and does not support saving checkpoints to the Internet. ``` ```python import io import torch from mmcv.fileio.file_client import HTTPBackend, FileClient filepath = 'http://path/of/your/checkpoint.pth' checkpoint = torch.utils.model_zoo.load_url(filepath) http_backend = HTTPBackend() with io.BytesIO(http_backend.get(filepath)) as buffer: checkpoint = torch.load(buffer) file_client = FileClient.infer_client(uri=filepath) with io.BytesIO(file_client.get(filepath)) as buffer: checkpoint = torch.load(buffer) ``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/ops.md ================================================ ## CUDA ops We implement common CUDA ops used in detection, segmentation, etc. 
- ActiveRotatedFilter - AssignScoreWithK - BallQuery - BBoxOverlaps - CARAFE - CrissCrossAttention - ContextBlock - ConvexIoU - CornerPool - Deformable Convolution v1/v2 - Deformable RoIPool - DynamicScatter - GatherPoints - FurthestPointSample - FurthestPointSampleWithDist - GeneralizedAttention - GroupPoints - KNN - MaskedConv - MinAreaPolygon - NMS - PointsInPolygons - PSAMask - RiRoIAlignRotated - RotatedFeatureAlign - RoIPointPool3d - RoIPool - RoIAlign - RoIAwarePool3d - SimpleRoIAlign - SigmoidFocalLoss - SoftmaxFocalLoss - SoftNMS - Synchronized BatchNorm - Voxelization - ThreeInterpolate - ThreeNN - Weight standardization - Correlation ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/registry.md ================================================ ## Registry MMCV implements [registry](https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/registry.py) to manage different modules that share similar functionalities, e.g., backbones, head, and necks, in detectors. Most projects in OpenMMLab use registry to manage modules of datasets and models, such as [MMDetection](https://github.com/open-mmlab/mmdetection), [MMDetection3D](https://github.com/open-mmlab/mmdetection3d), [MMClassification](https://github.com/open-mmlab/mmclassification), [MMEditing](https://github.com/open-mmlab/mmediting), etc. ### What is registry In MMCV, registry can be regarded as a mapping that maps a class to a string. These classes contained by a single registry usually have similar APIs but implement different algorithms or support different datasets. With the registry, users can find and instantiate the class through its corresponding string, and use the instantiated module as they want. One typical example is the config systems in most OpenMMLab projects, which use the registry to create hooks, runners, models, and datasets, through configs. 
The API reference could be found [here](https://mmcv.readthedocs.io/en/latest/api.html?highlight=registry#mmcv.utils.Registry). To manage your modules in the codebase by `Registry`, there are three steps as below. 1. Create a build method (optional, in most cases you can just use the default one). 2. Create a registry. 3. Use this registry to manage the modules. `build_func` argument of `Registry` is to customize how to instantiate the class instance, the default one is `build_from_cfg` implemented [here](https://mmcv.readthedocs.io/en/latest/api.html?highlight=registry#mmcv.utils.build_from_cfg). ### A Simple Example Here we show a simple example of using registry to manage modules in a package. You can find more practical examples in OpenMMLab projects. Assuming we want to implement a series of Dataset Converters for converting different formats of data to the expected data format. We create a directory as a package named `converters`. In the package, we first create a file to implement builders, named `converters/builder.py`, as below ```python from mmcv.utils import Registry # create a registry for converters CONVERTERS = Registry('converter') ``` Then we can implement different converters in the package. For example, implement `Converter1` in `converters/converter1.py` ```python from .builder import CONVERTERS # use the registry to manage the module @CONVERTERS.register_module() class Converter1(object): def __init__(self, a, b): self.a = a self.b = b ``` The key step to use registry for managing the modules is to register the implemented module into the registry `CONVERTERS` through `@CONVERTERS.register_module()` when you are creating the module. In this way, a mapping between a string and the class is built and maintained by `CONVERTERS` as below ```python 'Converter1' -> <class 'Converter1'> ``` ```{note} The registry mechanism will be triggered only when the file where the module is located is imported. So you need to import that file somewhere.
More details can be found at https://github.com/open-mmlab/mmdetection/issues/5974. ``` If the module is successfully registered, you can use this converter through configs as ```python converter_cfg = dict(type='Converter1', a=a_value, b=b_value) converter = CONVERTERS.build(converter_cfg) ``` ### Customize Build Function Suppose we would like to customize how `converters` are built, we could implement a customized `build_func` and pass it into the registry. ```python from mmcv.utils import Registry # create a build function def build_converter(cfg, registry, *args, **kwargs): cfg_ = cfg.copy() converter_type = cfg_.pop('type') if converter_type not in registry: raise KeyError(f'Unrecognized converter type {converter_type}') else: converter_cls = registry.get(converter_type) converter = converter_cls(*args, **kwargs, **cfg_) return converter # create a registry for converters and pass ``build_converter`` function CONVERTERS = Registry('converter', build_func=build_converter) ``` ```{note} In this example, we demonstrate how to use the `build_func` argument to customize the way to build a class instance. The functionality is similar to the default `build_from_cfg`. In most cases, the default one would be sufficient. `build_model_from_cfg` is also implemented to build PyTorch module in `nn.Sequential`, you may directly use them instead of implementing by yourself. ``` ### Hierarchy Registry You could also build modules from more than one OpenMMLab framework, e.g. you could use all backbones in [MMClassification](https://github.com/open-mmlab/mmclassification) for object detectors in [MMDetection](https://github.com/open-mmlab/mmdetection), you may also combine an object detection model in [MMDetection](https://github.com/open-mmlab/mmdetection) and semantic segmentation model in [MMSegmentation](https://github.com/open-mmlab/mmsegmentation). All `MODELS` registries of downstream codebases are children registries of MMCV's `MODELS` registry.
Basically, there are two ways to build a module from child or sibling registries. 1. Build from children registries. For example: In MMDetection we define: ```python from mmcv.utils import Registry from mmcv.cnn import MODELS as MMCV_MODELS MODELS = Registry('model', parent=MMCV_MODELS) @MODELS.register_module() class NetA(nn.Module): def forward(self, x): return x ``` In MMClassification we define: ```python from mmcv.utils import Registry from mmcv.cnn import MODELS as MMCV_MODELS MODELS = Registry('model', parent=MMCV_MODELS) @MODELS.register_module() class NetB(nn.Module): def forward(self, x): return x + 1 ``` We could build two net in either MMDetection or MMClassification by: ```python from mmdet.models import MODELS net_a = MODELS.build(cfg=dict(type='NetA')) net_b = MODELS.build(cfg=dict(type='mmcls.NetB')) ``` or ```python from mmcls.models import MODELS net_a = MODELS.build(cfg=dict(type='mmdet.NetA')) net_b = MODELS.build(cfg=dict(type='NetB')) ``` 2. Build from parent registry. The shared `MODELS` registry in MMCV is the parent registry for all downstream codebases (root registry): ```python from mmcv.cnn import MODELS as MMCV_MODELS net_a = MMCV_MODELS.build(cfg=dict(type='mmdet.NetA')) net_b = MMCV_MODELS.build(cfg=dict(type='mmcls.NetB')) ``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/runner.md ================================================ ## Runner The runner class is designed to manage the training. It eases the training process with less code demanded from users while staying flexible and configurable. The main features are as listed: - Support `EpochBasedRunner` and `IterBasedRunner` for different scenarios. Implementing customized runners is also allowed to meet customized needs. - Support customized workflow to allow switching between different modes while training. Currently, supported modes are train and val. 
- Enable extensibility through various hooks, including hooks defined in MMCV and customized ones. ### EpochBasedRunner As its name indicates, workflow in `EpochBasedRunner` should be set based on epochs. For example, [('train', 2), ('val', 1)] means running 2 epochs for training and 1 epoch for validation, iteratively. And each epoch may contain multiple iterations. Currently, MMDetection uses `EpochBasedRunner` by default. Let's take a look at its core logic: ```python # the condition to stop training while curr_epoch < max_epochs: # traverse the workflow. # e.g. workflow = [('train', 2), ('val', 1)] for i, flow in enumerate(workflow): # mode(e.g. train) determines which function to run mode, epochs = flow # epoch_runner will be either self.train() or self.val() epoch_runner = getattr(self, mode) # execute the corresponding function for _ in range(epochs): epoch_runner(data_loaders[i], **kwargs) ``` Currently, we support 2 modes: train and val. Let's take a train function for example and have a look at its core logic: ```python # Currently, epoch_runner could be either train or val def train(self, data_loader, **kwargs): # traverse the dataset and get batch data for 1 epoch for i, data_batch in enumerate(data_loader): # it will execute all before_train_iter function in the hooks registered. You may want to watch out for the order. self.call_hook('before_train_iter') # set train_mode as False in val function self.run_iter(data_batch, train_mode=True, **kwargs) self.call_hook('after_train_iter') self.call_hook('after_train_epoch') ``` ### IterBasedRunner Different from `EpochBasedRunner`, workflow in `IterBasedRunner` should be set based on iterations. For example, [('train', 2), ('val', 1)] means running 2 iters for training and 1 iter for validation, iteratively. Currently, MMSegmentation uses `IterBasedRunner` by default. 
Let's take a look at its core logic: ```python # Although we set workflow by iters here, we might also need info on the epochs in some use cases. That can be provided by IterLoader. iter_loaders = [IterLoader(x) for x in data_loaders] # the condition to stop training while curr_iter < max_iters: # traverse the workflow. # e.g. workflow = [('train', 2), ('val', 1)] for i, flow in enumerate(workflow): # mode(e.g. train) determines which function to run mode, iters = flow # iter_runner will be either self.train() or self.val() iter_runner = getattr(self, mode) # execute the corresponding function for _ in range(iters): iter_runner(iter_loaders[i], **kwargs) ``` Currently, we support 2 modes: train and val. Let's take a val function for example and have a look at its core logic: ```python # Currently, iter_runner could be either train or val def val(self, data_loader, **kwargs): # get batch data for 1 iter data_batch = next(data_loader) # it will execute all before_val_iter function in the hooks registered. You may want to watch out for the order. self.call_hook('before_val_iter') outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) self.outputs = outputs self.call_hook('after_val_iter') ``` Other than the basic functionalities explained above, `EpochBasedRunner` and `IterBasedRunner` provide methods such as `resume`, `save_checkpoint` and `register_hook`. In case you are not familiar with the term Hook mentioned earlier, we will also provide a tutorial about it (coming soon...). Essentially, a hook is functionality to alter or augment the code behaviors through predefined API. It allows users to have their own code called under certain circumstances. It makes code extensible in a non-intrusive manner. ### A Simple Example We will walk you through the usage of runner with a classification task. The following code only contains essential steps for demonstration purposes. The following steps are necessary for any training tasks.
**(1) Initialize dataloader, model, optimizer, etc.** ```python # initialize model model=... # initialize optimizer, typically, we set: cfg.optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) optimizer = build_optimizer(model, cfg.optimizer) # initialize the dataloader corresponding to the workflow(train/val) data_loaders = [ build_dataloader( ds, cfg.data.samples_per_gpu, cfg.data.workers_per_gpu, ...) for ds in dataset ] ``` **(2) Initialize runner** ```python runner = build_runner( # cfg.runner is typically set as: # runner = dict(type='EpochBasedRunner', max_epochs=200) cfg.runner, default_args=dict( model=model, batch_processor=None, optimizer=optimizer, logger=logger)) ``` **(3) Register training hooks and customized hooks.** ```python # register default hooks necessary for training runner.register_training_hooks( # configs of learning rate, it is typically set as: # lr_config = dict(policy='step', step=[100, 150]) cfg.lr_config, # configuration of optimizer, e.g. grad_clip optimizer_config, # configuration of saving checkpoints, it is typically set as: # checkpoint_config = dict(interval=1), saving checkpoints every epochs cfg.checkpoint_config, # configuration of logs cfg.log_config, ...) # register customized hooks # say we want to enable ema, then we could set custom_hooks=[dict(type='EMAHook')] if cfg.get('custom_hooks', None): custom_hooks = cfg.custom_hooks for hook_cfg in cfg.custom_hooks: hook_cfg = hook_cfg.copy() priority = hook_cfg.pop('priority', 'NORMAL') hook = build_from_cfg(hook_cfg, HOOKS) runner.register_hook(hook, priority=priority) ``` Then, we can use `resume` or `load_checkpoint` to load existing weights. **(4) Start training** ```python # workflow is typically set as: workflow = [('train', 1)] # here the training begins. 
runner.run(data_loaders, cfg.workflow) ``` Let's take `EpochBasedRunner` for example and go a little bit into details about setting workflow: - Say we only want to put train in the workflow, then we can set: workflow = [('train', 1)]. The runner will only execute train iteratively in this case. - Say we want to put both train and val in the workflow, then we can set: workflow = [('train', 3), ('val',1)]. The runner will first execute train for 3 epochs and then switch to val mode and execute val for 1 epoch. The workflow will be repeated until the current epoch hit the max_epochs. - Workflow is highly flexible. Therefore, you can set workflow = [('val', 1), ('train',1)] if you would like the runner to validate first and train after. The code we demonstrated above is already in `train.py` in MM repositories. Simply modify the corresponding keys in the configuration files and the script will execute the expected workflow automatically. ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/utils.md ================================================ ## Utils ### ProgressBar If you want to apply a method to a list of items and track the progress, `track_progress` is a good choice. It will display a progress bar to tell the progress and ETA. ```python import mmcv def func(item): # do something pass tasks = [item_1, item_2, ..., item_n] mmcv.track_progress(func, tasks) ``` The output is like the following. ![progress](../_static/progress.*) There is another method `track_parallel_progress`, which wraps multiprocessing and progress visualization. ```python mmcv.track_parallel_progress(func, tasks, 8) # 8 workers ``` ![progress](../_static/parallel_progress.*) If you want to iterate or enumerate a list of items and track the progress, `track_iter_progress` is a good choice. It will display a progress bar to tell the progress and ETA. 
```python import mmcv tasks = [item_1, item_2, ..., item_n] for task in mmcv.track_iter_progress(tasks): # do something like print print(task) for i, task in enumerate(mmcv.track_iter_progress(tasks)): # do something like print print(i) print(task) ``` ### Timer It is convenient to compute the runtime of a code block with `Timer`. ```python import time with mmcv.Timer(): # simulate some code block time.sleep(1) ``` or try with `since_start()` and `since_last_check()`. The former returns the runtime since the timer starts and the latter will return the time since the last time checked. ```python timer = mmcv.Timer() # code block 1 here print(timer.since_start()) # code block 2 here print(timer.since_last_check()) print(timer.since_start()) ```
help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/_static/css/readthedocs.css ================================================ .header-logo { background-image: url("../image/mmcv-logo.png"); background-size: 85px 40px; height: 40px; width: 85px; } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/api.rst ================================================ fileio ------- .. automodule:: mmcv.fileio :members: image ------ .. automodule:: mmcv.image :members: video ------ .. automodule:: mmcv.video :members: arraymisc --------- .. automodule:: mmcv.arraymisc :members: visualization -------------- .. automodule:: mmcv.visualization :members: utils ----- .. automodule:: mmcv.utils :members: cnn ---- .. automodule:: mmcv.cnn :members: runner ------ .. automodule:: mmcv.runner :members: engine ------ .. automodule:: mmcv.engine :members: ops ------ .. automodule:: mmcv.ops :members: ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/community/contributing.md ================================================ ## 贡献代码 欢迎任何类型的贡献,包括但不限于 - 修改拼写错误或代码错误 - 添加文档或将文档翻译成其他语言 - 添加新功能和新组件 ### 工作流 | 详细工作流见 [拉取请求](pr.md) 1. 复刻并拉取最新的 OpenMMLab 算法库 2. 创建新的分支(不建议使用主分支提拉取请求) 3. 提交你的修改 4. 
创建拉取请求 ```{note} 如果你计划添加新功能并且该功能包含比较大的改动,建议先开 issue 讨论 ``` ### 代码风格 #### Python [PEP8](https://www.python.org/dev/peps/pep-0008/) 作为 OpenMMLab 算法库首选的代码规范,我们使用以下工具检查和格式化代码 - [flake8](http://flake8.pycqa.org/en/latest/): Python 官方发布的代码规范检查工具,是多个检查工具的封装 - [yapf](https://github.com/google/yapf): Google 发布的代码规范检查工具 - [isort](https://github.com/timothycrosley/isort): 自动调整模块导入顺序的工具 - [markdownlint](https://github.com/markdownlint/markdownlint): 检查 markdown 文件的工具 - [docformatter](https://github.com/myint/docformatter): 格式化 docstring 的工具 yapf 和 isort 的配置可以在 [setup.cfg](./setup.cfg) 找到 通过配置 [pre-commit hook](https://pre-commit.com/) ,我们可以在提交代码时自动检查和格式化 `flake8`、`yapf`、`isort`、`trailing whitespaces`、`markdown files`, 修复 `end-of-files`、`double-quoted-strings`、`python-encoding-pragma`、`mixed-line-ending`,调整 `requirments.txt` 的包顺序。 pre-commit 钩子的配置可以在 [.pre-commit-config](./.pre-commit-config.yaml) 找到。 在克隆算法库后,你需要安装并初始化 pre-commit 钩子 ```shell pip install -U pre-commit ``` 切换算法库根目录 ```shell pre-commit install ``` 如果安装 markdownlint 遇到了问题,可以尝试使用以下的步骤安装 ruby ```shell # install rvm curl -L https://get.rvm.io | bash -s -- --autolibs=read-fail [[ -s "$HOME/.rvm/scripts/rvm" ]] && source "$HOME/.rvm/scripts/rvm" rvm autolibs disable # install ruby rvm install 2.7.1 ``` 或者参考 [这个代码库](https://github.com/innerlee/setup) 和 [`zzruby.sh`](https://github.com/innerlee/setup/blob/master/zzruby.sh)。 至此,每一次 commit 修改都会触发 pre-commit 检查代码格式。 >提交拉取请求前,请确保你的代码符合 yapf 的格式 #### C++ and CUDA C++ 和 CUDA 的代码规范遵从 [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/community/pr.md ================================================ ## 拉取请求 ### 什么是拉取请求? 
`拉取请求` (Pull Request), [GitHub 官方文档](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests)定义如下。 ``` 拉取请求是一种通知机制。你修改了他人的代码,将你的修改通知原来作者,希望他合并你的修改。 ``` ### 基本的工作流: 1. 获取最新的代码库 2. 从主分支创建最新的分支进行开发 3. 提交修改 4. 推送你的修改并创建一个 `拉取请求` 5. 讨论、审核代码 6. 将开发分支合并到主分支 ### 具体步骤 #### 1. 获取最新的代码库 + 当你第一次提 PR 时 复刻 OpenMMLab 原代码库,点击 GitHub 页面右上角的 **Fork** 按钮即可 ![avatar](../../en/_static/community/1.png) 克隆复刻的代码库到本地 ```bash git clone git@github.com:XXX/mmcv.git ``` 添加原代码库为上游代码库 ```bash git remote add upstream git@github.com:open-mmlab/mmcv ``` + 从第二个 PR 起 检出本地代码库的主分支,然后从最新的原代码库的主分支拉取更新 ```bash git checkout master git pull upstream master ``` #### 2. 从主分支创建一个新的开发分支 ```bash git checkout -b branchname ``` ```{tip} 为了保证提交历史清晰可读,我们强烈推荐您先检出主分支 (master),再创建新的分支。 ``` #### 3. 提交你的修改 ```bash # coding git add [files] git commit -m 'messages' ``` #### 4. 推送你的修改到复刻的代码库,并创建一个`拉取请求` + 推送当前分支到远端复刻的代码库 ```bash git push origin branchname ``` + 创建一个`拉取请求` ![avatar](../../en/_static/community/2.png) + 修改`拉取请求`信息模板,描述修改原因和修改内容。还可以在 PR 描述中,手动关联到相关的`议题` (issue),(更多细节,请参考[官方文档](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue))。 #### 5. 讨论并评审你的代码 + 创建`拉取请求`时,可以关联给相关人员进行评审 ![avatar](../../en/_static/community/3.png) + 根据评审人员的意见修改代码,并推送修改 #### 6. `拉取请求`合并之后删除该分支 ```bash git branch -d branchname # delete local branch git push origin --delete branchname # delete remote branch ``` ### PR 规范 1. 使用 [pre-commit hook](https://pre-commit.com),尽量减少代码风格相关问题 2. 一个 PR 对应一个短期分支 3. 粒度要细,一个PR只做一件事情,避免超大的PR + Bad:实现 Faster R-CNN + Acceptable:给 Faster R-CNN 添加一个 box head + Good:给 box head 增加一个参数来支持自定义的 conv 层数 4. 每次 Commit 时需要提供清晰且有意义 commit 信息 5. 
提供清晰且有意义的`拉取请求`描述 + 标题写明白任务名称,一般格式:[Prefix] Short description of the pull request (Suffix) + prefix: 新增功能 [Feature], 修 bug [Fix], 文档相关 [Docs], 开发中 [WIP] (暂时不会被review) + 描述里介绍`拉取请求`的主要修改内容,结果,以及对其他部分的影响, 参考`拉取请求`模板 + 关联相关的`议题` (issue) 和其他`拉取请求` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/compatibility.md ================================================ ### v1.3.18 部分自定义算子对于不同的设备有不同实现,为此添加的大量宏命令与类型检查使得代码变得难以维护。例如: ```c++ if (input.device().is_cuda()) { #ifdef MMCV_WITH_CUDA CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(rois); CHECK_CUDA_INPUT(output); CHECK_CUDA_INPUT(argmax_y); CHECK_CUDA_INPUT(argmax_x); roi_align_forward_cuda(input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); #else AT_ERROR("RoIAlign is not compiled with GPU support"); #endif } else { CHECK_CPU_INPUT(input); CHECK_CPU_INPUT(rois); CHECK_CPU_INPUT(output); CHECK_CPU_INPUT(argmax_y); CHECK_CPU_INPUT(argmax_x); roi_align_forward_cpu(input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } ``` 为此我们设计了注册与分发的机制以更好的管理这些算子实现。 ```c++ void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned); void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { ROIAlignForwardCUDAKernelLauncher( input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } // 注册算子的cuda实现 void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, 
int pool_mode, bool aligned); REGISTER_DEVICE_IMPL(roi_align_forward_impl, CUDA, roi_align_forward_cuda); // roi_align.cpp // 使用dispatcher根据参数中的Tensor device类型对实现进行分发 void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { DISPATCH_DEVICE_IMPL(roi_align_forward_impl, input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } ``` ### v1.3.11 为了灵活地支持更多的后端和硬件,例如 `NVIDIA GPUs` 、`AMD GPUs`,我们重构了 `mmcv/ops/csrc` 目录。注意,这次重构不会影响 API 的使用。更多相关信息,请参考 [PR1206](https://github.com/open-mmlab/mmcv/pull/1206)。 原始的目录结构如下所示 ``` . ├── common_cuda_helper.hpp ├── ops_cuda_kernel.cuh ├── pytorch_cpp_helper.hpp ├── pytorch_cuda_helper.hpp ├── parrots_cpp_helper.hpp ├── parrots_cuda_helper.hpp ├── parrots_cudawarpfunction.cuh ├── onnxruntime │   ├── onnxruntime_register.h │   ├── onnxruntime_session_options_config_keys.h │   ├── ort_mmcv_utils.h │   ├── ... │   ├── onnx_ops.h │   └── cpu │ ├── onnxruntime_register.cpp │      ├── ... │      └── onnx_ops_impl.cpp ├── parrots │   ├── ... │   ├── ops.cpp │   ├── ops_cuda.cu │   ├── ops_parrots.cpp │   └── ops_pytorch.h ├── pytorch │   ├── ... │   ├── ops.cpp │   ├── ops_cuda.cu │   ├── pybind.cpp └── tensorrt ├── trt_cuda_helper.cuh ├── trt_plugin_helper.hpp ├── trt_plugin.hpp ├── trt_serialize.hpp ├── ... ├── trt_ops.hpp └── plugins    ├── trt_cuda_helper.cu    ├── trt_plugin.cpp    ├── ...    ├── trt_ops.cpp    └── trt_ops_kernel.cu ``` 重构之后,它的结构如下所示 ``` . ├── common │ ├── box_iou_rotated_utils.hpp │ ├── parrots_cpp_helper.hpp │ ├── parrots_cuda_helper.hpp │ ├── pytorch_cpp_helper.hpp │ ├── pytorch_cuda_helper.hpp │   └── cuda │   ├── common_cuda_helper.hpp │   ├── parrots_cudawarpfunction.cuh │   ├── ... 
│   └── ops_cuda_kernel.cuh ├── onnxruntime │   ├── onnxruntime_register.h │   ├── onnxruntime_session_options_config_keys.h │   ├── ort_mmcv_utils.h │   ├── ... │   ├── onnx_ops.h │   └── cpu │ ├── onnxruntime_register.cpp │      ├── ... │      └── onnx_ops_impl.cpp ├── parrots │   ├── ... │   ├── ops.cpp │   ├── ops_parrots.cpp │   └── ops_pytorch.h ├── pytorch │   ├── info.cpp │   ├── pybind.cpp │   ├── ... │   ├── ops.cpp │   └── cuda │      ├── ... │      └── ops_cuda.cu └── tensorrt ├── trt_cuda_helper.cuh ├── trt_plugin_helper.hpp ├── trt_plugin.hpp ├── trt_serialize.hpp ├── ... ├── trt_ops.hpp └── plugins    ├── trt_cuda_helper.cu    ├── trt_plugin.cpp    ├── ...    ├── trt_ops.cpp    └── trt_ops_kernel.cu ``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/conf.py ================================================ # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/master/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
# import os import sys import pytorch_sphinx_theme from sphinx.builders.html import StandaloneHTMLBuilder sys.path.insert(0, os.path.abspath('../..')) version_file = '../../mmcv/version.py' with open(version_file, 'r') as f: exec(compile(f.read(), version_file, 'exec')) __version__ = locals()['__version__'] # -- Project information ----------------------------------------------------- project = 'mmcv' copyright = '2018-2021, OpenMMLab' author = 'MMCV Authors' # The short X.Y version version = __version__ # The full version, including alpha/beta/rc tags release = __version__ # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', 'sphinx.ext.autosectionlabel', 'sphinx_markdown_tables', 'myst_parser', 'sphinx_copybutton', ] # yapf: disable autodoc_mock_imports = ['mmcv._ext', 'mmcv.utils.ext_loader', 'torchvision'] autosectionlabel_prefix_document = True # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = { '.rst': 'restructuredtext', '.md': 'markdown', } # The master toctree document. master_doc = 'index' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = 'zh_CN' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. 
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # # html_theme = 'sphinx_rtd_theme' html_theme = 'pytorch_sphinx_theme' html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = { 'menu': [ { 'name': 'GitHub', 'url': 'https://github.com/open-mmlab/mmcv' }, ], # Specify the language of shared menu 'menu_lang': 'cn', } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] html_css_files = ['css/readthedocs.css'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. # # html_sidebars = {} # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = 'mmcvdoc' # -- Options for LaTeX output ------------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. 
# # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'mmcv.tex', 'mmcv Documentation', 'MMCV Contributors', 'manual'), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [(master_doc, 'mmcv', 'mmcv Documentation', [author], 1)] # -- Options for Texinfo output ---------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'mmcv', 'mmcv Documentation', author, 'mmcv', 'One line description of project.', 'Miscellaneous'), ] # -- Options for Epub output ------------------------------------------------- # Bibliographic Dublin Core info. epub_title = project # The unique identifier of the text. This can be a ISBN number # or the project homepage. # # epub_identifier = '' # A unique identification for the text. # # epub_uid = '' # A list of files that should not be packed into the epub file. epub_exclude_files = ['search.html'] # set priority when building html StandaloneHTMLBuilder.supported_image_types = [ 'image/svg+xml', 'image/gif', 'image/png', 'image/jpeg' ] # -- Extension configuration ------------------------------------------------- # Ignore >>> when copying code copybutton_prompt_text = r'>>> |\.\.\. 
' copybutton_prompt_is_regexp = True ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/deployment/onnx.md ================================================ ## MMCV中ONNX模块简介 (实验性) ### register_extra_symbolics 在将PyTorch模型导出成ONNX时,需要注册额外的符号函数 #### 范例 ```python import mmcv from mmcv.onnx import register_extra_symbolics opset_version = 11 register_extra_symbolics(opset_version) ``` #### 常见问题 - 无 ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/deployment/onnxruntime_custom_ops.md ================================================ ## ONNX Runtime自定义算子 - [ONNX Runtime自定义算子](#onnx-runtime自定义算子) - [SoftNMS](#softnms) - [描述](#描述) - [模型参数](#模型参数) - [输入](#输入) - [输出](#输出) - [类型约束](#类型约束) - [RoIAlign](#roialign) - [描述](#描述-1) - [模型参数](#模型参数-1) - [输入](#输入-1) - [输出](#输出-1) - [类型约束](#类型约束-1) - [NMS](#nms) - [描述](#描述-2) - [模型参数](#模型参数-2) - [输入](#输入-2) - [输出](#输出-2) - [类型约束](#类型约束-2) - [grid_sampler](#grid_sampler) - [描述](#描述-3) - [模型参数](#模型参数-3) - [输入](#输入-3) - [输出](#输出-3) - [类型约束](#类型约束-3) - [CornerPool](#cornerpool) - [描述](#描述-4) - [模型参数](#模型参数-4) - [输入](#输入-4) - [输出](#输出-4) - [类型约束](#类型约束-4) - [cummax](#cummax) - [描述](#描述-5) - [模型参数](#模型参数-5) - [输入](#输入-5) - [输出](#输出-5) - [类型约束](#类型约束-5) - [cummin](#cummin) - [描述](#描述-6) - [模型参数](#模型参数-6) - [输入](#输入-6) - [输出](#输出-6) - [类型约束](#类型约束-6) - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d) - [描述](#描述-7) - [模型参数](#模型参数-7) - [输入](#输入-7) - [输出](#输出-7) - [类型约束](#类型约束-7) ### SoftNMS #### 描述 根据`scores`计算`boxes`的soft NMS。 请阅读[Soft-NMS -- Improving Object Detection With One Line of Code](https://arxiv.org/abs/1704.04503)了解细节。 #### 模型参数 | 类型 | 参数名 | 描述 | | ------- | --------------- | ------------------------------------------------------- | | `float` | `iou_threshold` | 用来判断候选框重合度的阈值,取值范围[0, 1]。默认值为0 | | `float` | `sigma` | 高斯方法的超参数 | | `float` | `min_score` | NMS的score阈值 | | `int` | `method` | NMS的计算方式, (0: `naive`, 1: `linear`, 2: `gaussian`) 
| | `int` | `offset` | 用来计算候选框的宽高(x2 - x1 + offset)。可选值0或1 | #### 输入
boxes: T
输入候选框。形状为(N, 4)的二维张量,N为候选框数量。
scores: T
输入得分。形状为(N, )的一维张量。
#### 输出
dets: T
输出的检测框与得分。形状为(num_valid_boxes, 5)的二维张量,内容为[[x1, y1, x2, y2, score], ...]。num_valid_boxes是合法的检测框数量。
indices: tensor(int64)
输出序号。形状为(num_valid_boxes, )的一维张量。
#### 类型约束 - T:tensor(float32) ### RoIAlign #### 描述 在特征图上计算RoIAlign,通常在双阶段目标检测模型的bbox_head中使用 #### 模型参数 | 类型 | 参数名 | 描述 | | ------- | ---------------- | ------------------------------------------------------- | | `int` | `output_height` | roi特征的输出高度 | | `int` | `output_width` | roi特征的输出宽度 | | `float` | `spatial_scale` | 输入检测框的缩放系数 | | `int` | `sampling_ratio` | 输出的采样率。`0`表示使用密集采样 | | `str` | `mode` | 池化方式。 `avg`或`max` | | `int` | `aligned` | 如果`aligned=1`,则像素会进行-0.5的偏移以达到更好的对齐 | #### 输入
input: T
输入特征图;形状为(N, C, H, W)的四维张量,其中N为batch大小,C为输入通道数,H和W为输入特征图的高和宽。
rois: T
需要进行池化的感兴趣区域;形状为(num_rois, 5)的二维张量,内容为[[batch_index, x1, y1, x2, y2], ...]。rois的坐标为输入特征图的坐标系。
#### 输出
feat: T
池化的输出;形状为(num_rois, C, output_height, output_width)的四维张量。每个输出特征feat[i]都与输入感兴趣区域rois[i]一一对应。
#### 类型约束 - T:tensor(float32) ### NMS #### 描述 根据IoU阈值对候选框进行非极大值抑制。 #### 模型参数 | 类型 | 参数名 | 描述 | | ------- | --------------- | ----------------------------------------------------- | | `float` | `iou_threshold` | 用来判断候选框重合度的阈值,取值范围[0, 1]。默认值为0 | | `int` | `offset` | 用来计算候选框的宽高(x2 - x1 + offset)。可选值0或1 | #### 输入
boxes: T
输入候选框。形状为(N, 4)的二维张量,N为候选框数量。
scores: T
输入得分。形状为(N, )的一维张量。
#### 输出
indices: tensor(int32, Linear)
被选中的候选框索引。形状为(num_valid_boxes, )的一维张量,num_valid_boxes表示被选上的候选框数量。
#### 类型约束 - T:tensor(float32) ### grid_sampler #### 描述 根据`grid`的像素位置对`input`进行网格采样。 #### 模型参数 | 类型 | 参数名 | 描述 | | ----- | -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | | `int` | `interpolation_mode` | 计算输出使用的插值模式。(0: `bilinear` , 1: `nearest`) | | `int` | `padding_mode` | 边缘填充模式。(0: `zeros`, 1: `border`, 2: `reflection`) | | `int` | `align_corners` | 如果`align_corners=1`,则极值(`-1`和`1`)会被当做输入边缘像素的中心点。如果`align_corners=0`,则它们会被看做是边缘像素的边缘点,减小分辨率对采样的影响 | #### 输入
input: T
输入特征;形状为(N, C, inH, inW)的四维张量,其中N为batch大小,C为输入通道数,inH和inW为输入特征图的高和宽。
grid: T
输入网格;形状为(N, outH, outW, 2)的四维张量,outH和outW为输出的高和宽。
#### 输出
output: T
输出特征;形状为(N, C, outH, outW)的四维张量。
#### 类型约束 - T:tensor(float32, Linear) ### CornerPool #### 描述 对`input`计算CornerPool。请阅读[CornerNet -- Detecting Objects as Paired Keypoints](https://arxiv.org/abs/1808.01244)了解更多细节。 #### 模型参数 | 类型 | 参数名 | 描述 | | ----- | ------ | -------------------------------------------------------- | | `int` | `mode` | 池化模式。(0: `top`, 1: `bottom`, 2: `left`, 3: `right`) | #### 输入
input: T
输入特征;形状为(N, C, H, W)的四维张量,其中N为batch大小,C为输入通道数,H和W为输入特征图的高和宽。
#### 输出
output: T
输出特征;形状为(N, C, H, W)的四维张量。
#### 类型约束 - T:tensor(float32) ### cummax #### 描述 返回一个元组(`values`, `indices`),其中`values`为`input`第`dim`维的累计最大值,`indices`为第`dim`维最大值位置。请阅读[torch.cummax](https://pytorch.org/docs/stable/generated/torch.cummax.html)了解更多细节。 #### 模型参数 | 类型 | 参数名 | 描述 | | ----- | ------ | ------------------ | | `int` | `dim` | 进行累计计算的维度 | #### 输入
input: T
输入张量;可以是任意形状;也支持空Tensor
#### 输出
output: T
`input`第`dim`维的累计最大值,形状与`input`相同。类型和`input`一致
indices: tensor(int64)
第`dim`维最大值位置,形状与`input`相同。
#### 类型约束 - T:tensor(float32) ### cummin #### 描述 返回一个元组(`values`, `indices`),其中`values`为`input`第`dim`维的累计最小值,`indices`为第`dim`维最小值位置。请阅读[torch.cummin](https://pytorch.org/docs/stable/generated/torch.cummin.html)了解更多细节。 #### 模型参数 | 类型 | 参数名 | 描述 | | ----- | ------ | ------------------ | | `int` | `dim` | 进行累计计算的维度 | #### 输入
input: T
输入张量;可以是任意形状;也支持空Tensor
#### 输出
output: T
`input`第`dim`维的累计最小值,形状与`input`相同。类型和`input`一致
indices: tensor(int64)
第`dim`维最小值位置,形状与`input`相同。
#### 类型约束 - T:tensor(float32) ### MMCVModulatedDeformConv2d #### 描述 在输入特征上计算Modulated Deformable Convolution,请阅读[Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline)了解更多细节。 #### 模型参数 | 类型 | 参数名 | 描述 | | -------------- | ------------------- | ------------------------------------------------------------- | | `list of ints` | `stride` | 卷积的步长 (sH, sW) | | `list of ints` | `padding` | 输入特征填充大小 (padH, padW) | | `list of ints` | `dilation` | 卷积核各元素间隔 (dH, dW) | | `int` | `deformable_groups` | 可变偏移量的分组,通常置位1即可 | | `int` | `groups` | 卷积分组数,`input_channel`会根据这个值被分为数个分组进行计算 | #### 输入
inputs[0]: T
输入特征;形状为(N, C, inH, inW)的四维张量,其中N为batch大小,C为输入通道数,inH和inW为输入特征图的高和宽。
inputs[1]: T
输入偏移量;形状为(N, deformable_group* 2* kH* kW, outH, outW)的四维张量,kH和kW为卷积核的高和宽,outH和outW为输出特征图的高和宽。
inputs[2]: T
输入掩码;形状为(N, deformable_group* kH* kW, outH, outW)的四维张量。
inputs[3]: T
输入权重;形状为(output_channel, input_channel, kH, kW)的四维张量。
inputs[4]: T, optional
输入偏置;形状为(output_channel)的一维张量。
#### 输出
outputs[0]: T
输出特征;形状为(N, output_channel, outH, outW)的四维张量。
#### 类型约束 - T:tensor(float32, Linear) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/deployment/onnxruntime_op.md ================================================ ## MMCV中的ONNX Runtime自定义算子 ### ONNX Runtime介绍 **ONNX Runtime**是一个跨平台的推理与训练加速器,适配许多常用的机器学习/深度神经网络框架。请访问[github](https://github.com/microsoft/onnxruntime)了解更多信息。 ### ONNX介绍 **ONNX**是**Open Neural Network Exchange**的缩写,是许多机器学习/深度神经网络框架使用的*中间表示(IR)*。请访问[github](https://github.com/onnx/onnx)了解更多信息。 ### 为什么要在MMCV中添加ONNX自定义算子? - 为了验证ONNX模型在ONNX Runtime下的推理的正确性。 - 为了方便使用了`mmcv.ops`自定义算子的模型的部署工作。 ### MMCV已支持的算子 | 算子 | CPU | GPU | MMCV版本 | | :------------------------------------------------------------------------------: | :---: | :---: | :------: | | [SoftNMS](onnxruntime_custom_ops.md#softnms) | Y | N | 1.2.3 | | [RoIAlign](onnxruntime_custom_ops.md#roialign) | Y | N | 1.2.5 | | [NMS](onnxruntime_custom_ops.md#nms) | Y | N | 1.2.7 | | [grid_sampler](onnxruntime_custom_ops.md#grid_sampler) | Y | N | 1.3.1 | | [CornerPool](onnxruntime_custom_ops.md#cornerpool) | Y | N | 1.3.4 | | [cummax](onnxruntime_custom_ops.md#cummax) | Y | N | 1.3.4 | | [cummin](onnxruntime_custom_ops.md#cummin) | Y | N | 1.3.4 | | [MMCVModulatedDeformConv2d](onnxruntime_custom_ops.md#mmcvmodulateddeformconv2d) | Y | N | 1.3.12 | ### 如何编译ONNX Runtime自定义算子? 
*请注意我们仅在**onnxruntime>=1.8.1**的Linux x86-64 cpu平台上进行过测试* #### 准备工作 - 克隆代码仓库 ```bash git clone https://github.com/open-mmlab/mmcv.git ``` - 从ONNX Runtime下载`onnxruntime-linux`:[releases](https://github.com/microsoft/onnxruntime/releases/tag/v1.8.1),解压缩,根据路径创建变量`ONNXRUNTIME_DIR`并把路径下的lib目录添加到`LD_LIBRARY_PATH`,步骤如下: ```bash wget https://github.com/microsoft/onnxruntime/releases/download/v1.8.1/onnxruntime-linux-x64-1.8.1.tgz tar -zxvf onnxruntime-linux-x64-1.8.1.tgz cd onnxruntime-linux-x64-1.8.1 export ONNXRUNTIME_DIR=$(pwd) export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH ``` #### Linux系统下编译 ```bash cd mmcv ## to MMCV root directory MMCV_WITH_OPS=1 MMCV_WITH_ORT=1 python setup.py develop ``` ### 如何在python下使用ONNX Runtime对导出的ONNX模型做编译 使用`pip`安装ONNX Runtime ```bash pip install onnxruntime==1.8.1 ``` 推理范例 ```python import os import numpy as np import onnxruntime as ort from mmcv.ops import get_onnxruntime_op_path ort_custom_op_path = get_onnxruntime_op_path() assert os.path.exists(ort_custom_op_path) session_options = ort.SessionOptions() session_options.register_custom_ops_library(ort_custom_op_path) ## exported ONNX model with custom operators onnx_file = 'sample.onnx' input_data = np.random.randn(1, 3, 224, 224).astype(np.float32) sess = ort.InferenceSession(onnx_file, session_options) onnx_results = sess.run(None, {'input' : input_data}) ``` ### 如何为MMCV添加ONNX Runtime的自定义算子 #### 开发前提醒 - 该算子的ONNX Runtime实现尚未在MMCV中支持[已实现算子列表](https://github.com/microsoft/onnxruntime/blob/master/docs/OperatorKernels.md)。 - 确保该自定义算子可以被ONNX导出。 #### 添加方法 以`soft_nms`为例: 1. 在ONNX Runtime头文件目录`mmcv/ops/csrc/onnxruntime/`下添加头文件`soft_nms.h` 2. 在ONNX Runtime源码目录`mmcv/ops/csrc/onnxruntime/cpu/`下添加算子实现`soft_nms.cpp` 3. 在[onnxruntime_register.cpp](../../mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp)中注册实现的算子`soft_nms` ```c++ #include "soft_nms.h" SoftNmsOp c_SoftNmsOp; if (auto status = ortApi->CustomOpDomain_Add(domain, &c_SoftNmsOp)) { return status; } ``` 4. 
在`tests/test_ops/test_onnx.py`添加单元测试, 可以参考[here](../../tests/test_ops/test_onnx.py)。 **最后,欢迎为MMCV添加ONNX Runtime自定义算子** :nerd_face: ### 已知问题 - "RuntimeError: tuple appears in op that does not forward tuples, unsupported kind: `prim::PythonOp`." 1. 请注意`cummax`和`cummin`算子是在torch >= 1.5.0被添加的。但他们需要在torch version >= 1.7.0才能正确导出。否则会在导出时发生上面的错误。 2. 解决方法:升级PyTorch到1.7.0以上版本 ### 引用 - [How to export Pytorch model with custom op to ONNX and run it in ONNX Runtime](https://github.com/onnx/tutorials/blob/master/PyTorchCustomOperator/README.md) - [How to add a custom operator/kernel in ONNX Runtime](https://github.com/microsoft/onnxruntime/blob/master/docs/AddingCustomOp.md) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/deployment/tensorrt_custom_ops.md ================================================ ## TensorRT自定义算子 - [TensorRT自定义算子](#tensorrt自定义算子) - [MMCVRoIAlign](#mmcvroialign) - [描述](#描述) - [模型参数](#模型参数) - [输入](#输入) - [输出](#输出) - [类型约束](#类型约束) - [ScatterND](#scatternd) - [描述](#描述-1) - [模型参数](#模型参数-1) - [输入](#输入-1) - [输出](#输出-1) - [类型约束](#类型约束-1) - [NonMaxSuppression](#nonmaxsuppression) - [描述](#描述-2) - [模型参数](#模型参数-2) - [输入](#输入-2) - [输出](#输出-2) - [类型约束](#类型约束-2) - [MMCVDeformConv2d](#mmcvdeformconv2d) - [描述](#描述-3) - [模型参数](#模型参数-3) - [输入](#输入-3) - [输出](#输出-3) - [类型约束](#类型约束-3) - [grid_sampler](#grid_sampler) - [描述](#描述-4) - [模型参数](#模型参数-4) - [输入](#输入-4) - [输出](#输出-4) - [类型约束](#类型约束-4) - [cummax](#cummax) - [描述](#描述-5) - [模型参数](#模型参数-5) - [输入](#输入-5) - [输出](#输出-5) - [类型约束](#类型约束-5) - [cummin](#cummin) - [描述](#描述-6) - [模型参数](#模型参数-6) - [输入](#输入-6) - [输出](#输出-6) - [类型约束](#类型约束-6) - [MMCVInstanceNormalization](#mmcvinstancenormalization) - [描述](#描述-7) - [模型参数](#模型参数-7) - [输入](#输入-7) - [输出](#输出-7) - [类型约束](#类型约束-7) - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d) - [描述](#描述-8) - [模型参数](#模型参数-8) - [输入](#输入-8) - [输出](#输出-8) - [类型约束](#类型约束-8) ### MMCVRoIAlign #### 描述 在特征图上计算RoIAlign,在多数双阶段目标检测模型的bbox_head中使用 #### 模型参数 
| 类型 | 参数名 | 描述 | | ------- | ---------------- | ------------------------------------------------------- | | `int` | `output_height` | roi特征的输出高度 | | `int` | `output_width` | roi特征的输出宽度 | | `float` | `spatial_scale` | 输入检测框的缩放系数 | | `int` | `sampling_ratio` | 输出的采样率。`0`表示使用密集采样 | | `str` | `mode` | 池化方式。 `avg`或`max` | | `int` | `aligned` | 如果`aligned=1`,则像素会进行-0.5的偏移以达到更好的对齐 | #### 输入
inputs[0]: T
输入特征图;形状为(N, C, H, W)的四维张量,其中N为batch大小,C为输入通道数,H和W为输入特征图的高和宽。
inputs[1]: T
需要进行池化的感兴趣区域;形状为(num_rois, 5)的二维张量,内容为[[batch_index, x1, y1, x2, y2], ...]。rois的坐标为输入特征图的坐标系。
#### 输出
outputs[0]: T
池化的输出;形状为(num_rois, C, output_height, output_width)的四维张量。每个输出特征feat[i]都与输入感兴趣区域rois[i]一一对应。
#### 类型约束 - T:tensor(float32, Linear) ### ScatterND #### 描述 ScatterND接收三个输入,分别为秩为r >= 1的`data`,秩为q >= 1的`indices`以及秩为 q + r - indices.shape[-1] -1 的`update`。输出的计算方式为:首先创建一个`data`的拷贝,然后根据`indces`的值使用`update`对拷贝的`data`进行更新。注意`indices`中不应该存在相同的条目,也就是说对同一个位置进行一次以上的更新是不允许的。 输出的计算方式可以参考如下代码: ```python output = np.copy(data) update_indices = indices.shape[:-1] for idx in np.ndindex(update_indices): output[indices[idx]] = updates[idx] ``` #### 模型参数 无 #### 输入
inputs[0]: T
秩为r >= 1的输入`data`
inputs[1]: tensor(int32, Linear)
秩为q >= 1的输入`indices`
inputs[2]: T
秩为 q + r - indices.shape[-1] -1 的输入`update`
#### 输出
outputs[0]: T
秩为r >= 1的输出张量
#### 类型约束 - T:tensor(float32, Linear), tensor(int32, Linear) ### NonMaxSuppression #### 描述 根据IoU阈值对候选框进行非极大值抑制。 #### 模型参数 | 类型 | 参数名 | 描述 | | ------- | ---------------------------- | ---------------------------------------------------------------------------------------- | | `int` | `center_point_box` | 0 - 候选框的格式为[y1, x1, y2, x2], 1-候选框的格式为[x_center, y_center, width, height] | | `int` | `max_output_boxes_per_class` | 每一类最大的输出检测框个数。默认为0,输出检测框个数等于输入候选框数 | | `float` | `iou_threshold` | 用来判断候选框重合度的阈值,取值范围[0, 1]。默认值为0 | | `float` | `score_threshold` | 用来判断候选框是否合法的阈值 | | `int` | `offset` | 检测框长宽计算方式为(x2 - x1 + offset),可选值0或1 | #### 输入
inputs[0]: T
输入候选框。形状为(num_batches, spatial_dimension, 4)的三维张量
inputs[1]: T
输入得分。形状为(num_batches, num_classes, spatial_dimension)的三维张量
#### 输出
outputs[0]: tensor(int32, Linear)
被选中的候选框索引。形状为(num_selected_indices, 3)的二维张量。每一行内容为[batch_index, class_index, box_index]。
其中 num_selected_indices=num_batches* num_classes* min(max_output_boxes_per_class, spatial_dimension)。
所有未被选中的候选框索引都会被填充为-1
#### 类型约束 - T:tensor(float32, Linear) ### MMCVDeformConv2d #### 描述 在输入特征上计算Deformable Convolution,请阅读[Deformable Convolutional Network](https://arxiv.org/abs/1703.06211)了解更多细节。 #### 模型参数 | 类型 | 参数名 | 描述 | | -------------- | ------------------ | --------------------------------------------------------------------------------------------- | | `list of ints` | `stride` | 卷积的步长 (sH, sW) | | `list of ints` | `padding` | 输入特征填充大小 (padH, padW) | | `list of ints` | `dilation` | 卷积核各元素间隔 (dH, dW) | | `int` | `deformable_group` | 可变偏移量的分组 | | `int` | `group` | 卷积分组数,`input_channel`会根据这个值被分为数个分组进行计算 | | `int` | `im2col_step` | 可变卷积使用im2col计算卷积。输入与偏移量会以im2col_step为步长分块计算,减少临时空间的使用量。 | #### 输入
inputs[0]: T
输入特征;形状为(N, C, inH, inW)的四维张量,其中N为batch大小,C为输入通道数,inH和inW为输入特征图的高和宽
inputs[1]: T
输入偏移量;形状为(N, deformable_group* 2* kH* kW, outH, outW)的四维张量,kH和kW为卷积核的高和宽,outH和outW为输出特征图的高和宽
inputs[2]: T
输入权重;形状为(output_channel, input_channel, kH, kW)的四维张量
#### 输出
outputs[0]: T
输出特征;形状为(N, output_channel, outH, outW)的四维张量
#### 类型约束 - T:tensor(float32, Linear) ### grid_sampler #### 描述 根据`grid`的像素位置对`input`进行网格采样。 #### 模型参数 | 类型 | 参数名 | 描述 | | ----- | -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | | `int` | `interpolation_mode` | 计算输出使用的插值模式。(0: `bilinear` , 1: `nearest`) | | `int` | `padding_mode` | 边缘填充模式。(0: `zeros`, 1: `border`, 2: `reflection`) | | `int` | `align_corners` | 如果`align_corners=1`,则极值(`-1`和`1`)会被当做输入边缘像素的中心点。如果`align_corners=0`,则它们会被看做是边缘像素的边缘点,减小分辨率对采样的影响 | #### 输入
inputs[0]: T
输入特征;形状为(N, C, inH, inW)的四维张量,其中N为batch大小,C为输入通道数,inH和inW为输入特征图的高和宽
inputs[1]: T
输入网格;形状为(N, outH, outW, 2)的四维张量,outH和outW为输出的高和宽
#### 输出
outputs[0]: T
输出特征;形状为(N, C, outH, outW)的四维张量
#### 类型约束 - T:tensor(float32, Linear) ### cummax #### 描述 返回一个元组(`values`, `indices`),其中`values`为`input`第`dim`维的累计最大值,`indices`为第`dim`维最大值位置。请阅读[torch.cummax](https://pytorch.org/docs/stable/generated/torch.cummax.html)了解更多细节。 #### 模型参数 | 类型 | 参数名 | 描述 | | ----- | ------ | ------------------ | | `int` | `dim` | 进行累计计算的维度 | #### 输入
inputs[0]: T
输入张量;可以是任意形状
#### 输出
outputs[0]: T
`input`第`dim`维的累计最大值,形状与`input`相同。类型和`input`一致
outputs[1]: tensor(int32, Linear)
第`dim`维最大值位置,形状与`input`相同
#### 类型约束 - T:tensor(float32, Linear) ### cummin #### 描述 返回一个元组(`values`, `indices`),其中`values`为`input`第`dim`维的累计最小值,`indices`为第`dim`维最小值位置。请阅读[torch.cummin](https://pytorch.org/docs/stable/generated/torch.cummin.html)了解更多细节。 #### 模型参数 | 类型 | 参数名 | 描述 | | ----- | ------ | ------------------ | | `int` | `dim` | 进行累计计算的维度 | #### 输入
inputs[0]: T
输入张量;可以是任意形状
#### 输出
outputs[0]: T
`input`第`dim`维的累计最小值,形状与`input`相同。类型和`input`一致
outputs[1]: tensor(int32, Linear)
第`dim`维最小值位置,形状与`input`相同
#### 类型约束 - T:tensor(float32, Linear) ### MMCVInstanceNormalization #### 描述 对特征计算instance normalization,请阅读[Instance Normalization: The Missing Ingredient for Fast Stylization](https://arxiv.org/abs/1607.08022)了解更多详细信息。 #### 模型参数 | 类型 | 参数名 | 描述 | | ------- | --------- | ---------------------------- | | `float` | `epsilon` | 用来避免除0错误。默认为1e-05 | #### 输入
inputs[0]: T
输入特征。形状为(N, C, H, W)的四维张量,其中N为batch大小,C为输入通道数,H和W为输入特征图的高和宽
inputs[1]: T
输入缩放系数。形状为(C,)的一维张量
inputs[2]: T
输入偏移量。形状为(C,)的一维张量
#### 输出
outputs[0]: T
输出特征。形状为(N, C, H, W)的四维张量
#### 类型约束 - T:tensor(float32, Linear) ### MMCVModulatedDeformConv2d #### 描述 在输入特征上计算Modulated Deformable Convolution,请阅读[Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline)了解更多细节。 #### 模型参数 | 类型 | 参数名 | 描述 | | -------------- | ------------------- | ------------------------------------------------------------- | | `list of ints` | `stride` | 卷积的步长 (sH, sW) | | `list of ints` | `padding` | 输入特征填充大小 (padH, padW) | | `list of ints` | `dilation` | 卷积核各元素间隔 (dH, dW) | | `int` | `deformable_groups` | 可变偏移量的分组,通常置位1即可 | | `int` | `groups` | 卷积分组数,`input_channel`会根据这个值被分为数个分组进行计算 | #### 输入
inputs[0]: T
输入特征;形状为(N, C, inH, inW)的四维张量,其中N为batch大小,C为输入通道数,inH和inW为输入特征图的高和宽
inputs[1]: T
输入偏移量;形状为(N, deformable_group* 2* kH* kW, outH, outW)的四维张量,kH和kW为卷积核的高和宽,outH和outW为输出特征图的高和宽
inputs[2]: T
输入掩码;形状为(N, deformable_group* kH* kW, outH, outW)的四维张量
inputs[3]: T
输入权重;形状为(output_channel, input_channel, kH, kW)的四维张量
inputs[4]: T, optional
输入偏置;形状为(output_channel)的一维张量
#### 输出
outputs[0]: T
输出特征;形状为(N, output_channel, outH, outW)的四维张量
#### 类型约束 - T:tensor(float32, Linear) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/deployment/tensorrt_plugin.md ================================================ ## MMCV中的TensorRT自定义算子 (实验性) - [MMCV中的TensorRT自定义算子 (实验性)](#mmcv中的tensorrt自定义算子-实验性) - [介绍](#介绍) - [MMCV中的TensorRT插件列表](#mmcv中的tensorrt插件列表) - [如何编译MMCV中的TensorRT插件](#如何编译mmcv中的tensorrt插件) - [准备](#准备) - [在Linux上编译](#在linux上编译) - [创建TensorRT推理引擎并在python下进行推理](#创建tensorrt推理引擎并在python下进行推理) - [如何在MMCV中添加新的TensorRT自定义算子](#如何在mmcv中添加新的tensorrt自定义算子) - [主要流程](#主要流程) - [注意](#注意) - [已知问题](#已知问题) - [引用](#引用) ### 介绍 **NVIDIA TensorRT**是一个为深度学习模型高性能推理准备的软件开发工具(SDK)。它包括深度学习推理优化器和运行时,可为深度学习推理应用提供低延迟和高吞吐量。请访问[developer's website](https://developer.nvidia.com/tensorrt)了解更多信息。 为了简化TensorRT部署带有MMCV自定义算子的模型的流程,MMCV中添加了一系列TensorRT插件。 ### MMCV中的TensorRT插件列表 | ONNX算子 | TensorRT插件 | MMCV版本 | | :-----------------------: | :-----------------------------------------------------------------------------: | :------: | | MMCVRoiAlign | [MMCVRoiAlign](./tensorrt_custom_ops.md#mmcvroialign) | 1.2.6 | | ScatterND | [ScatterND](./tensorrt_custom_ops.md#scatternd) | 1.2.6 | | NonMaxSuppression | [NonMaxSuppression](./tensorrt_custom_ops.md#nonmaxsuppression) | 1.3.0 | | MMCVDeformConv2d | [MMCVDeformConv2d](./tensorrt_custom_ops.md#mmcvdeformconv2d) | 1.3.0 | | grid_sampler | [grid_sampler](./tensorrt_custom_ops.md#grid-sampler) | 1.3.1 | | cummax | [cummax](./tensorrt_custom_ops.md#cummax) | 1.3.5 | | cummin | [cummin](./tensorrt_custom_ops.md#cummin) | 1.3.5 | | MMCVInstanceNormalization | [MMCVInstanceNormalization](./tensorrt_custom_ops.md#mmcvinstancenormalization) | 1.3.5 | | MMCVModulatedDeformConv2d | [MMCVModulatedDeformConv2d](./tensorrt_custom_ops.md#mmcvmodulateddeformconv2d) | master | 注意 - 以上所有算子均在 TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0 环境下开发。 ### 如何编译MMCV中的TensorRT插件 #### 准备 - 克隆代码仓库 ```bash git clone https://github.com/open-mmlab/mmcv.git ``` - 
安装TensorRT 从 [NVIDIA Developer Zone](https://developer.nvidia.com/nvidia-tensorrt-download) 下载合适的TensorRT版本。 比如,对安装了cuda-10.2的x86-64的Ubuntu 16.04,下载文件为`TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0.tar.gz`. 然后使用下面方式安装并配置环境 ```bash cd ~/Downloads tar -xvzf TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0.tar.gz export TENSORRT_DIR=`pwd`/TensorRT-7.2.1.6 export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TENSORRT_DIR/lib ``` 安装python依赖: tensorrt, graphsurgeon, onnx-graphsurgeon ```bash pip install $TENSORRT_DIR/python/tensorrt-7.2.1.6-cp37-none-linux_x86_64.whl pip install $TENSORRT_DIR/onnx_graphsurgeon/onnx_graphsurgeon-0.2.6-py2.py3-none-any.whl pip install $TENSORRT_DIR/graphsurgeon/graphsurgeon-0.4.5-py2.py3-none-any.whl ``` 想了解更多通过tar包安装TensorRT,请访问[Nvidia' website](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-721/install-guide/index.html#installing-tar). #### 在Linux上编译 ```bash cd mmcv ## to MMCV root directory MMCV_WITH_OPS=1 MMCV_WITH_TRT=1 pip install -e . ``` ### 创建TensorRT推理引擎并在python下进行推理 范例如下: ```python import torch import onnx from mmcv.tensorrt import (TRTWrapper, onnx2trt, save_trt_engine, is_tensorrt_plugin_loaded) assert is_tensorrt_plugin_loaded(), 'Requires to complie TensorRT plugins in mmcv' onnx_file = 'sample.onnx' trt_file = 'sample.trt' onnx_model = onnx.load(onnx_file) ## Model input inputs = torch.rand(1, 3, 224, 224).cuda() ## Model input shape info opt_shape_dict = { 'input': [list(inputs.shape), list(inputs.shape), list(inputs.shape)] } ## Create TensorRT engine max_workspace_size = 1 << 30 trt_engine = onnx2trt( onnx_model, opt_shape_dict, max_workspace_size=max_workspace_size) ## Save TensorRT engine save_trt_engine(trt_engine, trt_file) ## Run inference with TensorRT trt_model = TRTWrapper(trt_file, ['input'], ['output']) with torch.no_grad(): trt_outputs = trt_model({'input': inputs}) output = trt_outputs['output'] ``` ### 如何在MMCV中添加新的TensorRT自定义算子 #### 主要流程 下面是主要的步骤: 1. 添加c++头文件 2. 添加c++源文件 3. 
添加cuda kernel文件 4. 在`trt_plugin.cpp`中注册插件 5. 在`tests/test_ops/test_tensorrt.py`中添加单元测试 **以RoIAlign算子插件`roi_align`举例。** 1. 在TensorRT包含目录`mmcv/ops/csrc/tensorrt/`中添加头文件`trt_roi_align.hpp` 2. 在TensorRT源码目录`mmcv/ops/csrc/tensorrt/plugins/`中添加头文件`trt_roi_align.cpp` 3. 在TensorRT源码目录`mmcv/ops/csrc/tensorrt/plugins/`中添加cuda kernel文件`trt_roi_align_kernel.cu` 4. 在[trt_plugin.cpp](https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/tensorrt/plugins/trt_plugin.cpp)中注册`roi_align`插件 ```c++ #include "trt_plugin.hpp" #include "trt_roi_align.hpp" REGISTER_TENSORRT_PLUGIN(RoIAlignPluginDynamicCreator); extern "C" { bool initLibMMCVInferPlugins() { return true; } } // extern "C" ``` 5. 在`tests/test_ops/test_tensorrt.py`中添加单元测试 #### 注意 - 部分MMCV中的自定义算子存在对应的cuda实现,在进行TensorRT插件开发的时候可以参考。 ### 已知问题 - 无 ### 引用 - [Developer guide of Nvidia TensorRT](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html) - [TensorRT Open Source Software](https://github.com/NVIDIA/TensorRT) - [onnx-tensorrt](https://github.com/onnx/onnx-tensorrt) - [TensorRT python API](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/index.html) - [TensorRT c++ plugin API](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_plugin.html) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/faq.md ================================================ ## 常见问题 在这里我们列出了用户经常遇到的问题以及对应的解决方法。如果您遇到了其他常见的问题,并且知道可以帮到大家的解决办法, 欢迎随时丰富这个列表。 ### 安装问题 - KeyError: "xxx: 'yyy is not in the zzz registry'" 只有模块所在的文件被导入时,注册机制才会被触发,所以您需要在某处导入该文件,更多详情请查看 https://github.com/open-mmlab/mmdetection/issues/5974。 - "No module named 'mmcv.ops'"; "No module named 'mmcv._ext'" 1. 使用 `pip uninstall mmcv` 卸载您环境中的 mmcv 2. 
参考 [installation instruction](https://mmcv.readthedocs.io/en/latest/get_started/installation.html) 或者 [Build MMCV from source](https://mmcv.readthedocs.io/en/latest/get_started/build.html) 安装 mmcv-full - "invalid device function" 或者 "no kernel image is available for execution" 1. 检查 GPU 的 CUDA 计算能力 2. 运行 `python mmdet/utils/collect_env.py` 来检查 PyTorch、torchvision 和 MMCV 是否是针对正确的 GPU 架构构建的,您可能需要去设置 `TORCH_CUDA_ARCH_LIST` 来重新安装 MMCV。兼容性问题可能会出现在使用旧版的 GPUs,如:colab 上的 Tesla K80 (3.7) 3. 检查运行环境是否和 mmcv/mmdet 编译时的环境相同。例如,您可能使用 CUDA 10.0 编译 mmcv,但在 CUDA 9.0 的环境中运行它 - "undefined symbol" 或者 "cannot open xxx.so" 1. 如果符号和 CUDA/C++ 相关(例如:libcudart.so 或者 GLIBCXX),请检查 CUDA/GCC 运行时的版本是否和编译 mmcv 的一致 2. 如果符号和 PyTorch 相关(例如:符号包含 caffe、aten 和 TH),请检查 PyTorch 运行时的版本是否和编译 mmcv 的一致 3. 运行 `python mmdet/utils/collect_env.py` 以检查 PyTorch、torchvision 和 MMCV 构建和运行的环境是否相同 - "RuntimeError: CUDA error: invalid configuration argument" 这个错误可能是由于您的 GPU 性能不佳造成的。尝试降低[THREADS_PER_BLOCK](https://github.com/open-mmlab/mmcv/blob/cac22f8cf5a904477e3b5461b1cc36856c2793da/mmcv/ops/csrc/common_cuda_helper.hpp#L10) 的值并重新编译 mmcv。 - "RuntimeError: nms is not compiled with GPU support" 这个错误是由于您的 CUDA 环境没有正确安装。 您可以尝试重新安装您的 CUDA 环境,然后删除 mmcv/build 文件夹并重新编译 mmcv。 - "Segmentation fault" 1. 检查 GCC 的版本,通常是因为 PyTorch 版本与 GCC 版本不匹配 (例如 GCC < 4.9 ),我们推荐用户使用 GCC 5.4,我们也不推荐使用 GCC 5.5, 因为有反馈 GCC 5.5 会导致 "segmentation fault" 并且切换到 GCC 5.4 就可以解决问题 2. 检查是否正确安装 CUDA 版本的 PyTorch。输入以下命令并检查是否返回 True ```shell python -c 'import torch; print(torch.cuda.is_available())' ``` 3. 如果 `torch` 安装成功,那么检查 MMCV 是否安装成功。输入以下命令,如果没有报错说明 mmcv-full 安装成功。 ```shell python -c 'import mmcv; import mmcv.ops' ``` 4.
如果 MMCV 与 PyTorch 都安装成功了,则可以使用 `ipdb` 设置断点或者使用 `print` 函数,分析是哪一部分的代码导致了 `segmentation fault` - "libtorch_cuda_cu.so: cannot open shared object file" `mmcv-full` 依赖 `libtorch_cuda_cu.so` 文件,但程序运行时没能找到该文件。我们可以检查该文件是否存在 `~/miniconda3/envs/{environment-name}/lib/python3.7/site-packages/torch/lib` 也可以尝试重装 PyTorch。 - "fatal error C1189: #error: -- unsupported Microsoft Visual Studio version!" 如果您在 Windows 上编译 mmcv-full 并且 CUDA 的版本是 9.2,您很可能会遇到这个问题 `"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include\crt/host_config.h(133): fatal error C1189: #error: -- unsupported Microsoft Visual Studio version! Only the versions 2012, 2013, 2015 and 2017 are supported!"`,您可以尝试使用低版本的 Microsoft Visual Studio,例如 vs2017。 - "error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized" 如果您在 Windows 上编译 mmcv-full 并且 PyTorch 的版本是 1.5.0,您很可能会遇到这个问题 `- torch/csrc/jit/api/module.h(474): error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized`。解决这个问题的方法是将 `torch/csrc/jit/api/module.h` 文件中所有 `static constexpr bool all_slots = false;` 替换为 `static bool all_slots = false;`。更多细节可以查看 https://github.com/pytorch/pytorch/issues/39394。 - "error: a member with an in-class initializer must be const" 如果您在 Windows 上编译 mmcv-full 并且 PyTorch 的版本是 1.6.0,您很可能会遇到这个问题 `"- torch/include\torch/csrc/jit/api/module.h(483): error: a member with an in-class initializer must be const"`. 解决这个问题的方法是将 `torch/include\torch/csrc/jit/api/module.h` 文件中的所有 `CONSTEXPR_EXCEPT_WIN_CUDA ` 替换为 `const`。更多细节可以查看 https://github.com/open-mmlab/mmcv/issues/575。 - "error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized" 如果您在 Windows 上编译 mmcv-full 并且 PyTorch 的版本是 1.7.0,您很可能会遇到这个问题 `torch/include\torch/csrc/jit/ir/ir.h(1347): error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized`. 
解决这个问题的方法是修改 PyTorch 中的几个文件: - 删除 `torch/include\torch/csrc/jit/ir/ir.h` 文件中的 `static constexpr Symbol Kind = ::c10::prim::profile;` 和 `static constexpr Symbol Kind = ::c10::prim::profile_optional;` - 将 `torch\include\pybind11\cast.h` 文件中的 `explicit operator type&() { return *(this->value); }` 替换为 `explicit operator type&() { return *((type*)this->value); }` - 将 `torch/include\torch/csrc/jit/api/module.h` 文件中的 所有 `CONSTEXPR_EXCEPT_WIN_CUDA` 替换为 `const` 更多细节可以查看 https://github.com/pytorch/pytorch/pull/45956。 - MMCV 和 MMDetection 的兼容性问题;"ConvWS is already registered in conv layer" 请参考 [installation instruction](https://mmdetection.readthedocs.io/en/latest/get_started.html#installation) 为您的 MMDetection 版本安装正确版本的 MMCV。 ### 使用问题 - "RuntimeError: Expected to have finished reduction in the prior iteration before starting a new one" 1. 这个错误是因为有些参数没有参与 loss 的计算,可能是代码中存在多个分支,导致有些分支没有参与 loss 的计算。更多细节见 https://github.com/pytorch/pytorch/issues/55582。 2. 你可以设置 DDP 中的 `find_unused_parameters` 为 `True`,或者手动查找哪些参数没有用到。 - "RuntimeError: Trying to backward through the graph a second time" 不能同时设置 `GradientCumulativeOptimizerHook` 和 `OptimizerHook`,这会导致 `loss.backward()` 被调用两次,于是程序抛出 `RuntimeError`。我们只需设置其中的一个。更多细节见 https://github.com/open-mmlab/mmcv/issues/1379。 ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/get_started/build.md ================================================ ## 从源码编译 MMCV ### 在 Linux 或者 macOS 上编译 MMCV 克隆算法库 ```bash git clone https://github.com/open-mmlab/mmcv.git cd mmcv ``` 建议安装 `ninja` 以加快编译速度 ```bash pip install -r requirements/optional.txt ``` 你可以安装 lite 版本 ```bash pip install -e . ``` 也可以安装 full 版本 ```bash MMCV_WITH_OPS=1 pip install -e . ``` 如果是在 macOS 上编译,则需要在安装命令前添加一些环境变量 ```bash CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' ``` 例如 ```bash CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' MMCV_WITH_OPS=1 pip install -e .
``` ```{note} 如果你打算使用 `opencv-python-headless` 而不是 `opencv-python`,例如在一个很小的容器环境或者没有图形用户界面的服务器中,你可以先安装 `opencv-python-headless`,这样在安装 mmcv 依赖的过程中会跳过 `opencv-python` ``` ### 在 Windows 上编译 MMCV 在 Windows 上编译 MMCV 比 Linux 复杂,本节将一步步介绍如何在 Windows 上编译 MMCV。 #### 依赖项 请首先安装以下的依赖项: - [Git](https://git-scm.com/download/win):安装期间,请选择 **add git to Path** - [Visual Studio Community 2019](https://visualstudio.microsoft.com):用于编译 C++ 和 CUDA 代码 - [Miniconda](https://docs.conda.io/en/latest/miniconda.html):包管理工具 - [CUDA 10.2](https://developer.nvidia.com/cuda-10.2-download-archive):如果只需要 CPU 版本可以不安装 CUDA,安装CUDA时,可根据需要进行自定义安装。如果已经安装新版本的显卡驱动,建议取消驱动程序的安装 ```{note} 您需要知道如何在 Windows 上设置变量环境,尤其是 "PATH" 的设置,以下安装过程都会用到。 ``` #### 设置 Python 环境 1. 从 Windows 菜单启动 Anaconda 命令行 ```{note} 如 Miniconda 安装程序建议,不要使用原始的 `cmd.exe` 或是 `powershell.exe`。命令行有两个版本,一个基于 PowerShell,一个基于传统的 `cmd.exe`。请注意以下说明都是使用的基于 PowerShell ``` 2. 创建一个新的 Conda 环境 ```shell conda create --name mmcv python=3.7 # 经测试,3.6, 3.7, 3.8 也能通过 conda activate mmcv # 确保做任何操作前先激活环境 ``` 3. 安装 PyTorch 时,可以根据需要安装支持 CUDA 或不支持 CUDA 的版本 ```shell # CUDA version conda install pytorch torchvision cudatoolkit=10.2 -c pytorch # CPU version conda install pytorch torchvision cpuonly -c pytorch ``` 4. 准备 MMCV 源代码 ```shell git clone https://github.com/open-mmlab/mmcv.git cd mmcv ``` 5. 安装所需 Python 依赖包 ```shell pip3 install -r requirements/runtime.txt ``` 6. 建议安装 `ninja` 以加快编译速度 ```bash pip install -r requirements/optional.txt ``` #### 编译与安装 MMCV MMCV 有三种安装的模式: 1. Lite 版本(不包含算子) 这种方式下,没有算子被编译,这种模式的 mmcv 是原生的 python 包 2. Full 版本(只包含 CPU 算子) 编译 CPU 算子,但只有 x86 将会被编译,并且编译版本只能在 CPU only 情况下运行 3. Full 版本(既包含 CPU 算子,又包含 CUDA 算子) 同时编译 CPU 和 CUDA 算子,`ops` 模块的 x86 与 CUDA 的代码都可以被编译。同时编译的版本可以在 CUDA 上调用 GPU ##### 通用步骤 1. 
设置 MSVC 编译器 设置环境变量。添加 `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.27.29110\bin\Hostx86\x64` 到 `PATH`,则 `cl.exe` 可以在命令行中运行,如下所示。 ```none (base) PS C:\Users\xxx> cl Microsoft (R) C/C++ Optimizing Compiler Version 19.27.29111 for x64 Copyright (C) Microsoft Corporation. All rights reserved. usage: cl [ option... ] filename... [ / link linkoption... ] ``` 为了兼容性,我们使用 x86-hosted 以及 x64-targeted 版本,即路径中的 `Hostx86\x64` 。 因为 PyTorch 将解析 `cl.exe` 的输出以检查其版本,只有 utf-8 将会被识别,你可能需要将系统语言更改为英语。控制面板 -> 地区-> 管理-> 非 Unicode 来进行语言转换。 ##### 安装方式一:Lite version(不包含算子) 在完成上述的公共步骤后,从菜单打开 Anaconda 命令框,输入以下命令 ```shell # 激活环境 conda activate mmcv # 切换到 mmcv 根目录 cd mmcv # 安装 python setup.py develop # 检查是否安装成功 pip list ``` ##### 安装方式二:Full version(只编译 CPU 算子) 1. 完成上述的公共步骤 2. 设置环境变量 ```shell $env:MMCV_WITH_OPS = 1 $env:MAX_JOBS = 8 # 根据你可用CPU以及内存量进行设置 ``` 3. 编译安装 ```shell conda activate mmcv # 激活环境 cd mmcv # 改变路径 python setup.py build_ext # 如果成功, cl 将被启动用于编译算子 python setup.py develop # 安装 pip list # 检查是否安装成功 ``` ##### 安装方式三:Full version(既编译 CPU 算子又编译 CUDA 算子) 1. 完成上述的公共步骤 2. 设置环境变量 ```shell $env:MMCV_WITH_OPS = 1 $env:MAX_JOBS = 8 # 根据你可用CPU以及内存量进行设置 ``` 3. 检查 `CUDA_PATH` 或者 `CUDA_HOME` 环境变量已经存在在 `envs` 之中 ```none (base) PS C:\Users\WRH> ls env: Name Value ---- ----- CUDA_PATH C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2 CUDA_PATH_V10_1 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1 CUDA_PATH_V10_2 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2 ``` 如果没有,你可以按照下面的步骤设置 ```shell $env:CUDA_HOME = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2" # 或者 $env:CUDA_HOME = $env:CUDA_PATH_V10_2 # CUDA_PATH_V10_2 已经在环境变量中 ``` 4. 设置 CUDA 的目标架构 ```shell $env:TORCH_CUDA_ARCH_LIST="6.1" # 支持 GTX 1080 # 或者用所有支持的版本,但可能会变得很慢 $env:TORCH_CUDA_ARCH_LIST="3.5 3.7 5.0 5.2 6.0 6.1 7.0 7.5" ``` ```{note} 我们可以在 [here](https://developer.nvidia.com/cuda-gpus) 查看 GPU 的计算能力 ``` 5. 
编译安装 ```shell $env:MMCV_WITH_OPS = 1 $env:MAX_JOBS = 8 # 根据你可用CPU以及内存量进行设置 conda activate mmcv # 激活环境 cd mmcv # 改变路径 python setup.py build_ext # 如果成功, cl 将被启动用于编译算子 python setup.py develop # 安装 pip list # 检查是否安装成功 ``` ```{note} 如果你的 PyTorch 版本是 1.6.0,你可能会遇到一些这个 [issue](https://github.com/pytorch/pytorch/issues/42467) 提到的错误,则可以参考这个 [pull request](https://github.com/pytorch/pytorch/pull/43380/files) 修改 本地环境的 PyTorch 源代码 ``` 如果编译安装 mmcv 的过程中遇到了问题,你也许可以在 [Frequently Asked Question](../faq.html) 找到解决方法 ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/get_started/installation.md ================================================ ## 安装 MMCV MMCV 有两个版本: - **mmcv-full**: 完整版,包含所有的特性以及丰富的开箱即用的 CUDA 算子。注意完整版本可能需要更长时间来编译。 - **mmcv**: 精简版,不包含 CUDA 算子但包含其余所有特性和功能,类似 MMCV 1.0 之前的版本。如果你不需要使用 CUDA 算子的话,精简版可以作为一个考虑选项。 ```{warning} 请不要在同一个环境中安装两个版本,否则可能会遇到类似 `ModuleNotFound` 的错误。在安装一个版本之前,需要先卸载另一个。`如果CUDA可用,强烈推荐安装mmcv-full`。 ``` a. 安装完整版 在安装 mmcv-full 之前,请确保 PyTorch 已经成功安装在环境中,可以参考 PyTorch 官方[文档](https://pytorch.org/)。 我们提供了不同 PyTorch 和 CUDA 版本的 mmcv-full 预编译包,可以大大简化用户安装编译过程。强烈推荐通过预编译包来安装。另外,安装完成后可以运行 [check_installation.py](https://github.com/open-mmlab/mmcv/.dev_scripts/check_installation.py) 脚本检查 mmcv-full 是否安装成功。 i. 
安装最新版本 如下是安装最新版 ``mmcv-full`` 的命令 ```shell pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html ``` 请将链接中的 ``{cu_version}`` 和 ``{torch_version}`` 根据自身需求替换成实际的版本号,例如想安装和 ``CUDA 11.1``、``PyTorch 1.9.0`` 兼容的最新版 ``mmcv-full``,使用如下替换过的命令 ```shell pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html ``` ```{note} PyTorch 在 1.x.0 和 1.x.1 之间通常是兼容的,故 mmcv-full 只提供 1.x.0 的编译包。如果你 的 PyTorch 版本是 1.x.1,你可以放心地安装在 1.x.0 版本编译的 mmcv-full。例如,如果你的 PyTorch 版本是 1.8.1、CUDA 版本是 11.1,你可以使用以下命令安装 mmcv-full。 `pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html` ``` 如果想知道更多 CUDA 和 PyTorch 版本的命令,可以参考下面的表格,将链接中的 ``=={mmcv_version}`` 删去即可。 ii. 安装特定的版本 如下是安装特定版本 ``mmcv-full`` 的命令 ```shell pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html ``` 首先请参考版本发布信息找到想要安装的版本号,将 ``{mmcv_version}`` 替换成该版本号,例如 ``1.3.9``。 然后将链接中的 ``{cu_version}`` 和 ``{torch_version}`` 根据自身需求替换成实际的版本号,例如想安装和 ``CUDA 11.1``、``PyTorch 1.9.0`` 兼容的 ``mmcv-full`` 1.3.9 版本,使用如下替换过的命令 ```shell pip install mmcv-full==1.3.9 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html ``` 对于更多的 PyTorch 和 CUDA 版本组合,请参考下表:
CUDA torch 1.10 torch 1.9 torch 1.8 torch 1.7 torch 1.6 torch 1.5
11.3
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html
11.1
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html
11.0
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html
10.2
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.10.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.7.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.6.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.5.0/index.html
10.1
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.8.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.7.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.5.0/index.html
9.2
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.7.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.6.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.5.0/index.html
cpu
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.10.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.9.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.7.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.6.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.5.0/index.html
```{note} 以上提供的预编译包并不囊括所有的 mmcv-full 版本,我们可以点击对应链接查看支持的版本。例如,点击 [cu102-torch1.8.0](https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html),可以看到 `cu102-torch1.8.0` 只提供了 1.3.0 及以上的 mmcv-full 版本。另外,从 `mmcv v1.3.17` 开始,我们不再提供`PyTorch 1.3 & 1.4` 对应的 mmcv-full 预编译包。你可以在 [这](./previous_versions.md) 找到 `PyTorch 1.3 & 1.4` 对应的预编包。虽然我们不再提供 `PyTorch 1.3 & 1.4` 对应的预编译包,但是我们依然在 CI 中保证对它们的兼容持续到下一年。 ``` 除了使用预编译包之外,另一种方式是在本地进行编译,直接运行下述命令 ```python pip install mmcv-full ``` 但注意本地编译可能会耗时 10 分钟以上。 b. 安装精简版 ```python pip install mmcv ``` c. 安装完整版并且编译 onnxruntime 的自定义算子 - 详细的指南请查看 [这里](https://mmcv.readthedocs.io/zh_CN/latest/deployment/onnxruntime_custom_ops.html)。 如果想从源码编译 MMCV,请参考[该文档](https://mmcv.readthedocs.io/zh_CN/latest/get_started/build.html)。 ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/get_started/introduction.md ================================================ ## 介绍 MMCV MMCV 是一个面向计算机视觉的基础库,它支持了很多开源项目,例如: - [MIM](https://github.com/open-mmlab/mim): OpenMMLab 项目、算法、模型的统一入口 - [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图像分类工具箱与测试基准 - [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 检测工具箱与测试基准 - [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用3D目标检测平台 - [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱与测试基准 - [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频理解工具箱与测试基准 - [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab 一体化视频目标感知平台 - [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab 姿态估计工具箱与测试基准 - [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab 图像视频编辑工具箱 - [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab 全流程文字检测识别理解工具包 - [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab 新一代生成模型工具箱 - [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab 光流估计工具箱与测试基准 - [MMFewShot](https://github.com/open-mmlab/mmfewshot): 
OpenMMLab 少样本学习工具箱与测试基准 - [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 人体参数化模型工具箱与测试基准 - [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab 自监督学习工具箱与测试基准 - [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab 模型压缩工具箱与测试基准 - [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab 模型部署框架 MMCV 提供了如下众多功能: - 通用的 IO 接口 - 图像和视频处理 - 图像和标注结果可视化 - 常用小工具(进度条,计时器等) - 基于 PyTorch 的通用训练框架 - 多种 CNN 网络结构 - 高质量实现的常见 CUDA 算子 如想了解更多特性和使用,请参考[文档](https://mmcv.readthedocs.io/zh_CN/latest)。 ```{note} MMCV 需要 Python 3.6 以上版本。 ``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/get_started/previous_versions.md ================================================ ## 其他版本的 PyTorch 我们不再提供在较低的 `PyTorch` 版本下编译的 `mmcv-full` 包,但为了您的方便,您可以在下面找到它们。 ### PyTorch 1.4 | 1.0.0 <= mmcv_version <= 1.2.1 #### CUDA 10.1 ```bash pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.4.0/index.html ``` #### CUDA 9.2 ```bash pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.4.0/index.html ``` #### CPU ```bash pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.4.0/index.html ``` ### PyTorch v1.3 | 1.0.0 <= mmcv_version <= 1.3.16 #### CUDA 10.1 ```bash pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.3.0/index.html ``` #### CUDA 9.2 ```bash pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.3.0/index.html ``` #### CPU ```bash pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.3.0/index.html ``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/index.rst ================================================ 欢迎来到 MMCV 的中文文档! ============================= 您可以在页面左下角切换中英文文档。 ..
toctree:: :maxdepth: 2 :caption: 介绍与安装 get_started/introduction.md get_started/installation.md get_started/build.md .. toctree:: :maxdepth: 2 :caption: 深入理解 MMCV understand_mmcv/config.md understand_mmcv/registry.md understand_mmcv/runner.md understand_mmcv/io.md understand_mmcv/data_process.md understand_mmcv/visualization.md understand_mmcv/cnn.md understand_mmcv/ops.md understand_mmcv/utils.md .. toctree:: :maxdepth: 2 :caption: 部署 deployment/onnx.md deployment/onnxruntime_op.md deployment/onnxruntime_custom_ops.md deployment/tensorrt_plugin.md deployment/tensorrt_custom_ops.md .. toctree:: :maxdepth: 2 :caption: 兼容性 compatibility.md .. toctree:: :maxdepth: 2 :caption: 常见问题 faq.md .. toctree:: :maxdepth: 2 :caption: 社区 community/contributing.md community/pr.md .. toctree:: :maxdepth: 2 :caption: API 文档 api.rst Indices and tables ================== * :ref:`genindex` * :ref:`search` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/make.bat ================================================ @ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. 
echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/cnn.md ================================================ ## 卷积神经网络 我们为卷积神经网络提供了一些构建模块,包括层构建、模块组件和权重初始化。 ### 网络层的构建 在运行实验时,我们可能需要尝试同属一种类型但不同配置的层,但又不希望每次都修改代码。于是我们提供一些层构建方法,可以从字典构建层,字典可以在配置文件中配置,也可以通过命令行参数指定。 #### 用法 一个简单的例子: ```python cfg = dict(type='Conv3d') layer = build_conv_layer(cfg, in_channels=3, out_channels=8, kernel_size=3) ``` - `build_conv_layer`: 支持的类型包括 Conv1d、Conv2d、Conv3d、Conv (Conv是Conv2d的别名) - `build_norm_layer`: 支持的类型包括 BN1d、BN2d、BN3d、BN (alias for BN2d)、SyncBN、GN、LN、IN1d、IN2d、IN3d、IN(IN是IN2d的别名) - `build_activation_layer`:支持的类型包括 ReLU、LeakyReLU、PReLU、RReLU、ReLU6、ELU、Sigmoid、Tanh、GELU - `build_upsample_layer`: 支持的类型包括 nearest、bilinear、deconv、pixel_shuffle - `build_padding_layer`: 支持的类型包括 zero、reflect、replicate #### 拓展 我们还允许自定义层和算子来扩展构建方法。 1. 编写和注册自己的模块: ```python from mmcv.cnn import UPSAMPLE_LAYERS @UPSAMPLE_LAYERS.register_module() class MyUpsample: def __init__(self, scale_factor): pass def forward(self, x): pass ``` 2. 
在某处导入 `MyUpsample` (例如 `__init__.py` )然后使用它: ```python cfg = dict(type='MyUpsample', scale_factor=2) layer = build_upsample_layer(cfg) ``` ### 模块组件 我们还提供了常用的模块组件,以方便网络构建。 卷积组件 `ConvModule` 由 convolution、normalization以及activation layers 组成,更多细节请参考 [ConvModule api](api.html#mmcv.cnn.ConvModule)。 ```python # conv + bn + relu conv = ConvModule(3, 8, 2, norm_cfg=dict(type='BN')) # conv + gn + relu conv = ConvModule(3, 8, 2, norm_cfg=dict(type='GN', num_groups=2)) # conv + relu conv = ConvModule(3, 8, 2) # conv conv = ConvModule(3, 8, 2, act_cfg=None) # conv + leaky relu conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='LeakyReLU')) # bn + conv + relu conv = ConvModule( 3, 8, 2, norm_cfg=dict(type='BN'), order=('norm', 'conv', 'act')) ``` ### Weight initialization > 实现细节可以在 [mmcv/cnn/utils/weight_init.py](../../mmcv/cnn/utils/weight_init.py)中找到 在训练过程中,适当的初始化策略有利于加快训练速度或者获得更高的性能。 在MMCV中,我们提供了一些常用的方法来初始化模块,比如 `nn.Conv2d` 模块。当然,我们也提供了一些高级API,可用于初始化包含一个或多个模块的模型。 #### Initialization functions 以函数的方式初始化 `nn.Module` ,例如 `nn.Conv2d` 、 `nn.Linear` 等。 我们提供以下初始化方法, - constant_init 使用给定常量值初始化模型参数 ```python >>> import torch.nn as nn >>> from mmcv.cnn import constant_init >>> conv1 = nn.Conv2d(3, 3, 1) >>> # constant_init(module, val, bias=0) >>> constant_init(conv1, 1, 0) >>> conv1.weight ``` - xavier_init 按照 [Understanding the difficulty of training deep feedforward neural networks - Glorot, X. & Bengio, Y. 
(2010)](http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf) 描述的方法初始化模型参数 ```python >>> import torch.nn as nn >>> from mmcv.cnn import xavier_init >>> conv1 = nn.Conv2d(3, 3, 1) >>> # xavier_init(module, gain=1, bias=0, distribution='normal') >>> xavier_init(conv1, distribution='normal') ``` - normal_init 使用正态分布(高斯分布)初始化模型参数 ```python >>> import torch.nn as nn >>> from mmcv.cnn import normal_init >>> conv1 = nn.Conv2d(3, 3, 1) >>> # normal_init(module, mean=0, std=1, bias=0) >>> normal_init(conv1, std=0.01, bias=0) ``` - uniform_init 使用均匀分布初始化模型参数 ```python >>> import torch.nn as nn >>> from mmcv.cnn import uniform_init >>> conv1 = nn.Conv2d(3, 3, 1) >>> # uniform_init(module, a=0, b=1, bias=0) >>> uniform_init(conv1, a=0, b=1) ``` - kaiming_init 按照 [Delving deep into rectifiers: Surpassing human-level performance on ImageNet classification - He, K. et al. (2015)](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf) 描述的方法来初始化模型参数。 ```python >>> import torch.nn as nn >>> from mmcv.cnn import kaiming_init >>> conv1 = nn.Conv2d(3, 3, 1) >>> # kaiming_init(module, a=0, mode='fan_out', nonlinearity='relu', bias=0, distribution='normal') >>> kaiming_init(conv1) ``` - caffe2_xavier_init caffe2中实现的 `xavier initialization`,对应于 PyTorch中的 `kaiming_uniform_` ```python >>> import torch.nn as nn >>> from mmcv.cnn import caffe2_xavier_init >>> conv1 = nn.Conv2d(3, 3, 1) >>> # caffe2_xavier_init(module, bias=0) >>> caffe2_xavier_init(conv1) ``` - bias_init_with_prob 根据给定的概率初始化 `conv/fc`, 这在 [Focal Loss for Dense Object Detection](https://arxiv.org/pdf/1708.02002.pdf) 提出。 ```python >>> from mmcv.cnn import bias_init_with_prob >>> # bias_init_with_prob is proposed in Focal Loss >>> bias = bias_init_with_prob(0.01) >>> bias -4.59511985013459 ``` #### Initializers and configs 在初始化方法的基础上,我们定义了相应的初始化类,并将它们注册到 `INITIALIZERS` 中,这样我们就可以使用 `config` 配置来初始化模型了。 我们提供以下初始化类: - ConstantInit - XavierInit - NormalInit - UniformInit - 
KaimingInit - Caffe2XavierInit - PretrainedInit 接下来详细介绍 `initialize` 的使用方法 1. 通过关键字 `layer` 来初始化模型 如果我们只定义了关键字 `layer` ,那么只初始化 `layer` 中包含的层。 注意: 关键字 `layer` 支持的模块是带有 weights 和 bias 属性的 PyTorch 模块,所以不支持 `MultiheadAttention layer` - 定义关键字 `layer` 列表并使用相同配置初始化模块 ```python import torch.nn as nn from mmcv.cnn import initialize class FooNet(nn.Module): def __init__(self): super().__init__() self.feat = nn.Conv1d(3, 1, 3) self.reg = nn.Conv2d(3, 3, 3) self.cls = nn.Linear(1, 2) model = FooNet() init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d', 'Linear'], val=1) # 使用相同的配置初始化整个模块 initialize(model, init_cfg) # model.feat.weight # Parameter containing: # tensor([[[1., 1., 1.], # [1., 1., 1.], # [1., 1., 1.]]], requires_grad=True) ``` - 定义关键字 `layer` 用于初始化不同配置的层 ```python import torch.nn as nn from mmcv.cnn.utils import initialize class FooNet(nn.Module): def __init__(self): super().__init__() self.feat = nn.Conv1d(3, 1, 3) self.reg = nn.Conv2d(3, 3, 3) self.cls = nn.Linear(1,2) model = FooNet() init_cfg = [dict(type='Constant', layer='Conv1d', val=1), dict(type='Constant', layer='Conv2d', val=2), dict(type='Constant', layer='Linear', val=3)] # nn.Conv1d 使用 dict(type='Constant', val=1) 初始化 # nn.Conv2d 使用 dict(type='Constant', val=2) 初始化 # nn.Linear 使用 dict(type='Constant', val=3) 初始化 initialize(model, init_cfg) # model.reg.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) ``` 2.
定义关键字`override`初始化模型 - 当用属性名初始化某个特定部分时, 我们可以使用关键字 `override`, 关键字 `override` 对应的Value会替代init_cfg中相应的值 ```python import torch.nn as nn from mmcv.cnn import initialize class FooNet(nn.Module): def __init__(self): super().__init__() self.feat = nn.Conv1d(3, 1, 3) self.reg = nn.Conv2d(3, 3, 3) self.cls = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2)) # 如果我们想将模型的权重初始化为 1,将偏差初始化为 2 # 但希望 `reg` 中的权重为 3,偏差为 4,则我们可以使用关键字override model = FooNet() init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], val=1, bias=2, override=dict(type='Constant', name='reg', val=3, bias=4)) # 使用 dict(type='Constant', val=1, bias=2)来初始化 self.feat and self.cls # 使用dict(type='Constant', val=3, bias=4)来初始化‘reg’模块。 initialize(model, init_cfg) # model.reg.weight # Parameter containing: # tensor([[[[3., 3., 3.], # [3., 3., 3.], # [3., 3., 3.]], # ..., # [[3., 3., 3.], # [3., 3., 3.], # [3., 3., 3.]]]], requires_grad=True) ``` - 如果 init_cfg 中的关键字`layer`为None,则只初始化在关键字override中的子模块,并且省略override中的 type 和其他参数 ```python model = FooNet() init_cfg = dict(type='Constant', val=1, bias=2, override=dict(name='reg')) # self.feat 和 self.cls 使用pyTorch默认的初始化 # 将使用 dict(type='Constant', val=1, bias=2) 初始化名为 'reg' 的模块 initialize(model, init_cfg) # model.reg.weight # Parameter containing: # tensor([[[[1., 1., 1.], # [1., 1., 1.], # [1., 1., 1.]], # ..., # [[1., 1., 1.], # [1., 1., 1.], # [1., 1., 1.]]]], requires_grad=True) ``` - 如果我们没有定义关键字`layer`或`override` , 将不会初始化任何东西 - 关键字`override`的无效用法 ```python # 没有重写任何子模块 init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], val=1, bias=2, override=dict(type='Constant', val=3, bias=4)) # 没有指定type,即便有其他参数,也是无效的。 init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], val=1, bias=2, override=dict(name='reg', val=3, bias=4)) ``` 3.
用预训练模型初始化 ```python import torch.nn as nn import torchvision.models as models from mmcv.cnn import initialize # 使用预训练模型来初始化 model = models.resnet50() # model.conv1.weight # Parameter containing: # tensor([[[[-6.7435e-03, -2.3531e-02, -9.0143e-03, ..., -2.1245e-03, # -1.8077e-03, 3.0338e-03], # [-1.2603e-02, -2.7831e-02, 2.3187e-02, ..., -1.5793e-02, # 1.1655e-02, 4.5889e-03], # [-3.7916e-02, 1.2014e-02, 1.3815e-02, ..., -4.2651e-03, # 1.7314e-02, -9.9998e-03], # ..., init_cfg = dict(type='Pretrained', checkpoint='torchvision://resnet50') initialize(model, init_cfg) # model.conv1.weight # Parameter containing: # tensor([[[[ 1.3335e-02, 1.4664e-02, -1.5351e-02, ..., -4.0896e-02, # -4.3034e-02, -7.0755e-02], # [ 4.1205e-03, 5.8477e-03, 1.4948e-02, ..., 2.2060e-03, # -2.0912e-02, -3.8517e-02], # [ 2.2331e-02, 2.3595e-02, 1.6120e-02, ..., 1.0281e-01, # 6.2641e-02, 5.1977e-02], # ..., # 使用关键字'prefix'用预训练模型的特定部分来初始化子模块权重 model = models.resnet50() url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\ 'retinanet_r50_fpn_1x_coco/'\ 'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' init_cfg = dict(type='Pretrained', checkpoint=url, prefix='backbone.') initialize(model, init_cfg) ``` 4. 
初始化继承自BaseModule、Sequential、ModuleList、ModuleDict的模型 `BaseModule` 继承自 `torch.nn.Module`, 它们之间唯一的不同是 `BaseModule` 实现了 `init_weights` `Sequential` 继承自 `BaseModule` 和 `torch.nn.Sequential` `ModuleList` 继承自 `BaseModule` 和 `torch.nn.ModuleList` `ModuleDict` 继承自 `BaseModule` 和 `torch.nn.ModuleDict` `````python import torch.nn as nn from mmcv.runner import BaseModule, Sequential, ModuleList, ModuleDict class FooConv1d(BaseModule): def __init__(self, init_cfg=None): super().__init__(init_cfg) self.conv1d = nn.Conv1d(4, 1, 4) def forward(self, x): return self.conv1d(x) class FooConv2d(BaseModule): def __init__(self, init_cfg=None): super().__init__(init_cfg) self.conv2d = nn.Conv2d(3, 1, 3) def forward(self, x): return self.conv2d(x) # BaseModule init_cfg = dict(type='Constant', layer='Conv1d', val=0., bias=1.) model = FooConv1d(init_cfg) model.init_weights() # model.conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # Sequential init_cfg1 = dict(type='Constant', layer='Conv1d', val=0., bias=1.) init_cfg2 = dict(type='Constant', layer='Conv2d', val=2., bias=3.) model1 = FooConv1d(init_cfg1) model2 = FooConv2d(init_cfg2) seq_model = Sequential(model1, model2) seq_model.init_weights() # seq_model[0].conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # seq_model[1].conv2d.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) # inner init_cfg has higher priority model1 = FooConv1d(init_cfg1) model2 = FooConv2d(init_cfg2) init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.)
seq_model = Sequential(model1, model2, init_cfg=init_cfg) seq_model.init_weights() # seq_model[0].conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # seq_model[1].conv2d.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) # ModuleList model1 = FooConv1d(init_cfg1) model2 = FooConv2d(init_cfg2) modellist = ModuleList([model1, model2]) modellist.init_weights() # modellist[0].conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # modellist[1].conv2d.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) # inner init_cfg has higher priority model1 = FooConv1d(init_cfg1) model2 = FooConv2d(init_cfg2) init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.) 
modellist = ModuleList([model1, model2], init_cfg=init_cfg) modellist.init_weights() # modellist[0].conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # modellist[1].conv2d.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) # ModuleDict model1 = FooConv1d(init_cfg1) model2 = FooConv2d(init_cfg2) modeldict = ModuleDict(dict(model1=model1, model2=model2)) modeldict.init_weights() # modeldict['model1'].conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # modeldict['model2'].conv2d.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) # inner init_cfg has higher priority model1 = FooConv1d(init_cfg1) model2 = FooConv2d(init_cfg2) init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.) 
modeldict = ModuleDict(dict(model1=model1, model2=model2), init_cfg=init_cfg) modeldict.init_weights() # modeldict['model1'].conv1d.weight # Parameter containing: # tensor([[[0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.], # [0., 0., 0., 0.]]], requires_grad=True) # modeldict['model2'].conv2d.weight # Parameter containing: # tensor([[[[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]], # ..., # [[2., 2., 2.], # [2., 2., 2.], # [2., 2., 2.]]]], requires_grad=True) ````` ### Model Zoo 除了`torchvision`的预训练模型,我们还提供以下 CNN 的预训练模型: - VGG Caffe - ResNet Caffe - ResNeXt - ResNet with Group Normalization - ResNet with Group Normalization and Weight Standardization - HRNetV2 - Res2Net - RegNet #### Model URLs in JSON MMCV中的Model Zoo Link 由 JSON 文件管理。 json 文件由模型名称及其url或path的键值对组成,一个json文件可能类似于: ```json { "model_a": "https://example.com/models/model_a_9e5bac.pth", "model_b": "pretrain/model_b_ab3ef2c.pth" } ``` 可以在[此处](https://github.com/open-mmlab/mmcv/blob/master/mmcv/model_zoo/open_mmlab.json)找到托管在 OpenMMLab AWS 上的预训练模型的默认链接。 你可以通过将 `open-mmlab.json` 放在 `MMCV_HOME`下来覆盖默认链接,如果在环境中找不到`MMCV_HOME`,则默认使用 `~/.cache/mmcv`。当然你也可以使用命令 `export MMCV_HOME=/your/path`来设置自己的路径。 外部的json文件将被合并为默认文件,如果相同的键出现在外部`json`和默认`json`中,则将使用外部`json`。 #### Load Checkpoint `mmcv.load_checkpoint()`的参数`filename`支持以下类型: - filepath: `checkpoint`路径 - `http://xxx` and `https://xxx`: 下载checkpoint的链接,文件名中必需包含`SHA256`后缀 - `torchvision://xxx`: `torchvision.models`中的模型链接,更多细节参考 [torchvision](https://pytorch.org/docs/stable/torchvision/models.html) - `open-mmlab://xxx`: 默认和其他 json 文件中提供的模型链接或文件路径 ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/config.md ================================================ ## 配置 `Config` 类用于操作配置文件,它支持从多种文件格式中加载配置,包括 **python**, **json** 和 **yaml**。 它提供了类似字典对象的接口来获取和设置值。 以配置文件 `test.py` 为例 ```python a = 1 b = dict(b1=[0, 1, 2], b2=None) c = (1, 2) d = 'string' ``` 加载与使用配置文件 ```python >>> cfg = Config.fromfile('test.py') 
>>> print(cfg) >>> dict(a=1, ... b=dict(b1=[0, 1, 2], b2=None), ... c=(1, 2), ... d='string') ``` 对于所有格式的配置文件,都支持一些预定义变量。它会将 `{{ var }}` 替换为实际值。 目前支持以下四个预定义变量: `{{ fileDirname }}` - 当前打开文件的目录名,例如 /home/your-username/your-project/folder `{{ fileBasename }}` - 当前打开文件的文件名,例如 file.ext `{{ fileBasenameNoExtension }}` - 当前打开文件不包含扩展名的文件名,例如 file `{{ fileExtname }}` - 当前打开文件的扩展名,例如 .ext 这些变量名引用自 [VS Code](https://code.visualstudio.com/docs/editor/variables-reference)。 这里是一个带有预定义变量的配置文件的例子。 `config_a.py` ```python a = 1 b = './work_dir/{{ fileBasenameNoExtension }}' c = '{{ fileExtname }}' ``` ```python >>> cfg = Config.fromfile('./config_a.py') >>> print(cfg) >>> dict(a=1, ... b='./work_dir/config_a', ... c='.py') ``` 对于所有格式的配置文件, 都支持继承。为了重用其他配置文件的字段, 需要指定 `_base_='./config_a.py'` 或者一个包含配置文件的列表 `_base_=['./config_a.py', './config_b.py']`。 这里有 4 个配置继承关系的例子。 `config_a.py` 作为基类配置文件 ```python a = 1 b = dict(b1=[0, 1, 2], b2=None) ``` ### 不含重复键值对从基类配置文件继承 `config_b.py` ```python _base_ = './config_a.py' c = (1, 2) d = 'string' ``` ```python >>> cfg = Config.fromfile('./config_b.py') >>> print(cfg) >>> dict(a=1, ... b=dict(b1=[0, 1, 2], b2=None), ... c=(1, 2), ... d='string') ``` 在`config_b.py`里的新字段与在`config_a.py`里的旧字段拼接 ### 含重复键值对从基类配置文件继承 `config_c.py` ```python _base_ = './config_a.py' b = dict(b2=1) c = (1, 2) ``` ```python >>> cfg = Config.fromfile('./config_c.py') >>> print(cfg) >>> dict(a=1, ... b=dict(b1=[0, 1, 2], b2=1), ... c=(1, 2)) ``` 在基类配置文件:`config_a` 里的 `b.b2=None`被配置文件:`config_c.py`里的 `b.b2=1`替代。 ### 从具有忽略字段的配置文件继承 `config_d.py` ```python _base_ = './config_a.py' b = dict(_delete_=True, b2=None, b3=0.1) c = (1, 2) ``` ```python >>> cfg = Config.fromfile('./config_d.py') >>> print(cfg) >>> dict(a=1, ... b=dict(b2=None, b3=0.1), ... 
c=(1, 2)) ``` 您还可以设置 `_delete_=True`忽略基类配置文件中的某些字段。所有在`b`中的旧键 `b1, b2, b3` 将会被新键 `b2, b3` 所取代。 ### 从多个基类配置文件继承(基类配置文件不应包含相同的键) `config_e.py` ```python c = (1, 2) d = 'string' ``` `config_f.py` ```python _base_ = ['./config_a.py', './config_e.py'] ``` ```python >>> cfg = Config.fromfile('./config_f.py') >>> print(cfg) >>> dict(a=1, ... b=dict(b1=[0, 1, 2], b2=None), ... c=(1, 2), ... d='string') ``` ### 从基类引用变量 您可以使用以下语法引用在基类中定义的变量。 `base.py` ```python item1 = 'a' item2 = dict(item3 = 'b') ``` `config_g.py` ```python _base_ = ['./base.py'] item = dict(a = {{ _base_.item1 }}, b = {{ _base_.item2.item3 }}) ``` ```python >>> cfg = Config.fromfile('./config_g.py') >>> print(cfg.pretty_text) item1 = 'a' item2 = dict(item3='b') item = dict(a='a', b='b') ``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/data_process.md ================================================ ## 数据处理 ### 图像 图像模块提供了一些图像预处理的函数,该模块依赖 `opencv` 。 #### 读取/保存/显示 使用 `imread` 和 `imwrite` 函数可以读取和保存图像。 ```python import mmcv img = mmcv.imread('test.jpg') img = mmcv.imread('test.jpg', flag='grayscale') img_ = mmcv.imread(img) # 相当于什么也没做 mmcv.imwrite(img, 'out.jpg') ``` 从二进制中读取图像 ```python with open('test.jpg', 'rb') as f: data = f.read() img = mmcv.imfrombytes(data) ``` 显示图像文件或已读取的图像 ```python mmcv.imshow('tests/data/color.jpg') for i in range(10): img = np.random.randint(256, size=(100, 100, 3), dtype=np.uint8) mmcv.imshow(img, win_name='test image', wait_time=200) ``` #### 色彩空间转换 支持的转换函数: - bgr2gray - gray2bgr - bgr2rgb - rgb2bgr - bgr2hsv - hsv2bgr ```python img = mmcv.imread('tests/data/color.jpg') img1 = mmcv.bgr2rgb(img) img2 = mmcv.rgb2gray(img1) img3 = mmcv.bgr2hsv(img) ``` #### 缩放 有三种缩放图像的方法。所有以 `imresize_*` 开头的函数都有一个 `return_scale` 参数,如果 该参数为 `False` ,函数的返回值只有调整之后的图像,否则是一个元组 `(resized_img, scale)` 。 ```python # 缩放图像至给定的尺寸 mmcv.imresize(img, (1000, 600), return_scale=True) # 缩放图像至与给定的图像同样的尺寸 mmcv.imresize_like(img, dst_img, 
return_scale=False) # 以一定的比例缩放图像 mmcv.imrescale(img, 0.5) # 缩放图像至最长的边不大于1000、最短的边不大于800并且没有改变图像的长宽比 mmcv.imrescale(img, (1000, 800)) ``` #### 旋转 我们可以使用 `imrotate` 旋转图像一定的角度。旋转的中心需要指定,默认值是原始图像的中心。有 两种旋转的模式,一种保持图像的尺寸不变,因此旋转后原始图像中的某些部分会被裁剪,另一种是扩大 图像的尺寸进而保留完整的原始图像。 ```python img = mmcv.imread('tests/data/color.jpg') # 顺时针旋转图像30度 img_ = mmcv.imrotate(img, 30) # 逆时针旋转图像90度 img_ = mmcv.imrotate(img, -90) # 顺时针旋转图像30度并且缩放图像为原始图像的1.5倍 img_ = mmcv.imrotate(img, 30, scale=1.5) # 以坐标(100, 100)为中心顺时针旋转图像30度 img_ = mmcv.imrotate(img, 30, center=(100, 100)) # 顺时针旋转图像30度并扩大图像的尺寸 img_ = mmcv.imrotate(img, 30, auto_bound=True) ``` #### 翻转 我们可以使用 `imflip` 翻转图像。 ```python img = mmcv.imread('tests/data/color.jpg') # 水平翻转图像 mmcv.imflip(img) # 垂直翻转图像 mmcv.imflip(img, direction='vertical') ``` #### 裁剪 `imcrop` 可以裁剪图像的一个或多个区域,每个区域用左上角和右下角坐标表示,形如(x1, y1, x2, y2) ```python import mmcv import numpy as np img = mmcv.imread('tests/data/color.jpg') # 裁剪区域 (10, 10, 100, 120) bboxes = np.array([10, 10, 100, 120]) patch = mmcv.imcrop(img, bboxes) # 裁剪两个区域,分别是 (10, 10, 100, 120) 和 (0, 0, 50, 50) bboxes = np.array([[10, 10, 100, 120], [0, 0, 50, 50]]) patches = mmcv.imcrop(img, bboxes) # 裁剪两个区域并且缩放区域1.2倍 patches = mmcv.imcrop(img, bboxes, scale_ratio=1.2) ``` #### 填充 `impad` 和 `impad_to_multiple` 可以用给定的值将图像填充至给定的尺寸。 ```python img = mmcv.imread('tests/data/color.jpg') # 用给定值将图像填充至 (1000, 1200) img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=0) # 用给定值分别填充图像的3个通道至 (1000, 1200) img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=[100, 50, 200]) # 用给定值填充图像的左、上、右、下四条边 img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=0) # 用3个值分别填充图像的左、上、右、下四条边的3个通道 img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=[100, 50, 200]) # 将图像的四条边填充至能够被给定值整除 img_ = mmcv.impad_to_multiple(img, 32) ``` ### 视频 视频模块提供了以下的功能: - 一个 `VideoReader` 类,具有友好的 API 接口可以读取和转换视频 - 一些编辑视频的方法,包括 `cut` , `concat` , `resize` - 光流的读取/保存/变换 #### VideoReader `VideoReader` 类提供了和序列一样的接口去获取视频帧。该类会缓存所有被访问过的帧。 ```python video = 
mmcv.VideoReader('test.mp4') # 获取基本的信息 print(len(video)) print(video.width, video.height, video.resolution, video.fps) # 遍历所有的帧 for frame in video: print(frame.shape) # 读取下一帧 img = video.read() # 使用索引获取帧 img = video[100] # 获取指定范围的帧 img = video[5:10] ``` 将视频切成帧并保存至给定目录或者从给定目录中生成视频。 ```python # 将视频切成帧并保存至目录 video = mmcv.VideoReader('test.mp4') video.cvt2frames('out_dir') # 从给定目录中生成视频 mmcv.frames2video('out_dir', 'test.avi') ``` #### 编辑函数 有几个用于编辑视频的函数,这些函数是对 `ffmpeg` 的封装。 ```python # 裁剪视频 mmcv.cut_video('test.mp4', 'clip1.mp4', start=3, end=10, vcodec='h264') # 将多个视频拼接成一个视频 mmcv.concat_video(['clip1.mp4', 'clip2.mp4'], 'joined.mp4', log_level='quiet') # 将视频缩放至给定的尺寸 mmcv.resize_video('test.mp4', 'resized1.mp4', (360, 240)) # 将视频缩放至给定的倍率 mmcv.resize_video('test.mp4', 'resized2.mp4', ratio=2) ``` #### 光流 `mmcv` 提供了以下用于操作光流的函数: - 读取/保存 - 可视化 - 流变换 我们提供了两种将光流dump到文件的方法,分别是非压缩和压缩的方法。非压缩的方法直接将浮点数值的光流 保存至二进制文件,虽然光流无损但文件会比较大。而压缩的方法先量化光流至 0-255 整型数值再保存为 jpeg图像。光流的x维度和y维度会被拼接到图像中。 1. 读取/保存 ```python flow = np.random.rand(800, 600, 2).astype(np.float32) # 保存光流到flo文件 (~3.7M) mmcv.flowwrite(flow, 'uncompressed.flo') # 保存光流为jpeg图像 (~230K),图像的尺寸为 (800, 1200) mmcv.flowwrite(flow, 'compressed.jpg', quantize=True, concat_axis=1) # 读取光流文件,以下两种方式读取的光流尺寸均为 (800, 600, 2) flow = mmcv.flowread('uncompressed.flo') flow = mmcv.flowread('compressed.jpg', quantize=True, concat_axis=1) ``` 2. 可视化 使用 `mmcv.flowshow()` 可视化光流 ```python mmcv.flowshow(flow) ``` ![progress](../../en/_static/flow_visualization.png) 3. 
流变换 ```python img1 = mmcv.imread('img1.jpg') flow = mmcv.flowread('flow.flo') warpped_img2 = mmcv.flow_warp(img1, flow) ``` img1 (左) 和 img2 (右) ![raw images](../../en/_static/flow_raw_images.png) 光流 (img2 -> img1) ![optical flow](../../en/_static/flow_img2toimg1.png) 变换后的图像和真实图像的差异 ![warpped image](../../en/_static/flow_warp_diff.png) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/io.md ================================================ ## 文件输入输出 文件输入输出模块提供了两个通用的 API 接口用于读取和保存不同格式的文件。 ```{note} 在 v1.3.16 及之后的版本中,IO 模块支持从不同后端读取数据并支持将数据保存至不同后端。更多细节请访问 PR [#1330](https://github.com/open-mmlab/mmcv/pull/1330)。 ``` ### 读取和保存数据 `mmcv` 提供了一个通用的 api 用于读取和保存数据,目前支持的格式有 json、yaml 和 pickle。 #### 从硬盘读取数据或者将数据保存至硬盘 ```python import mmcv # 从文件中读取数据 data = mmcv.load('test.json') data = mmcv.load('test.yaml') data = mmcv.load('test.pkl') # 从文件对象中读取数据 with open('test.json', 'r') as f: data = mmcv.load(f, file_format='json') # 将数据序列化为字符串 json_str = mmcv.dump(data, file_format='json') # 将数据保存至文件 (根据文件名后缀反推文件类型) mmcv.dump(data, 'out.pkl') # 将数据保存至文件对象 with open('test.yaml', 'w') as f: mmcv.dump(data, f, file_format='yaml') ``` #### 从其他后端加载或者保存至其他后端 ```python import mmcv # 从 s3 文件读取数据 data = mmcv.load('s3://bucket-name/test.json') data = mmcv.load('s3://bucket-name/test.yaml') data = mmcv.load('s3://bucket-name/test.pkl') # 将数据保存至 s3 文件 (根据文件名后缀反推文件类型) mmcv.dump(data, 's3://bucket-name/out.pkl') ``` 我们提供了易于拓展的方式以支持更多的文件格式。我们只需要创建一个继承自 `BaseFileHandler` 的 文件句柄类并将其注册到 `mmcv` 中即可。句柄类至少需要重写三个方法。 ```python import mmcv # 支持为文件句柄类注册多个文件格式 # @mmcv.register_handler(['txt', 'log']) @mmcv.register_handler('txt') class TxtHandler1(mmcv.BaseFileHandler): def load_from_fileobj(self, file): return file.read() def dump_to_fileobj(self, obj, file): file.write(str(obj)) def dump_to_str(self, obj, **kwargs): return str(obj) ``` 以 `PickleHandler` 为例 ```python import pickle class PickleHandler(mmcv.BaseFileHandler): def 
load_from_fileobj(self, file, **kwargs): return pickle.load(file, **kwargs) def load_from_path(self, filepath, **kwargs): return super(PickleHandler, self).load_from_path( filepath, mode='rb', **kwargs) def dump_to_str(self, obj, **kwargs): kwargs.setdefault('protocol', 2) return pickle.dumps(obj, **kwargs) def dump_to_fileobj(self, obj, file, **kwargs): kwargs.setdefault('protocol', 2) pickle.dump(obj, file, **kwargs) def dump_to_path(self, obj, filepath, **kwargs): super(PickleHandler, self).dump_to_path( obj, filepath, mode='wb', **kwargs) ``` ### 读取文件并返回列表或字典 例如, `a.txt` 是文本文件,一共有5行内容。 ``` a b c d e ``` #### 从硬盘读取 使用 `list_from_file` 读取 `a.txt` ```python >>> mmcv.list_from_file('a.txt') ['a', 'b', 'c', 'd', 'e'] >>> mmcv.list_from_file('a.txt', offset=2) ['c', 'd', 'e'] >>> mmcv.list_from_file('a.txt', max_num=2) ['a', 'b'] >>> mmcv.list_from_file('a.txt', prefix='/mnt/') ['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e'] ``` 同样, `b.txt` 也是文本文件,一共有3行内容 ``` 1 cat 2 dog cow 3 panda ``` 使用 `dict_from_file` 读取 `b.txt` ```python >>> mmcv.dict_from_file('b.txt') {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} >>> mmcv.dict_from_file('b.txt', key_type=int) {1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} ``` #### 从其他后端读取 使用 `list_from_file` 读取 `s3://bucket-name/a.txt` ```python >>> mmcv.list_from_file('s3://bucket-name/a.txt') ['a', 'b', 'c', 'd', 'e'] >>> mmcv.list_from_file('s3://bucket-name/a.txt', offset=2) ['c', 'd', 'e'] >>> mmcv.list_from_file('s3://bucket-name/a.txt', max_num=2) ['a', 'b'] >>> mmcv.list_from_file('s3://bucket-name/a.txt', prefix='/mnt/') ['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e'] ``` 使用 `dict_from_file` 读取 `b.txt` ```python >>> mmcv.dict_from_file('s3://bucket-name/b.txt') {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} >>> mmcv.dict_from_file('s3://bucket-name/b.txt', key_type=int) {1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} ``` ### 读取和保存权重文件 #### 从硬盘读取权重文件或者将权重文件保存至硬盘 我们可以通过下面的方式从磁盘读取权重文件或者将权重文件保存至磁盘 ```python import torch filepath1 = 
'/path/of/your/checkpoint1.pth' filepath2 = '/path/of/your/checkpoint2.pth' # 从 filepath1 读取权重文件 checkpoint = torch.load(filepath1) # 将权重文件保存至 filepath2 torch.save(checkpoint, filepath2) ``` MMCV 提供了很多后端,`HardDiskBackend` 是其中一个,我们可以通过它来读取或者保存权重文件。 ```python import io from mmcv.fileio.file_client import HardDiskBackend disk_backend = HardDiskBackend() with io.BytesIO(disk_backend.get(filepath1)) as buffer: checkpoint = torch.load(buffer) with io.BytesIO() as buffer: torch.save(checkpoint, buffer) disk_backend.put(buffer.getvalue(), filepath2) ``` 如果我们想在接口中实现根据文件路径自动选择对应的后端,我们可以使用 `FileClient`。 例如,我们想实现两个方法,分别是读取权重以及保存权重,它们需支持不同类型的文件路径,可以是磁盘路径,也可以是网络路径或者其他路径。 ```python from mmcv.fileio.file_client import FileClient def load_checkpoint(path): file_client = FileClient.infer_client(uri=path) with io.BytesIO(file_client.get(path)) as buffer: checkpoint = torch.load(buffer) return checkpoint def save_checkpoint(checkpoint, path): with io.BytesIO() as buffer: torch.save(checkpoint, buffer) file_client.put(buffer.getvalue(), path) file_client = FileClient.infer_client(uri=filepath1) checkpoint = load_checkpoint(filepath1) save_checkpoint(checkpoint, filepath2) ``` #### 从网络远端读取权重文件 ```{note} 目前只支持从网络远端读取权重文件,暂不支持将权重文件写入网络远端 ``` ```python import io import torch from mmcv.fileio.file_client import HTTPBackend, FileClient filepath = 'http://path/of/your/checkpoint.pth' checkpoint = torch.utils.model_zoo.load_url(filepath) http_backend = HTTPBackend() with io.BytesIO(http_backend.get(filepath)) as buffer: checkpoint = torch.load(buffer) file_client = FileClient.infer_client(uri=filepath) with io.BytesIO(file_client.get(filepath)) as buffer: checkpoint = torch.load(buffer) ``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/ops.md ================================================ ## CUDA 算子 MMCV 提供了检测、分割等任务中常用的 CUDA 算子 - ActiveRotatedFilter - AssignScoreWithK - BallQuery - BBoxOverlaps - CARAFE - CrissCrossAttention - ContextBlock - 
ConvexIoU - CornerPool - Deformable Convolution v1/v2 - Deformable RoIPool - DynamicScatter - GatherPoints - FurthestPointSample - FurthestPointSampleWithDist - GeneralizedAttention - KNN - MaskedConv - MinAreaPolygon - NMS - PointsInPolygons - PSAMask - RotatedFeatureAlign - RoIPointPool3d - RoIPool - RiRoIAlignRotated - RoIAlign - RoIAwarePool3d - SimpleRoIAlign - SigmoidFocalLoss - SoftmaxFocalLoss - SoftNMS - Synchronized BatchNorm - Voxelization - ThreeInterpolate - ThreeNN - Weight standardization - Correlation ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/registry.md ================================================ ## 注册器 MMCV 使用 [注册器](https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/registry.py) 来管理具有相似功能的不同模块, 例如, 检测器中的主干网络、头部、和模型颈部。 在 OpenMMLab 家族中的绝大部分开源项目使用注册器去管理数据集和模型的模块,例如 [MMDetection](https://github.com/open-mmlab/mmdetection), [MMDetection3D](https://github.com/open-mmlab/mmdetection3d), [MMClassification](https://github.com/open-mmlab/mmclassification), [MMEditing](https://github.com/open-mmlab/mmediting) 等。 ### 什么是注册器 在MMCV中,注册器可以看作类到字符串的映射。 一个注册器中的类通常有相似的接口,但是可以实现不同的算法或支持不同的数据集。 借助注册器,用户可以通过使用相应的字符串查找并实例化该类,并根据他们的需要实例化对应模块。 一个典型的案例是,OpenMMLab 中的大部分开源项目的配置系统,这些系统通过配置文件来使用注册器创建钩子、执行器、模型和数据集。 可以在[这里](https://mmcv.readthedocs.io/en/latest/api.html?highlight=registry#mmcv.utils.Registry)找到注册器接口使用文档。 使用 `registry`(注册器)管理代码库中的模型,需要以下三个步骤。 1. 创建一个构建方法(可选,在大多数情况下您可以只使用默认方法) 2. 创建注册器 3. 
使用此注册器来管理模块 `Registry`(注册器)的参数 `build_func`(构建函数) 用来自定义如何实例化类的实例,默认使用 [这里](https://mmcv.readthedocs.io/en/latest/api.html?highlight=registry#mmcv.utils.build_from_cfg)实现的`build_from_cfg`。 ### 一个简单的例子 这里是一个使用注册器管理包中模块的简单示例。您可以在 OpenMMLab 开源项目中找到更多实例。 假设我们要实现一系列数据集转换器(Dataset Converter),用于将不同格式的数据转换为标准数据格式。我们先创建一个名为converters的目录作为包,在包中我们创建一个文件来实现构建器(builder),命名为converters/builder.py,如下 ```python from mmcv.utils import Registry # 创建转换器(converter)的注册器(registry) CONVERTERS = Registry('converter') ``` 然后我们在包中可以实现不同的转换器(converter)。例如,在 `converters/converter1.py` 中实现 `Converter1`。 ```python from .builder import CONVERTERS # 使用注册器管理模块 @CONVERTERS.register_module() class Converter1(object): def __init__(self, a, b): self.a = a self.b = b ``` 使用注册器管理模块的关键步骤是,将实现的模块注册到注册表 `CONVERTERS` 中。通过 `@CONVERTERS.register_module()` 装饰所实现的模块,字符串和类之间的映射就可以由 `CONVERTERS` 构建和维护。 通过这种方式,就可以通过 `CONVERTERS` 建立字符串与类之间的映射,如下所示: ```python 'Converter1' -> <class 'Converter1'> ``` ```{note} 只有模块所在的文件被导入时,注册机制才会被触发,所以您需要在某处导入该文件。更多详情请查看 https://github.com/open-mmlab/mmdetection/issues/5974。 ``` 如果模块被成功注册了,你可以通过配置文件使用这个转换器(converter),如下所示: ```python converter_cfg = dict(type='Converter1', a=a_value, b=b_value) converter = CONVERTERS.build(converter_cfg) ``` ### 自定义构建函数 假设我们想自定义 `converters` 的构建流程,我们可以实现一个自定义的 `build_func` (构建函数)并将其传递到注册器中。 ```python from mmcv.utils import Registry # 创建一个构建函数 def build_converter(cfg, registry, *args, **kwargs): cfg_ = cfg.copy() converter_type = cfg_.pop('type') if converter_type not in registry: raise KeyError(f'Unrecognized converter type {converter_type}') else: converter_cls = registry.get(converter_type) converter = converter_cls(*args, **kwargs, **cfg_) return converter # 创建一个用于转换器(converters)的注册器,并传递(registry)``build_converter`` 函数 CONVERTERS = Registry('converter', build_func=build_converter) ``` ```{note} 注:在这个例子中,我们演示了如何使用参数:`build_func` 自定义构建类的实例的方法。 该功能类似于默认的`build_from_cfg`。在大多数情况下,默认就足够了。 ``` `build_model_from_cfg`也实现了在`nn.Sequential`中构建PyTorch模块,你可以直接使用它们。 ### 注册器层结构 
你也可以从多个 OpenMMLab 开源框架中构建模块,例如,你可以把所有 [MMClassification](https://github.com/open-mmlab/mmclassification) 中的主干网络(backbone)用到 [MMDetection](https://github.com/open-mmlab/mmdetection) 的目标检测中,你也可以融合 [MMDetection](https://github.com/open-mmlab/mmdetection) 中的目标检测模型 和 [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) 语义分割模型。 下游代码库中所有 `MODELS` 注册器都是MMCV `MODELS` 注册器的子注册器。基本上,使用以下两种方法从子注册器或相邻兄弟注册器构建模块。 1. 从子注册器中构建 例如: 我们在 MMDetection 中定义: ```python from mmcv.utils import Registry from mmcv.cnn import MODELS as MMCV_MODELS MODELS = Registry('model', parent=MMCV_MODELS) @MODELS.register_module() class NetA(nn.Module): def forward(self, x): return x ``` 我们在 MMClassification 中定义: ```python from mmcv.utils import Registry from mmcv.cnn import MODELS as MMCV_MODELS MODELS = Registry('model', parent=MMCV_MODELS) @MODELS.register_module() class NetB(nn.Module): def forward(self, x): return x + 1 ``` 我们可以通过以下代码在 MMDetection 或 MMClassification 中构建两个网络: ```python from mmdet.models import MODELS net_a = MODELS.build(cfg=dict(type='NetA')) net_b = MODELS.build(cfg=dict(type='mmcls.NetB')) ``` 或 ```python from mmcls.models import MODELS net_a = MODELS.build(cfg=dict(type='mmdet.NetA')) net_b = MODELS.build(cfg=dict(type='NetB')) ``` 2. 
从父注册器中构建 MMCV中的共享`MODELS`注册器是所有下游代码库的父注册器(根注册器): ```python from mmcv.cnn import MODELS as MMCV_MODELS net_a = MMCV_MODELS.build(cfg=dict(type='mmdet.NetA')) net_b = MMCV_MODELS.build(cfg=dict(type='mmcls.NetB')) ``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/runner.md ================================================ ## 执行器 执行器模块负责模型训练过程调度,主要目的是让用户使用更少的代码以及灵活可配置方式开启训练。其具备如下核心特性: - 支持以 `EpochBasedRunner` 和 `IterBasedRunner` 为单位的迭代模式以满足不同场景 - 支持定制工作流以满足训练过程中各状态自由切换,目前支持训练和验证两个工作流。工作流可以简单理解为一个完整的训练和验证迭代过程。 - 配合各类默认和自定义 Hook,对外提供了灵活扩展能力 ### EpochBasedRunner 顾名思义,`EpochBasedRunner` 是指以 epoch 为周期的工作流,例如设置 workflow = [('train', 2), ('val', 1)] 表示循环迭代地训练 2 个 epoch,然后验证 1 个 epoch。MMDetection 目标检测框架默认采用的是 `EpochBasedRunner`。 其抽象逻辑如下所示: ```python # 训练终止条件 while curr_epoch < max_epochs: # 遍历用户设置的工作流,例如 workflow = [('train', 2),('val', 1)] for i, flow in enumerate(workflow): # mode 是工作流函数,例如 train, epochs 是迭代次数 mode, epochs = flow # 要么调用 self.train(),要么调用 self.val() epoch_runner = getattr(self, mode) # 运行对应工作流函数 for _ in range(epochs): epoch_runner(data_loaders[i], **kwargs) ``` 目前支持训练和验证两个工作流,以训练函数为例,其抽象逻辑是: ```python # epoch_runner 目前可以是 train 或者 val def train(self, data_loader, **kwargs): # 遍历 dataset,共返回一个 epoch 的 batch 数据 for i, data_batch in enumerate(data_loader): self.call_hook('before_train_iter') # 验证时候 train_mode=False self.run_iter(data_batch, train_mode=True, **kwargs) self.call_hook('after_train_iter') self.call_hook('after_train_epoch') ``` ### IterBasedRunner 不同于 `EpochBasedRunner`,`IterBasedRunner` 是指以 iter 为周期的工作流,例如设置 workflow = [('train', 2), ('val', 1)] 表示循环迭代地训练 2 个 iter,然后验证 1 个 iter,MMSegmentation 语义分割框架默认采用的是 `IterBasedRunner`。 其抽象逻辑如下所示: ```python # 虽然是 iter 单位,但是某些场合需要 epoch 信息,由 IterLoader 提供 iter_loaders = [IterLoader(x) for x in data_loaders] # 训练终止条件 while curr_iter < max_iters: # 遍历用户设置的工作流,例如 workflow = [('train', 2), ('val', 1)] for i, flow in enumerate(workflow): # mode 
是工作流函数,例如 train, iters 是迭代次数 mode, iters = flow # 要么调用 self.train(),要么调用 self.val() iter_runner = getattr(self, mode) # 运行对应工作流函数 for _ in range(iters): iter_runner(iter_loaders[i], **kwargs) ``` 目前支持训练和验证两个工作流,以验证函数为例,其抽象逻辑是: ```python # iter_runner 目前可以是 train 或者 val def val(self, data_loader, **kwargs): # 获取 batch 数据,用于一次迭代 data_batch = next(data_loader) self.call_hook('before_val_iter') outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) self.outputs = outputs self.call_hook('after_val_iter') ``` 除了上述基础功能外,`EpochBasedRunner` 和 `IterBasedRunner` 还提供了 resume 、 save_checkpoint 和注册 hook 功能。 ### 一个简单例子 以最常用的分类任务为例详细说明 `runner` 的使用方法。 开启任何一个训练任务,都需要包括如下步骤: **(1) dataloader、model 和优化器等类初始化** ```python # 模型类初始化 model=... # 优化器类初始化,典型值 cfg.optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) optimizer = build_optimizer(model, cfg.optimizer) # 工作流对应的 dataloader 初始化 data_loaders = [ build_dataloader( ds, cfg.data.samples_per_gpu, cfg.data.workers_per_gpu, ...) for ds in dataset ] ``` **(2) runner 类初始化** ```python runner = build_runner( # cfg.runner 典型配置为 # runner = dict(type='EpochBasedRunner', max_epochs=200) cfg.runner, default_args=dict( model=model, batch_processor=None, optimizer=optimizer, logger=logger)) ``` **(3) 注册默认训练所必须的 hook,和用户自定义 hook** ```python # 注册定制必需的 hook runner.register_training_hooks( # lr相关配置,典型为 # lr_config = dict(policy='step', step=[100, 150]) cfg.lr_config, # 优化相关配置,例如 grad_clip 等 optimizer_config, # 权重保存相关配置,典型为 # checkpoint_config = dict(interval=1),每个单位都保存权重 cfg.checkpoint_config, # 日志相关配置 cfg.log_config, ...) 
# 注册用户自定义 hook # 例如想使用 ema 功能,则可以设置 custom_hooks=[dict(type='EMAHook')] if cfg.get('custom_hooks', None): custom_hooks = cfg.custom_hooks for hook_cfg in cfg.custom_hooks: hook_cfg = hook_cfg.copy() priority = hook_cfg.pop('priority', 'NORMAL') hook = build_from_cfg(hook_cfg, HOOKS) runner.register_hook(hook, priority=priority) ``` 然后可以进行 resume 或者 load_checkpoint 对权重进行加载。 **(4) 开启训练流** ```python # workflow 典型为 workflow = [('train', 1)] # 此时就真正开启了训练 runner.run(data_loaders, cfg.workflow) ``` 关于 workflow 设置,以 `EpochBasedRunner` 为例,详情如下: - 假设只想运行训练工作流,则可以设置 workflow = [('train', 1)],表示只进行迭代训练 - 假设想运行训练和验证工作流,则可以设置 workflow = [('train', 3), ('val', 1)],表示先训练 3 个 epoch ,然后切换到 val 工作流,运行 1 个 epoch,然后循环,直到训练 epoch 次数达到指定值 - 工作流设置还可以自由定制,例如你可以先验证再训练 workflow = [('val', 1), ('train', 1)] 上述代码都已经封装到了各个代码库的 train.py 中,用户只需要设置相应的配置即可,上述流程会自动运行。 ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/utils.md ================================================ ## 辅助函数 ### 进度条 如果你想跟踪函数批处理任务的进度,可以使用 `track_progress` 。它能以进度条的形式展示任务的完成情况以及剩余任务所需的时间(内部实现为for循环)。 ```python import mmcv def func(item): # 执行相关操作 pass tasks = [item_1, item_2, ..., item_n] mmcv.track_progress(func, tasks) ``` 效果如下 ![progress](../../en/_static/progress.*) 如果你想可视化多进程任务的进度,你可以使用 `track_parallel_progress` 。 ```python mmcv.track_parallel_progress(func, tasks, 8) # 8 workers ``` ![progress](../../en/_static/parallel_progress.*) 如果你想要迭代或枚举数据列表并可视化进度,你可以使用 `track_iter_progress` 。 ```python import mmcv tasks = [item_1, item_2, ..., item_n] for task in mmcv.track_iter_progress(tasks): # do something like print print(task) for i, task in enumerate(mmcv.track_iter_progress(tasks)): # do something like print print(i) print(task) ``` ### 计时器 mmcv提供的 `Timer` 可以很方便地计算代码块的执行时间。 ```python import time with mmcv.Timer(): # simulate some code block time.sleep(1) ``` 你也可以使用 `since_start()` 和 `since_last_check()` 。前者返回计时器启动后的运行时长,后者返回最近一次查看计时器后的运行时长。 ```python timer = 
mmcv.Timer() # code block 1 here print(timer.since_start()) # code block 2 here print(timer.since_last_check()) print(timer.since_start()) ``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/visualization.md ================================================ ## 可视化 `mmcv` 可以展示图像以及标注(目前只支持标注框) ```python # 展示图像文件 mmcv.imshow('a.jpg') # 展示已加载的图像 img = np.random.rand(100, 100, 3) mmcv.imshow(img) # 展示带有标注框的图像 img = np.random.rand(100, 100, 3) bboxes = np.array([[0, 0, 50, 50], [20, 20, 60, 60]]) mmcv.imshow_bboxes(img, bboxes) ``` `mmcv` 也可以展示特殊的图像,例如光流 ```python flow = mmcv.flowread('test.flo') mmcv.flowshow(flow) ``` ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/examples/train.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import torchvision.transforms as transforms from torch.utils.data import DataLoader from torchvision.datasets import CIFAR10 from mmcv.parallel import MMDataParallel from mmcv.runner import EpochBasedRunner from mmcv.utils import get_logger class Model(nn.Module): def __init__(self): super(Model, self).__init__() self.conv1 = nn.Conv2d(3, 6, 5) self.pool = nn.MaxPool2d(2, 2) self.conv2 = nn.Conv2d(6, 16, 5) self.fc1 = nn.Linear(16 * 5 * 5, 120) self.fc2 = nn.Linear(120, 84) self.fc3 = nn.Linear(84, 10) self.loss_fn = nn.CrossEntropyLoss() def forward(self, x): x = self.pool(F.relu(self.conv1(x))) x = self.pool(F.relu(self.conv2(x))) x = x.view(-1, 16 * 5 * 5) x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) x = self.fc3(x) return x def train_step(self, data, optimizer): images, labels = data predicts = self(images) # -> self.__call__() -> self.forward() loss = self.loss_fn(predicts, labels) return {'loss': loss} if __name__ == '__main__': model = Model() if torch.cuda.is_available(): # only use gpu:0 to train # Solved issue 
https://github.com/open-mmlab/mmcv/issues/1470 model = MMDataParallel(model.cuda(), device_ids=[0]) # dataset and dataloader transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ]) trainset = CIFAR10( root='data', train=True, download=True, transform=transform) trainloader = DataLoader( trainset, batch_size=128, shuffle=True, num_workers=2) optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9) logger = get_logger('mmcv') # runner is a scheduler to manage the training runner = EpochBasedRunner( model, optimizer=optimizer, work_dir='./work_dir', logger=logger, max_epochs=4) # learning rate scheduler config lr_config = dict(policy='step', step=[2, 3]) # configuration of optimizer optimizer_config = dict(grad_clip=None) # configuration of saving checkpoints periodically checkpoint_config = dict(interval=1) # save log periodically and multiple hooks can be used simultaneously log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) # register hooks to runner and those hooks will be invoked automatically runner.register_training_hooks( lr_config=lr_config, optimizer_config=optimizer_config, checkpoint_config=checkpoint_config, log_config=log_config) runner.run([trainloader], [('train', 1)]) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/__init__.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. # flake8: noqa from .arraymisc import * from .fileio import * from .image import * from .utils import * from .version import * from .video import * from .visualization import * # The following modules are not imported to this level, so mmcv may be used # without PyTorch. # - runner # - parallel # - op ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/arraymisc/__init__.py ================================================ # Copyright (c) OpenMMLab. 
All rights reserved. from .quantization import dequantize, quantize __all__ = ['quantize', 'dequantize'] ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/arraymisc/quantization.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import numpy as np def quantize(arr, min_val, max_val, levels, dtype=np.int64): """Quantize an array of (-inf, inf) to [0, levels-1]. Args: arr (ndarray): Input array. min_val (scalar): Minimum value to be clipped. max_val (scalar): Maximum value to be clipped. levels (int): Quantization levels. dtype (np.type): The type of the quantized array. Returns: tuple: Quantized array. """ if not (isinstance(levels, int) and levels > 1): raise ValueError( f'levels must be a positive integer, but got {levels}') if min_val >= max_val: raise ValueError( f'min_val ({min_val}) must be smaller than max_val ({max_val})') arr = np.clip(arr, min_val, max_val) - min_val quantized_arr = np.minimum( np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1) return quantized_arr def dequantize(arr, min_val, max_val, levels, dtype=np.float64): """Dequantize an array. Args: arr (ndarray): Input array. min_val (scalar): Minimum value to be clipped. max_val (scalar): Maximum value to be clipped. levels (int): Quantization levels. dtype (np.type): The type of the dequantized array. Returns: tuple: Dequantized array. """ if not (isinstance(levels, int) and levels > 1): raise ValueError( f'levels must be a positive integer, but got {levels}') if min_val >= max_val: raise ValueError( f'min_val ({min_val}) must be smaller than max_val ({max_val})') dequantized_arr = (arr + 0.5).astype(dtype) * (max_val - min_val) / levels + min_val return dequantized_arr ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/__init__.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
from .alexnet import AlexNet # yapf: disable from .bricks import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS, ContextBlock, Conv2d, Conv3d, ConvAWS2d, ConvModule, ConvTranspose2d, ConvTranspose3d, ConvWS2d, DepthwiseSeparableConvModule, GeneralizedAttention, HSigmoid, HSwish, Linear, MaxPool2d, MaxPool3d, NonLocal1d, NonLocal2d, NonLocal3d, Scale, Swish, build_activation_layer, build_conv_layer, build_norm_layer, build_padding_layer, build_plugin_layer, build_upsample_layer, conv_ws_2d, is_norm) from .builder import MODELS, build_model_from_cfg # yapf: enable from .resnet import ResNet, make_res_layer from .utils import (INITIALIZERS, Caffe2XavierInit, ConstantInit, KaimingInit, NormalInit, PretrainedInit, TruncNormalInit, UniformInit, XavierInit, bias_init_with_prob, caffe2_xavier_init, constant_init, fuse_conv_bn, get_model_complexity_info, initialize, kaiming_init, normal_init, trunc_normal_init, uniform_init, xavier_init) from .vgg import VGG, make_vgg_layer __all__ = [ 'AlexNet', 'VGG', 'make_vgg_layer', 'ResNet', 'make_res_layer', 'constant_init', 'xavier_init', 'normal_init', 'trunc_normal_init', 'uniform_init', 'kaiming_init', 'caffe2_xavier_init', 'bias_init_with_prob', 'ConvModule', 'build_activation_layer', 'build_conv_layer', 'build_norm_layer', 'build_padding_layer', 'build_upsample_layer', 'build_plugin_layer', 'is_norm', 'NonLocal1d', 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'HSigmoid', 'Swish', 'HSwish', 'GeneralizedAttention', 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', 'PADDING_LAYERS', 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', 'get_model_complexity_info', 'conv_ws_2d', 'ConvAWS2d', 'ConvWS2d', 'fuse_conv_bn', 'DepthwiseSeparableConvModule', 'Linear', 'Conv2d', 'ConvTranspose2d', 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', 'initialize', 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit', 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit', 
class AlexNet(nn.Module):
    """AlexNet backbone.

    The convolutional feature extractor is always built. The fully connected
    classifier head is only created (and only applied in ``forward``) when
    ``num_classes`` is positive.

    Args:
        num_classes (int): number of classes for classification.
    """

    def __init__(self, num_classes=-1):
        super().__init__()
        self.num_classes = num_classes

        # Layers are created in the same order as they are applied, so the
        # ``features.<idx>`` parameter names follow list position.
        feature_layers = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*feature_layers)

        if self.num_classes > 0:
            head_layers = [
                nn.Dropout(),
                nn.Linear(256 * 6 * 6, 4096),
                nn.ReLU(inplace=True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(inplace=True),
                nn.Linear(4096, num_classes),
            ]
            self.classifier = nn.Sequential(*head_layers)

    def init_weights(self, pretrained=None):
        """Optionally load weights from a checkpoint.

        Args:
            pretrained (str | None): Checkpoint path handed to
                ``load_checkpoint``. ``None`` keeps the default
                initialization.

        Raises:
            TypeError: If ``pretrained`` is neither a str nor None.
        """
        if pretrained is None:
            # use default initializer
            return
        if not isinstance(pretrained, str):
            raise TypeError('pretrained must be a str or None')

        root_logger = logging.getLogger()
        # Imported lazily, only when a checkpoint is actually requested.
        from ..runner import load_checkpoint
        load_checkpoint(self, pretrained, strict=False, logger=root_logger)

    def forward(self, x):
        """Run the feature extractor and, if present, the classifier head."""
        feats = self.features(x)
        if not self.num_classes > 0:
            return feats
        # The classifier expects exactly 256 * 6 * 6 features per sample.
        flattened = feats.view(feats.size(0), 256 * 6 * 6)
        return self.classifier(flattened)
from .activation import build_activation_layer from .context_block import ContextBlock from .conv import build_conv_layer from .conv2d_adaptive_padding import Conv2dAdaptivePadding from .conv_module import ConvModule from .conv_ws import ConvAWS2d, ConvWS2d, conv_ws_2d from .depthwise_separable_conv_module import DepthwiseSeparableConvModule from .drop import Dropout, DropPath from .generalized_attention import GeneralizedAttention from .hsigmoid import HSigmoid from .hswish import HSwish from .non_local import NonLocal1d, NonLocal2d, NonLocal3d from .norm import build_norm_layer, is_norm from .padding import build_padding_layer from .plugin import build_plugin_layer from .registry import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS) from .scale import Scale from .swish import Swish from .upsample import build_upsample_layer from .wrappers import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d, Linear, MaxPool2d, MaxPool3d) __all__ = [ 'ConvModule', 'build_activation_layer', 'build_conv_layer', 'build_norm_layer', 'build_padding_layer', 'build_upsample_layer', 'build_plugin_layer', 'is_norm', 'HSigmoid', 'HSwish', 'NonLocal1d', 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'GeneralizedAttention', 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', 'PADDING_LAYERS', 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', 'ConvAWS2d', 'ConvWS2d', 'conv_ws_2d', 'DepthwiseSeparableConvModule', 'Swish', 'Linear', 'Conv2dAdaptivePadding', 'Conv2d', 'ConvTranspose2d', 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', 'Dropout', 'DropPath' ] ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/activation.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
# Register a fixed set of ``torch.nn`` activation classes in
# ``ACTIVATION_LAYERS`` so that ``build_activation_layer`` can look them up.
# NOTE(review): no explicit name is passed here; presumably the registry keys
# each entry by its class name -- confirm against the Registry implementation.
# The loop variable ``module`` stays defined at module level after the loop.
for module in [
        nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU,
        nn.Sigmoid, nn.Tanh
]:
    ACTIVATION_LAYERS.register_module(module=module)


# Registered twice: once without an explicit name and once under the alias
# ``'Clip'``.
@ACTIVATION_LAYERS.register_module(name='Clip')
@ACTIVATION_LAYERS.register_module()
class Clamp(nn.Module):
    """Clamp activation layer.

    This activation function is to clamp the feature map value within
    :math:`[min, max]`. More details can be found in ``torch.clamp()``.

    Note that the constructor takes no ``inplace`` argument.

    Args:
        min (Number | optional): Lower-bound of the range to be clamped to.
            Default to -1.
        max (Number | optional): Upper-bound of the range to be clamped to.
            Default to 1.
    """

    def __init__(self, min=-1., max=1.):
        super(Clamp, self).__init__()
        self.min = min
        self.max = max

    def forward(self, x):
        """Forward function.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            torch.Tensor: Clamped tensor.
        """
        return torch.clamp(x, min=self.min, max=self.max)


class GELU(nn.Module):
    r"""Applies the Gaussian Error Linear Units function:

    .. math::
        \text{GELU}(x) = x * \Phi(x)
    where :math:`\Phi(x)` is the Cumulative Distribution Function for
    Gaussian Distribution.

    This local class is a thin wrapper around ``F.gelu`` and is only
    registered when the version check below selects it.

    Shape:
        - Input: :math:`(N, *)` where `*` means, any number of additional
          dimensions
        - Output: :math:`(N, *)`, same shape as the input

    .. image:: scripts/activation_images/GELU.png

    Examples::

        >>> m = nn.GELU()
        >>> input = torch.randn(2)
        >>> output = m(input)
    """

    def forward(self, input):
        # Delegates entirely to the functional implementation.
        return F.gelu(input)


# Register the local ``GELU`` wrapper on parrots or on PyTorch older than
# 1.4, and ``nn.GELU`` otherwise.
# NOTE(review): presumably ``nn.GELU`` is unavailable in those environments
# -- confirm.
if (TORCH_VERSION == 'parrots'
        or digit_version(TORCH_VERSION) < digit_version('1.4')):
    ACTIVATION_LAYERS.register_module(module=GELU)
else:
    ACTIVATION_LAYERS.register_module(module=nn.GELU)


def build_activation_layer(cfg):
    """Build activation layer.

    The config is passed unchanged to ``build_from_cfg`` together with the
    ``ACTIVATION_LAYERS`` registry.

    Args:
        cfg (dict): The activation layer config, which should contain:
            - type (str): Layer type.
            - layer args: Args needed to instantiate an activation layer.

    Returns:
        nn.Module: Created activation layer.
    """
    return build_from_cfg(cfg, ACTIVATION_LAYERS)
Default: ('channel_add',) """ _abbr_ = 'context_block' def __init__(self, in_channels, ratio, pooling_type='att', fusion_types=('channel_add', )): super(ContextBlock, self).__init__() assert pooling_type in ['avg', 'att'] assert isinstance(fusion_types, (list, tuple)) valid_fusion_types = ['channel_add', 'channel_mul'] assert all([f in valid_fusion_types for f in fusion_types]) assert len(fusion_types) > 0, 'at least one fusion should be used' self.in_channels = in_channels self.ratio = ratio self.planes = int(in_channels * ratio) self.pooling_type = pooling_type self.fusion_types = fusion_types if pooling_type == 'att': self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1) self.softmax = nn.Softmax(dim=2) else: self.avg_pool = nn.AdaptiveAvgPool2d(1) if 'channel_add' in fusion_types: self.channel_add_conv = nn.Sequential( nn.Conv2d(self.in_channels, self.planes, kernel_size=1), nn.LayerNorm([self.planes, 1, 1]), nn.ReLU(inplace=True), # yapf: disable nn.Conv2d(self.planes, self.in_channels, kernel_size=1)) else: self.channel_add_conv = None if 'channel_mul' in fusion_types: self.channel_mul_conv = nn.Sequential( nn.Conv2d(self.in_channels, self.planes, kernel_size=1), nn.LayerNorm([self.planes, 1, 1]), nn.ReLU(inplace=True), # yapf: disable nn.Conv2d(self.planes, self.in_channels, kernel_size=1)) else: self.channel_mul_conv = None self.reset_parameters() def reset_parameters(self): if self.pooling_type == 'att': kaiming_init(self.conv_mask, mode='fan_in') self.conv_mask.inited = True if self.channel_add_conv is not None: last_zero_init(self.channel_add_conv) if self.channel_mul_conv is not None: last_zero_init(self.channel_mul_conv) def spatial_pool(self, x): batch, channel, height, width = x.size() if self.pooling_type == 'att': input_x = x # [N, C, H * W] input_x = input_x.view(batch, channel, height * width) # [N, 1, C, H * W] input_x = input_x.unsqueeze(1) # [N, 1, H, W] context_mask = self.conv_mask(x) # [N, 1, H * W] context_mask = 
# Register the torch.nn convolution classes under explicit names.
# ``'Conv'`` is an alias that maps to ``nn.Conv2d``.
CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d)
CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d)
CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d)
CONV_LAYERS.register_module('Conv', module=nn.Conv2d)


def build_conv_layer(cfg, *args, **kwargs):
    """Build convolution layer.

    Args:
        cfg (None or dict): The conv layer config, which should contain:
            - type (str): Layer type.
            - layer args: Args needed to instantiate an conv layer.
            ``None`` is treated as ``dict(type='Conv2d')``.
        args (argument list): Arguments passed to the `__init__`
            method of the corresponding conv layer.
        kwargs (keyword arguments): Keyword arguments passed to the `__init__`
            method of the corresponding conv layer.

    Returns:
        nn.Module: Created conv layer.

    Raises:
        TypeError: If ``cfg`` is neither None nor a dict.
        KeyError: If ``cfg`` has no ``"type"`` key, or the type is not
            registered in ``CONV_LAYERS``.
    """
    if cfg is None:
        cfg_ = dict(type='Conv2d')
    else:
        if not isinstance(cfg, dict):
            raise TypeError('cfg must be a dict')
        if 'type' not in cfg:
            raise KeyError('the cfg dict must contain the key "type"')
        # Work on a copy so the caller's config is not mutated by ``pop``.
        cfg_ = cfg.copy()

    layer_type = cfg_.pop('type')
    if layer_type not in CONV_LAYERS:
        # The message used to say "norm type", copied from the norm builder.
        raise KeyError(f'Unrecognized conv type {layer_type}')
    conv_layer = CONV_LAYERS.get(layer_type)

    # Remaining config entries are forwarded as extra keyword arguments.
    layer = conv_layer(*args, **kwargs, **cfg_)

    return layer
@PLUGIN_LAYERS.register_module()
class ConvModule(nn.Module):
    """A conv block that bundles conv/norm/activation layers.

    This block simplifies the usage of convolution layers, which are commonly
    used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU).
    It is based upon three build methods: `build_conv_layer()`,
    `build_norm_layer()` and `build_activation_layer()`.

    Besides, we add some additional features in this module.
    1. Automatically set `bias` of the conv layer.
    2. Spectral norm is supported.
    3. More padding modes are supported. Before PyTorch 1.5, nn.Conv2d only
    supports zero and circular padding, and we add "reflect" padding mode.

    Args:
        in_channels (int): Number of channels in the input feature map.
            Same as that in ``nn._ConvNd``.
        out_channels (int): Number of channels produced by the convolution.
            Same as that in ``nn._ConvNd``.
        kernel_size (int | tuple[int]): Size of the convolving kernel.
            Same as that in ``nn._ConvNd``.
        stride (int | tuple[int]): Stride of the convolution.
            Same as that in ``nn._ConvNd``.
        padding (int | tuple[int]): Zero-padding added to both sides of
            the input. Same as that in ``nn._ConvNd``.
        dilation (int | tuple[int]): Spacing between kernel elements.
            Same as that in ``nn._ConvNd``.
        groups (int): Number of blocked connections from input channels to
            output channels. Same as that in ``nn._ConvNd``.
        bias (bool | str): If specified as `auto`, it will be decided by the
            norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise
            False. Default: "auto".
        conv_cfg (dict): Config dict for convolution layer. Default: None,
            which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer. Default: None.
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        inplace (bool): Whether to use inplace mode for activation. It is
            not forwarded to activation types that take no ``inplace``
            argument. Default: True.
        with_spectral_norm (bool): Whether use spectral norm in conv module.
            Default: False.
        padding_mode (str): If the `padding_mode` has not been supported by
            current `Conv2d` in PyTorch, we will use our own padding layer
            instead. Currently, we support ['zeros', 'circular'] with official
            implementation and ['reflect'] with our own implementation.
            Default: 'zeros'.
        order (tuple[str]): The order of conv/norm/activation layers. It is a
            sequence of "conv", "norm" and "act". Common examples are
            ("conv", "norm", "act") and ("act", "conv", "norm").
            Default: ('conv', 'norm', 'act').
    """

    _abbr_ = 'conv_block'

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias='auto',
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=dict(type='ReLU'),
                 inplace=True,
                 with_spectral_norm=False,
                 padding_mode='zeros',
                 order=('conv', 'norm', 'act')):
        super(ConvModule, self).__init__()
        assert conv_cfg is None or isinstance(conv_cfg, dict)
        assert norm_cfg is None or isinstance(norm_cfg, dict)
        assert act_cfg is None or isinstance(act_cfg, dict)
        official_padding_mode = ['zeros', 'circular']
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.inplace = inplace
        self.with_spectral_norm = with_spectral_norm
        # Any mode outside the official list is handled by a separate
        # padding layer applied before the convolution.
        self.with_explicit_padding = padding_mode not in official_padding_mode
        self.order = order
        assert isinstance(self.order, tuple) and len(self.order) == 3
        assert set(order) == set(['conv', 'norm', 'act'])

        self.with_norm = norm_cfg is not None
        self.with_activation = act_cfg is not None
        # if the conv layer is before a norm layer, bias is unnecessary.
        if bias == 'auto':
            bias = not self.with_norm
        self.with_bias = bias

        if self.with_explicit_padding:
            pad_cfg = dict(type=padding_mode)
            self.padding_layer = build_padding_layer(pad_cfg, padding)

        # reset padding to 0 for conv module
        conv_padding = 0 if self.with_explicit_padding else padding
        # build convolution layer
        self.conv = build_conv_layer(
            conv_cfg,
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=conv_padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        # export the attributes of self.conv to a higher level for convenience
        self.in_channels = self.conv.in_channels
        self.out_channels = self.conv.out_channels
        self.kernel_size = self.conv.kernel_size
        self.stride = self.conv.stride
        # ``padding`` keeps the requested value even when the conv itself was
        # built with padding 0 because an explicit padding layer is used.
        self.padding = padding
        self.dilation = self.conv.dilation
        self.transposed = self.conv.transposed
        self.output_padding = self.conv.output_padding
        self.groups = self.conv.groups

        if self.with_spectral_norm:
            self.conv = nn.utils.spectral_norm(self.conv)

        # build normalization layers
        if self.with_norm:
            # norm layer is after conv layer
            if order.index('norm') > order.index('conv'):
                norm_channels = out_channels
            else:
                norm_channels = in_channels
            self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels)
            self.add_module(self.norm_name, norm)
            if self.with_bias:
                if isinstance(norm, (_BatchNorm, _InstanceNorm)):
                    warnings.warn(
                        'Unnecessary conv bias before batch/instance norm')
        else:
            self.norm_name = None

        # build activation layer
        if self.with_activation:
            # Copy so the caller's config and the shared default dict are
            # never mutated by ``setdefault``.
            act_cfg_ = act_cfg.copy()
            # These activation types take no 'inplace' argument, so it must
            # not be injected for them. 'GELU', 'Clamp' and 'Clip' were
            # missing from this list and raised TypeError when built.
            if act_cfg_['type'] not in [
                    'Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish', 'GELU',
                    'Clamp', 'Clip'
            ]:
                act_cfg_.setdefault('inplace', inplace)
            self.activate = build_activation_layer(act_cfg_)

        # Use msra init by default
        self.init_weights()

    @property
    def norm(self):
        """nn.Module | None: The norm layer stored under ``self.norm_name``,
        or None when no norm layer was built."""
        if self.norm_name:
            return getattr(self, self.norm_name)
        else:
            return None

    def init_weights(self):
        """Initialize the conv weights and the norm layer parameters."""
        # 1. It is mainly for customized conv layers with their own
        #    initialization manners by calling their own ``init_weights()``,
        #    and we do not want ConvModule to override the initialization.
        # 2. For customized conv layers without their own initialization
        #    manners (that is, they don't have their own ``init_weights()``)
        #    and PyTorch's conv layers, they will be initialized by
        #    this method with default ``kaiming_init``.
        # Note: For PyTorch's conv layers, they will be overwritten by our
        #    initialization implementation using default ``kaiming_init``.
        if not hasattr(self.conv, 'init_weights'):
            if self.with_activation and self.act_cfg['type'] == 'LeakyReLU':
                nonlinearity = 'leaky_relu'
                a = self.act_cfg.get('negative_slope', 0.01)
            else:
                nonlinearity = 'relu'
                a = 0
            kaiming_init(self.conv, a=a, nonlinearity=nonlinearity)
        if self.with_norm:
            constant_init(self.norm, 1, bias=0)

    def forward(self, x, activate=True, norm=True):
        """Apply the layers in the configured ``order``.

        Args:
            x (torch.Tensor): Input tensor.
            activate (bool): If False, the activation step is skipped for
                this call. Default: True.
            norm (bool): If False, the normalization step is skipped for
                this call. Default: True.

        Returns:
            torch.Tensor: Output of the last applied layer.
        """
        for layer in self.order:
            if layer == 'conv':
                if self.with_explicit_padding:
                    x = self.padding_layer(x)
                x = self.conv(x)
            elif layer == 'norm' and norm and self.with_norm:
                x = self.norm(x)
            elif layer == 'act' and activate and self.with_activation:
                x = self.activate(x)
        return x
def conv_ws_2d(input,
               weight,
               bias=None,
               stride=1,
               padding=0,
               dilation=1,
               groups=1,
               eps=1e-5):
    """2D convolution with weight standardization.

    Each filter (a slice along dim 0 of ``weight``) is shifted by its mean
    and divided by ``std + eps`` before ``F.conv2d`` is applied.
    """
    num_filters = weight.size(0)
    per_filter = weight.view(num_filters, -1)
    mu = per_filter.mean(dim=1, keepdim=True).view(num_filters, 1, 1, 1)
    sigma = per_filter.std(dim=1, keepdim=True).view(num_filters, 1, 1, 1)
    standardized = (weight - mu) / (sigma + eps)
    return F.conv2d(input, standardized, bias, stride, padding, dilation,
                    groups)


@CONV_LAYERS.register_module('ConvWS')
class ConvWS2d(nn.Conv2d):
    """``nn.Conv2d`` whose forward pass uses ``conv_ws_2d``.

    ``eps`` is stored on the module and passed to ``conv_ws_2d``; all other
    arguments are forwarded to ``nn.Conv2d``.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 eps=1e-5):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        self.eps = eps

    def forward(self, x):
        return conv_ws_2d(x, self.weight, self.bias, self.stride,
                          self.padding, self.dilation, self.groups, self.eps)


@CONV_LAYERS.register_module(name='ConvAWS')
class ConvAWS2d(nn.Conv2d):
    """AWS (Adaptive Weight Standardization)

    This is a variant of Weight Standardization
    (https://arxiv.org/pdf/1903.10520.pdf)
    It is used in DetectoRS to avoid NaN
    (https://arxiv.org/pdf/2006.02334.pdf)

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the conv kernel
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of
            the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements.
            Default: 1
        groups (int, optional): Number of blocked connections from input
            channels to output channels. Default: 1
        bias (bool, optional): If set True, adds a learnable bias to the
            output. Default: True
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        # Per-filter scale and shift, kept as buffers rather than parameters.
        per_filter_shape = (self.out_channels, 1, 1, 1)
        self.register_buffer('weight_gamma', torch.ones(*per_filter_shape))
        self.register_buffer('weight_beta', torch.zeros(*per_filter_shape))

    @staticmethod
    def _mean_std(weight):
        """Return per-filter mean and ``sqrt(var + 1e-5)``, each reshaped to
        ``(-1, 1, 1, 1)``."""
        flat = weight.view(weight.size(0), -1)
        mean = flat.mean(dim=1).view(-1, 1, 1, 1)
        std = torch.sqrt(flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1)
        return mean, std

    def _get_weight(self, weight):
        """Standardize ``weight`` per filter, then rescale it with
        ``weight_gamma`` and shift it with ``weight_beta``."""
        mean, std = self._mean_std(weight)
        standardized = (weight - mean) / std
        return self.weight_gamma * standardized + self.weight_beta

    def forward(self, x):
        return F.conv2d(x, self._get_weight(self.weight), self.bias,
                        self.stride, self.padding, self.dilation, self.groups)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        """Override default load function.

        AWS overrides the function _load_from_state_dict to recover
        weight_gamma and weight_beta if they are missing. If weight_gamma and
        weight_beta are found in the checkpoint, this function will return
        after super()._load_from_state_dict. Otherwise, it will compute the
        mean and std of the pretrained weights and store them in weight_beta
        and weight_gamma.
        """
        # Sentinel value: if the mean of gamma is still not positive after
        # loading, gamma is taken to be absent from the checkpoint.
        self.weight_gamma.data.fill_(-1)
        pending_missing = []
        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, pending_missing,
                                      unexpected_keys, error_msgs)
        if self.weight_gamma.data.mean() > 0:
            missing_keys.extend(pending_missing)
            return

        # Recover gamma/beta from the statistics of the loaded weights.
        mean, std = self._mean_std(self.weight.data)
        self.weight_beta.data.copy_(mean)
        self.weight_gamma.data.copy_(std)

        # gamma/beta were just reconstructed, so they are not reported as
        # missing; every other missing key is passed on unchanged.
        missing_keys.extend(
            key for key in pending_missing
            if not key.endswith(('weight_gamma', 'weight_beta')))
Default: 1. padding (int | tuple[int]): Zero-padding added to both sides of the input. Same as that in ``nn._ConvNd``. Default: 0. dilation (int | tuple[int]): Spacing between kernel elements. Same as that in ``nn._ConvNd``. Default: 1. norm_cfg (dict): Default norm config for both depthwise ConvModule and pointwise ConvModule. Default: None. act_cfg (dict): Default activation config for both depthwise ConvModule and pointwise ConvModule. Default: dict(type='ReLU'). dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is 'default', it will be the same as `norm_cfg`. Default: 'default'. dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is 'default', it will be the same as `act_cfg`. Default: 'default'. pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is 'default', it will be the same as `norm_cfg`. Default: 'default'. pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is 'default', it will be the same as `act_cfg`. Default: 'default'. kwargs (optional): Other shared arguments for depthwise and pointwise ConvModule. See ConvModule for ref. """ def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, norm_cfg=None, act_cfg=dict(type='ReLU'), dw_norm_cfg='default', dw_act_cfg='default', pw_norm_cfg='default', pw_act_cfg='default', **kwargs): super(DepthwiseSeparableConvModule, self).__init__() assert 'groups' not in kwargs, 'groups should not be specified' # if norm/activation config of depthwise/pointwise ConvModule is not # specified, use default config. 
def drop_path(x, drop_prob=0., training=False):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of
    residual blocks).

    We follow the implementation
    https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py  # noqa: E501

    The input is returned unchanged when ``drop_prob`` is 0 or ``training``
    is False. Otherwise one random keep/drop decision is drawn per entry
    along dim 0 and kept entries are divided by ``1 - drop_prob``.
    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # handle tensors with different dimensions, not just 4D tensors.
    trailing_ones = (1, ) * (x.ndim - 1)
    mask_shape = (x.shape[0], ) + trailing_ones
    # ``keep_prob + U[0, 1)`` floors to 1 with probability ``keep_prob``
    # and to 0 otherwise.
    noise = keep_prob + torch.rand(
        mask_shape, dtype=x.dtype, device=x.device)
    keep_mask = noise.floor()
    return x.div(keep_prob) * keep_mask


@DROPOUT_LAYERS.register_module()
class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample  (when applied in main path of
    residual blocks).

    We follow the implementation
    https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py  # noqa: E501

    Args:
        drop_prob (float): Probability of the path to be zeroed. Default: 0.1
    """

    def __init__(self, drop_prob=0.1):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # The module's own training flag decides whether dropping is active.
        return drop_path(x, drop_prob=self.drop_prob, training=self.training)


@DROPOUT_LAYERS.register_module()
class Dropout(nn.Dropout):
    """A wrapper for ``torch.nn.Dropout``, We rename the ``p`` of
    ``torch.nn.Dropout`` to ``drop_prob`` so as to be consistent with
    ``DropPath``

    Args:
        drop_prob (float): Probability of the elements to be
            zeroed. Default: 0.5.
        inplace (bool):  Do the operation inplace or not. Default: False.
    """

    def __init__(self, drop_prob=0.5, inplace=False):
        super().__init__(p=drop_prob, inplace=inplace)


def build_dropout(cfg, default_args=None):
    """Builder for drop out layers."""
    dropout_layer = build_from_cfg(cfg, DROPOUT_LAYERS, default_args)
    return dropout_layer
Default: 1. kv_stride (int): The feature stride acting on key/value feature map. Default: 2. q_stride (int): The feature stride acting on query feature map. Default: 1. attention_type (str): A binary indicator string for indicating which items in generalized empirical_attention module are used. Default: '1111'. - '1000' indicates 'query and key content' (appr - appr) item, - '0100' indicates 'query content and relative position' (appr - position) item, - '0010' indicates 'key content only' (bias - appr) item, - '0001' indicates 'relative position only' (bias - position) item. """ _abbr_ = 'gen_attention_block' def __init__(self, in_channels, spatial_range=-1, num_heads=9, position_embedding_dim=-1, position_magnitude=1, kv_stride=2, q_stride=1, attention_type='1111'): super(GeneralizedAttention, self).__init__() # hard range means local range for non-local operation self.position_embedding_dim = ( position_embedding_dim if position_embedding_dim > 0 else in_channels) self.position_magnitude = position_magnitude self.num_heads = num_heads self.in_channels = in_channels self.spatial_range = spatial_range self.kv_stride = kv_stride self.q_stride = q_stride self.attention_type = [bool(int(_)) for _ in attention_type] self.qk_embed_dim = in_channels // num_heads out_c = self.qk_embed_dim * num_heads if self.attention_type[0] or self.attention_type[1]: self.query_conv = nn.Conv2d( in_channels=in_channels, out_channels=out_c, kernel_size=1, bias=False) self.query_conv.kaiming_init = True if self.attention_type[0] or self.attention_type[2]: self.key_conv = nn.Conv2d( in_channels=in_channels, out_channels=out_c, kernel_size=1, bias=False) self.key_conv.kaiming_init = True self.v_dim = in_channels // num_heads self.value_conv = nn.Conv2d( in_channels=in_channels, out_channels=self.v_dim * num_heads, kernel_size=1, bias=False) self.value_conv.kaiming_init = True if self.attention_type[1] or self.attention_type[3]: self.appr_geom_fc_x = nn.Linear( self.position_embedding_dim 
// 2, out_c, bias=False) self.appr_geom_fc_x.kaiming_init = True self.appr_geom_fc_y = nn.Linear( self.position_embedding_dim // 2, out_c, bias=False) self.appr_geom_fc_y.kaiming_init = True if self.attention_type[2]: stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv self.appr_bias = nn.Parameter(appr_bias_value) if self.attention_type[3]: stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv self.geom_bias = nn.Parameter(geom_bias_value) self.proj_conv = nn.Conv2d( in_channels=self.v_dim * num_heads, out_channels=in_channels, kernel_size=1, bias=True) self.proj_conv.kaiming_init = True self.gamma = nn.Parameter(torch.zeros(1)) if self.spatial_range >= 0: # only works when non local is after 3*3 conv if in_channels == 256: max_len = 84 elif in_channels == 512: max_len = 42 max_len_kv = int((max_len - 1.0) / self.kv_stride + 1) local_constraint_map = np.ones( (max_len, max_len, max_len_kv, max_len_kv), dtype=int) for iy in range(max_len): for ix in range(max_len): local_constraint_map[ iy, ix, max((iy - self.spatial_range) // self.kv_stride, 0):min((iy + self.spatial_range + 1) // self.kv_stride + 1, max_len), max((ix - self.spatial_range) // self.kv_stride, 0):min((ix + self.spatial_range + 1) // self.kv_stride + 1, max_len)] = 0 self.local_constraint_map = nn.Parameter( torch.from_numpy(local_constraint_map).byte(), requires_grad=False) if self.q_stride > 1: self.q_downsample = nn.AvgPool2d( kernel_size=1, stride=self.q_stride) else: self.q_downsample = None if self.kv_stride > 1: self.kv_downsample = nn.AvgPool2d( kernel_size=1, stride=self.kv_stride) else: self.kv_downsample = None self.init_weights() def get_position_embedding(self, h, w, h_kv, w_kv, q_stride, kv_stride, device, dtype, feat_dim, wave_length=1000): # the default type of Tensor is float32, leading to type mismatch # in fp16 mode. Cast it to support fp16 mode. 
h_idxs = torch.linspace(0, h - 1, h).to(device=device, dtype=dtype) h_idxs = h_idxs.view((h, 1)) * q_stride w_idxs = torch.linspace(0, w - 1, w).to(device=device, dtype=dtype) w_idxs = w_idxs.view((w, 1)) * q_stride h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).to( device=device, dtype=dtype) h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).to( device=device, dtype=dtype) w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride # (h, h_kv, 1) h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0) h_diff *= self.position_magnitude # (w, w_kv, 1) w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0) w_diff *= self.position_magnitude feat_range = torch.arange(0, feat_dim / 4).to( device=device, dtype=dtype) dim_mat = torch.Tensor([wave_length]).to(device=device, dtype=dtype) dim_mat = dim_mat**((4. / feat_dim) * feat_range) dim_mat = dim_mat.view((1, 1, -1)) embedding_x = torch.cat( ((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2) embedding_y = torch.cat( ((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2) return embedding_x, embedding_y def forward(self, x_input): num_heads = self.num_heads # use empirical_attention if self.q_downsample is not None: x_q = self.q_downsample(x_input) else: x_q = x_input n, _, h, w = x_q.shape if self.kv_downsample is not None: x_kv = self.kv_downsample(x_input) else: x_kv = x_input _, _, h_kv, w_kv = x_kv.shape if self.attention_type[0] or self.attention_type[1]: proj_query = self.query_conv(x_q).view( (n, num_heads, self.qk_embed_dim, h * w)) proj_query = proj_query.permute(0, 1, 3, 2) if self.attention_type[0] or self.attention_type[2]: proj_key = self.key_conv(x_kv).view( (n, num_heads, self.qk_embed_dim, h_kv * w_kv)) if self.attention_type[1] or self.attention_type[3]: position_embed_x, position_embed_y = self.get_position_embedding( h, w, h_kv, w_kv, self.q_stride, self.kv_stride, x_input.device, x_input.dtype, self.position_embedding_dim) # (n, num_heads, w, w_kv, dim) 
position_feat_x = self.appr_geom_fc_x(position_embed_x).\ view(1, w, w_kv, num_heads, self.qk_embed_dim).\ permute(0, 3, 1, 2, 4).\ repeat(n, 1, 1, 1, 1) # (n, num_heads, h, h_kv, dim) position_feat_y = self.appr_geom_fc_y(position_embed_y).\ view(1, h, h_kv, num_heads, self.qk_embed_dim).\ permute(0, 3, 1, 2, 4).\ repeat(n, 1, 1, 1, 1) position_feat_x /= math.sqrt(2) position_feat_y /= math.sqrt(2) # accelerate for saliency only if (np.sum(self.attention_type) == 1) and self.attention_type[2]: appr_bias = self.appr_bias.\ view(1, num_heads, 1, self.qk_embed_dim).\ repeat(n, 1, 1, 1) energy = torch.matmul(appr_bias, proj_key).\ view(n, num_heads, 1, h_kv * w_kv) h = 1 w = 1 else: # (n, num_heads, h*w, h_kv*w_kv), query before key, 540mb for if not self.attention_type[0]: energy = torch.zeros( n, num_heads, h, w, h_kv, w_kv, dtype=x_input.dtype, device=x_input.device) # attention_type[0]: appr - appr # attention_type[1]: appr - position # attention_type[2]: bias - appr # attention_type[3]: bias - position if self.attention_type[0] or self.attention_type[2]: if self.attention_type[0] and self.attention_type[2]: appr_bias = self.appr_bias.\ view(1, num_heads, 1, self.qk_embed_dim) energy = torch.matmul(proj_query + appr_bias, proj_key).\ view(n, num_heads, h, w, h_kv, w_kv) elif self.attention_type[0]: energy = torch.matmul(proj_query, proj_key).\ view(n, num_heads, h, w, h_kv, w_kv) elif self.attention_type[2]: appr_bias = self.appr_bias.\ view(1, num_heads, 1, self.qk_embed_dim).\ repeat(n, 1, 1, 1) energy += torch.matmul(appr_bias, proj_key).\ view(n, num_heads, 1, 1, h_kv, w_kv) if self.attention_type[1] or self.attention_type[3]: if self.attention_type[1] and self.attention_type[3]: geom_bias = self.geom_bias.\ view(1, num_heads, 1, self.qk_embed_dim) proj_query_reshape = (proj_query + geom_bias).\ view(n, num_heads, h, w, self.qk_embed_dim) energy_x = torch.matmul( proj_query_reshape.permute(0, 1, 3, 2, 4), position_feat_x.permute(0, 1, 2, 4, 3)) energy_x = 
energy_x.\ permute(0, 1, 3, 2, 4).unsqueeze(4) energy_y = torch.matmul( proj_query_reshape, position_feat_y.permute(0, 1, 2, 4, 3)) energy_y = energy_y.unsqueeze(5) energy += energy_x + energy_y elif self.attention_type[1]: proj_query_reshape = proj_query.\ view(n, num_heads, h, w, self.qk_embed_dim) proj_query_reshape = proj_query_reshape.\ permute(0, 1, 3, 2, 4) position_feat_x_reshape = position_feat_x.\ permute(0, 1, 2, 4, 3) position_feat_y_reshape = position_feat_y.\ permute(0, 1, 2, 4, 3) energy_x = torch.matmul(proj_query_reshape, position_feat_x_reshape) energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4) energy_y = torch.matmul(proj_query_reshape, position_feat_y_reshape) energy_y = energy_y.unsqueeze(5) energy += energy_x + energy_y elif self.attention_type[3]: geom_bias = self.geom_bias.\ view(1, num_heads, self.qk_embed_dim, 1).\ repeat(n, 1, 1, 1) position_feat_x_reshape = position_feat_x.\ view(n, num_heads, w*w_kv, self.qk_embed_dim) position_feat_y_reshape = position_feat_y.\ view(n, num_heads, h * h_kv, self.qk_embed_dim) energy_x = torch.matmul(position_feat_x_reshape, geom_bias) energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv) energy_y = torch.matmul(position_feat_y_reshape, geom_bias) energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1) energy += energy_x + energy_y energy = energy.view(n, num_heads, h * w, h_kv * w_kv) if self.spatial_range >= 0: cur_local_constraint_map = \ self.local_constraint_map[:h, :w, :h_kv, :w_kv].\ contiguous().\ view(1, 1, h*w, h_kv*w_kv) energy = energy.masked_fill_(cur_local_constraint_map, float('-inf')) attention = F.softmax(energy, 3) proj_value = self.value_conv(x_kv) proj_value_reshape = proj_value.\ view((n, num_heads, self.v_dim, h_kv * w_kv)).\ permute(0, 1, 3, 2) out = torch.matmul(attention, proj_value_reshape).\ permute(0, 1, 3, 2).\ contiguous().\ view(n, self.v_dim * self.num_heads, h, w) out = self.proj_conv(out) # output is downsampled, upsample back to input size if self.q_downsample is 
not None: out = F.interpolate( out, size=x_input.shape[2:], mode='bilinear', align_corners=False) out = self.gamma * out + x_input return out def init_weights(self): for m in self.modules(): if hasattr(m, 'kaiming_init') and m.kaiming_init: kaiming_init( m, mode='fan_in', nonlinearity='leaky_relu', bias=0, distribution='uniform', a=1) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/hsigmoid.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import warnings import torch.nn as nn from .registry import ACTIVATION_LAYERS @ACTIVATION_LAYERS.register_module() class HSigmoid(nn.Module): """Hard Sigmoid Module. Apply the hard sigmoid function: Hsigmoid(x) = min(max((x + bias) / divisor, min_value), max_value) Default: Hsigmoid(x) = min(max((x + 3) / 6, 0), 1) Note: In MMCV v1.4.4, we modified the default value of args to align with PyTorch official. Args: bias (float): Bias of the input feature map. Default: 3.0. divisor (float): Divisor of the input feature map. Default: 6.0. min_value (float): Lower bound value. Default: 0.0. max_value (float): Upper bound value. Default: 1.0. Returns: Tensor: The output tensor. """ def __init__(self, bias=3.0, divisor=6.0, min_value=0.0, max_value=1.0): super(HSigmoid, self).__init__() warnings.warn( 'In MMCV v1.4.4, we modified the default value of args to align ' 'with PyTorch official. Previous Implementation: ' 'Hsigmoid(x) = min(max((x + 1) / 2, 0), 1). 
class _NonLocalNd(nn.Module, metaclass=ABCMeta):
    """Basic Non-local module.

    This module is proposed in
    "Non-local Neural Networks"
    Paper reference: https://arxiv.org/abs/1711.07971
    Code reference: https://github.com/AlexHex7/Non-local_pytorch

    Args:
        in_channels (int): Channels of the input feature map.
        reduction (int): Channel reduction ratio. Default: 2.
        use_scale (bool): Whether to scale pairwise_weight by
            `1/sqrt(inter_channels)` when the mode is `embedded_gaussian`.
            Default: True.
        conv_cfg (None | dict): The config dict for convolution layers.
            If not specified, it will use `nn.Conv2d` for convolution layers.
            Default: None.
        norm_cfg (None | dict): The config dict for normalization layers.
            Default: None. (This parameter is only applicable to conv_out.)
        mode (str): Options are `gaussian`, `concatenation`,
            `embedded_gaussian` and `dot_product`. Default: embedded_gaussian.
        **kwargs: Forwarded to :meth:`init_weights` (``std``, ``zeros_init``).

    Raises:
        ValueError: If ``mode`` is not one of the four supported options.
    """

    # ``forward`` consults this flag in ``gaussian`` mode. Subclasses set it
    # per instance; the class-level default keeps the base class (and any
    # subclass that does not set it) usable instead of raising
    # AttributeError.
    sub_sample = False

    def __init__(self,
                 in_channels,
                 reduction=2,
                 use_scale=True,
                 conv_cfg=None,
                 norm_cfg=None,
                 mode='embedded_gaussian',
                 **kwargs):
        super(_NonLocalNd, self).__init__()
        self.in_channels = in_channels
        self.reduction = reduction
        self.use_scale = use_scale
        # Never reduce below a single channel.
        self.inter_channels = max(in_channels // reduction, 1)
        self.mode = mode

        if mode not in [
                'gaussian', 'embedded_gaussian', 'dot_product', 'concatenation'
        ]:
            raise ValueError("Mode should be in 'gaussian', 'concatenation', "
                             f"'embedded_gaussian' or 'dot_product', but got "
                             f'{mode} instead.')

        # g, theta, phi are defaulted as `nn.ConvNd`.
        # Here we use ConvModule for potential usage.
        self.g = ConvModule(
            self.in_channels,
            self.inter_channels,
            kernel_size=1,
            conv_cfg=conv_cfg,
            act_cfg=None)
        self.conv_out = ConvModule(
            self.inter_channels,
            self.in_channels,
            kernel_size=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

        # ``gaussian`` mode compares the raw input, so it needs no
        # theta/phi projections.
        if self.mode != 'gaussian':
            self.theta = ConvModule(
                self.in_channels,
                self.inter_channels,
                kernel_size=1,
                conv_cfg=conv_cfg,
                act_cfg=None)
            self.phi = ConvModule(
                self.in_channels,
                self.inter_channels,
                kernel_size=1,
                conv_cfg=conv_cfg,
                act_cfg=None)

        if self.mode == 'concatenation':
            self.concat_project = ConvModule(
                self.inter_channels * 2,
                1,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=False,
                act_cfg=dict(type='ReLU'))

        self.init_weights(**kwargs)

    def init_weights(self, std=0.01, zeros_init=True):
        """Initialise the projection convs and the output layer.

        Args:
            std (float): Std of the normal init used for ``g`` (and
                ``theta``/``phi`` when they exist). Default: 0.01.
            zeros_init (bool): If True, zero the last layer of ``conv_out``
                (its norm if configured, otherwise its conv); if False, use
                the same normal init. Default: True.
        """
        if self.mode != 'gaussian':
            for m in [self.g, self.theta, self.phi]:
                normal_init(m.conv, std=std)
        else:
            normal_init(self.g.conv, std=std)
        if zeros_init:
            if self.conv_out.norm_cfg is None:
                constant_init(self.conv_out.conv, 0)
            else:
                constant_init(self.conv_out.norm, 0)
        else:
            if self.conv_out.norm_cfg is None:
                normal_init(self.conv_out.conv, std=std)
            else:
                normal_init(self.conv_out.norm, std=std)

    def gaussian(self, theta_x, phi_x):
        """Softmax-normalised dot product of ``theta_x`` and ``phi_x``."""
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        pairwise_weight = torch.matmul(theta_x, phi_x)
        pairwise_weight = pairwise_weight.softmax(dim=-1)
        return pairwise_weight

    def embedded_gaussian(self, theta_x, phi_x):
        """Like :meth:`gaussian`, optionally scaled by 1/sqrt(channels)."""
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        pairwise_weight = torch.matmul(theta_x, phi_x)
        if self.use_scale:
            # theta_x.shape[-1] is `self.inter_channels`
            pairwise_weight /= theta_x.shape[-1]**0.5
        pairwise_weight = pairwise_weight.softmax(dim=-1)
        return pairwise_weight

    def dot_product(self, theta_x, phi_x):
        """Dot product divided by the size of its last dimension."""
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        pairwise_weight = torch.matmul(theta_x, phi_x)
        pairwise_weight /= pairwise_weight.shape[-1]
        return pairwise_weight

    def concatenation(self, theta_x, phi_x):
        """Pairwise weight from ``concat_project`` on tiled features."""
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        h = theta_x.size(2)
        w = phi_x.size(3)
        # Tile both so every (query, key) pair is laid out on an h x w grid.
        theta_x = theta_x.repeat(1, 1, 1, w)
        phi_x = phi_x.repeat(1, 1, h, 1)

        concat_feature = torch.cat([theta_x, phi_x], dim=1)
        pairwise_weight = self.concat_project(concat_feature)
        n, _, h, w = pairwise_weight.size()
        pairwise_weight = pairwise_weight.view(n, h, w)
        pairwise_weight /= pairwise_weight.shape[-1]

        return pairwise_weight

    def forward(self, x):
        """Return ``x + conv_out(y)``, ``y`` being the aggregated features.

        Args:
            x (Tensor): Input with batch first and channels second.

        Returns:
            Tensor: Output with the same shape as ``x``.
        """
        # Assume `reduction = 1`, then `inter_channels = C`
        # or `inter_channels = C` when `mode="gaussian"`

        # NonLocal1d x: [N, C, H]
        # NonLocal2d x: [N, C, H, W]
        # NonLocal3d x: [N, C, T, H, W]
        n = x.size(0)

        # NonLocal1d g_x: [N, H, C]
        # NonLocal2d g_x: [N, HxW, C]
        # NonLocal3d g_x: [N, TxHxW, C]
        g_x = self.g(x).view(n, self.inter_channels, -1)
        g_x = g_x.permute(0, 2, 1)

        # NonLocal1d theta_x: [N, H, C], phi_x: [N, C, H]
        # NonLocal2d theta_x: [N, HxW, C], phi_x: [N, C, HxW]
        # NonLocal3d theta_x: [N, TxHxW, C], phi_x: [N, C, TxHxW]
        if self.mode == 'gaussian':
            theta_x = x.view(n, self.in_channels, -1)
            theta_x = theta_x.permute(0, 2, 1)
            if self.sub_sample:
                phi_x = self.phi(x).view(n, self.in_channels, -1)
            else:
                phi_x = x.view(n, self.in_channels, -1)
        elif self.mode == 'concatenation':
            theta_x = self.theta(x).view(n, self.inter_channels, -1, 1)
            phi_x = self.phi(x).view(n, self.inter_channels, 1, -1)
        else:
            theta_x = self.theta(x).view(n, self.inter_channels, -1)
            theta_x = theta_x.permute(0, 2, 1)
            phi_x = self.phi(x).view(n, self.inter_channels, -1)

        # The mode name doubles as the name of the pairwise method.
        pairwise_func = getattr(self, self.mode)
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        pairwise_weight = pairwise_func(theta_x, phi_x)

        # NonLocal1d y: [N, H, C]
        # NonLocal2d y: [N, HxW, C]
        # NonLocal3d y: [N, TxHxW, C]
        y = torch.matmul(pairwise_weight, g_x)
        # NonLocal1d y: [N, C, H]
        # NonLocal2d y: [N, C, H, W]
        # NonLocal3d y: [N, C, T, H, W]
        y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels,
                                                    *x.size()[2:])

        output = x + self.conv_out(y)

        return output


class NonLocal1d(_NonLocalNd):
    """1D Non-local module.

    Args:
        in_channels (int): Same as `NonLocalND`.
        sub_sample (bool): Whether to append a max-pooling layer
            (kernel size 2) to ``g`` and ``phi``; in ``gaussian`` mode
            ``phi`` is the pooling layer alone. Default: False.
        conv_cfg (None | dict): Same as `NonLocalND`.
            Default: dict(type='Conv1d').
    """

    def __init__(self,
                 in_channels,
                 sub_sample=False,
                 conv_cfg=dict(type='Conv1d'),
                 **kwargs):
        super(NonLocal1d, self).__init__(
            in_channels, conv_cfg=conv_cfg, **kwargs)

        self.sub_sample = sub_sample

        if sub_sample:
            max_pool_layer = nn.MaxPool1d(kernel_size=2)
            self.g = nn.Sequential(self.g, max_pool_layer)
            if self.mode != 'gaussian':
                self.phi = nn.Sequential(self.phi, max_pool_layer)
            else:
                self.phi = max_pool_layer
""" def __init__(self, in_channels, sub_sample=False, conv_cfg=dict(type='Conv3d'), **kwargs): super(NonLocal3d, self).__init__( in_channels, conv_cfg=conv_cfg, **kwargs) self.sub_sample = sub_sample if sub_sample: max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2)) self.g = nn.Sequential(self.g, max_pool_layer) if self.mode != 'gaussian': self.phi = nn.Sequential(self.phi, max_pool_layer) else: self.phi = max_pool_layer ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/norm.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import inspect import torch.nn as nn from mmcv.utils import is_tuple_of from mmcv.utils.parrots_wrapper import SyncBatchNorm, _BatchNorm, _InstanceNorm from .registry import NORM_LAYERS NORM_LAYERS.register_module('BN', module=nn.BatchNorm2d) NORM_LAYERS.register_module('BN1d', module=nn.BatchNorm1d) NORM_LAYERS.register_module('BN2d', module=nn.BatchNorm2d) NORM_LAYERS.register_module('BN3d', module=nn.BatchNorm3d) NORM_LAYERS.register_module('SyncBN', module=SyncBatchNorm) NORM_LAYERS.register_module('GN', module=nn.GroupNorm) NORM_LAYERS.register_module('LN', module=nn.LayerNorm) NORM_LAYERS.register_module('IN', module=nn.InstanceNorm2d) NORM_LAYERS.register_module('IN1d', module=nn.InstanceNorm1d) NORM_LAYERS.register_module('IN2d', module=nn.InstanceNorm2d) NORM_LAYERS.register_module('IN3d', module=nn.InstanceNorm3d) def infer_abbr(class_type): """Infer abbreviation from the class name. When we build a norm layer with `build_norm_layer()`, we want to preserve the norm type in variable names, e.g, self.bn1, self.gn. This method will infer the abbreviation to map class types to abbreviations. Rule 1: If the class has the property "_abbr_", return the property. Rule 2: If the parent class is _BatchNorm, GroupNorm, LayerNorm or InstanceNorm, the abbreviation of this layer will be "bn", "gn", "ln" and "in" respectively. 
def build_norm_layer(cfg, num_features, postfix=''):
    """Instantiate a normalization layer from a config dict.

    Args:
        cfg (dict): Norm layer config. Recognised keys:

            - type (str): Key of the layer in ``NORM_LAYERS``.
            - requires_grad (bool, optional): Value assigned to
              ``requires_grad`` of every layer parameter. Default: True.
            - any other key is passed to the layer constructor
              (``eps`` defaults to 1e-5 when absent).
        num_features (int): Number of input channels.
        postfix (int | str): Suffix appended to the inferred abbreviation
            to form the layer name.

    Returns:
        tuple[str, nn.Module]: The layer name (abbreviation + postfix,
        e.g. ``bn1``) and the created layer.

    Raises:
        TypeError: If ``cfg`` is not a dict.
        KeyError: If ``cfg`` has no ``type`` key or the type is not
            registered.
    """
    if not isinstance(cfg, dict):
        raise TypeError('cfg must be a dict')
    if 'type' not in cfg:
        raise KeyError('the cfg dict must contain the key "type"')

    # Work on a copy so the caller's config is left untouched.
    options = cfg.copy()
    norm_type = options.pop('type')
    if norm_type not in NORM_LAYERS:
        raise KeyError(f'Unrecognized norm type {norm_type}')

    norm_cls = NORM_LAYERS.get(norm_type)
    prefix = infer_abbr(norm_cls)

    assert isinstance(postfix, (int, str))
    layer_name = prefix + str(postfix)

    trainable = options.pop('requires_grad', True)
    options.setdefault('eps', 1e-5)

    if norm_type == 'GN':
        # GroupNorm takes the channel count as ``num_channels``.
        assert 'num_groups' in options
        norm = norm_cls(num_channels=num_features, **options)
    else:
        norm = norm_cls(num_features, **options)
        if norm_type == 'SyncBN' and hasattr(norm, '_specify_ddp_gpu_num'):
            norm._specify_ddp_gpu_num(1)

    for weight in norm.parameters():
        weight.requires_grad = trainable

    return layer_name, norm


def is_norm(layer, exclude=None):
    """Tell whether ``layer`` is a normalization layer.

    Args:
        layer (nn.Module): The layer to be checked.
        exclude (type | tuple[type]): Types that must be reported as
            non-norm even if they derive from a norm base class.

    Returns:
        bool: True if ``layer`` is an instance of one of the norm base
        classes and not of an excluded type.

    Raises:
        TypeError: If ``exclude`` is neither None, a type, nor a tuple of
            types.
    """
    if exclude is not None:
        # Normalise a single type to a one-element tuple.
        exclude = exclude if isinstance(exclude, tuple) else (exclude, )
        if not is_tuple_of(exclude, type):
            raise TypeError(
                f'"exclude" must be either None or type or a tuple of types, '
                f'but got {type(exclude)}: {exclude}')
        if exclude and isinstance(layer, exclude):
            return False

    return isinstance(
        layer, (_BatchNorm, _InstanceNorm, nn.GroupNorm, nn.LayerNorm))
def infer_abbr(class_type):
    """Infer the abbreviation used to name a plugin layer.

    Rule 1: If the class has the attribute ``_abbr_``, return it.
    Rule 2: Otherwise, fall back to the snake case of the class name,
    e.g. the abbreviation of ``FancyBlock`` will be ``fancy_block``.

    Args:
        class_type (type): The plugin layer type.

    Returns:
        str: The inferred abbreviation.

    Raises:
        TypeError: If ``class_type`` is not a class.
    """

    def _to_snake_case(name):
        """Convert a camel case word into snake case.

        Adapted from the ``inflection`` library.

        Example::

            >>> _to_snake_case("FancyBlock")
            'fancy_block'
        """
        # Split an acronym from a following capitalised word: ABCDef -> ABC_Def
        partial = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', name)
        # Split a lowercase letter or digit from a following capital.
        partial = re.sub(r'([a-z\d])([A-Z])', r'\1_\2', partial)
        return partial.replace('-', '_').lower()

    if not inspect.isclass(class_type):
        raise TypeError(
            f'class_type must be a type, but got {type(class_type)}')

    if not hasattr(class_type, '_abbr_'):
        return _to_snake_case(class_type.__name__)
    return class_type._abbr_


def build_plugin_layer(cfg, postfix='', **kwargs):
    """Build a plugin layer from a config dict.

    Args:
        cfg (dict): Plugin config. It should contain:

            - type (str): Key of the layer in ``PLUGIN_LAYERS``.
            - layer args: Args needed to instantiate the plugin layer.
        postfix (int, str): Suffix appended to the plugin abbreviation to
            form the layer name. Default: ''.
        **kwargs: Extra keyword arguments passed to the layer constructor
            together with the remaining ``cfg`` entries.

    Returns:
        tuple[str, nn.Module]: The layer name (abbreviation + postfix) and
        the created plugin layer.

    Raises:
        TypeError: If ``cfg`` is not a dict.
        KeyError: If ``cfg`` has no ``type`` key or the type is not
            registered.
    """
    if not isinstance(cfg, dict):
        raise TypeError('cfg must be a dict')
    if 'type' not in cfg:
        raise KeyError('the cfg dict must contain the key "type"')

    # Work on a copy so the caller's config is left untouched.
    layer_args = cfg.copy()
    plugin_type = layer_args.pop('type')
    if plugin_type not in PLUGIN_LAYERS:
        raise KeyError(f'Unrecognized plugin type {plugin_type}')

    plugin_cls = PLUGIN_LAYERS.get(plugin_type)
    prefix = infer_abbr(plugin_cls)

    assert isinstance(postfix, (int, str))
    layer_name = prefix + str(postfix)

    return layer_name, plugin_cls(**kwargs, **layer_args)
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/registry.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.utils import Registry

# One registry per family of building bricks. The builder functions in the
# sibling modules look up the ``type`` key of a config in these registries.
CONV_LAYERS = Registry('conv layer')
NORM_LAYERS = Registry('norm layer')
ACTIVATION_LAYERS = Registry('activation layer')
PADDING_LAYERS = Registry('padding layer')
UPSAMPLE_LAYERS = Registry('upsample layer')
PLUGIN_LAYERS = Registry('plugin layer')

# Registries used by the transformer bricks.
DROPOUT_LAYERS = Registry('drop out layers')
POSITIONAL_ENCODING = Registry('position encoding')
ATTENTION = Registry('attention')
FEEDFORWARD_NETWORK = Registry('feed-forward Network')
TRANSFORMER_LAYER = Registry('transformerLayer')
TRANSFORMER_LAYER_SEQUENCE = Registry('transformer-layers sequence')
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/scale.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn


class Scale(nn.Module):
    """A learnable scale parameter.

    This layer scales the input by a learnable factor. It multiplies the
    input, whatever its shape, by a single learnable scalar.

    Args:
        scale (float): Initial value of scale factor. Default: 1.0
    """

    def __init__(self, scale=1.0):
        super(Scale, self).__init__()
        # Stored as a float parameter so the optimizer can update it.
        self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float))

    def forward(self, x):
        """Return ``x`` multiplied by the learnable scale."""
        return x * self.scale
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/swish.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn

from .registry import ACTIVATION_LAYERS


@ACTIVATION_LAYERS.register_module()
class Swish(nn.Module):
    """Swish Module.

    This module applies the swish function:

    .. math::
        Swish(x) = x * Sigmoid(x)

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self):
        super(Swish, self).__init__()

    def forward(self, x):
        """Return ``x * sigmoid(x)`` computed element-wise."""
        return x * torch.sigmoid(x)
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/transformer.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import math
import warnings
from typing import Sequence

import torch
import torch.nn as nn
import torch.nn.functional as F

from mmcv.cnn import (Linear, build_activation_layer, build_conv_layer,
                      build_norm_layer)
from mmcv.runner.base_module import BaseModule, ModuleList, Sequential
from mmcv.utils import (ConfigDict, build_from_cfg, deprecated_api_warning,
                        to_2tuple)
from .drop import build_dropout
from .registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING,
                       TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE)

# Avoid BC-breaking of importing MultiScaleDeformableAttention from this file.
# Note that the ImportWarning below is issued whenever the import succeeds,
# i.e. every time this module is imported with the op available.
try:
    from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention  # noqa F401
    warnings.warn(
        ImportWarning(
            '``MultiScaleDeformableAttention`` has been moved to '
            '``mmcv.ops.multi_scale_deform_attn``, please change original path '  # noqa E501
            '``from mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` '  # noqa E501
            'to ``from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` '  # noqa E501
        ))

except ImportError:
    # The op is optional: only warn, so the rest of this module stays usable.
    warnings.warn('Fail to import ``MultiScaleDeformableAttention`` from '
                  '``mmcv.ops.multi_scale_deform_attn``, '
                  'You should install ``mmcv-full`` if you need this module. ')
def _build_from_registry(registry, cfg, default_args):
    """Shared implementation of the builders: delegate to ``build_from_cfg``."""
    return build_from_cfg(cfg, registry, default_args)


def build_positional_encoding(cfg, default_args=None):
    """Build a positional encoding from ``cfg`` via ``POSITIONAL_ENCODING``."""
    return _build_from_registry(POSITIONAL_ENCODING, cfg, default_args)


def build_attention(cfg, default_args=None):
    """Build an attention module from ``cfg`` via ``ATTENTION``."""
    return _build_from_registry(ATTENTION, cfg, default_args)


def build_feedforward_network(cfg, default_args=None):
    """Build a feed-forward network (FFN) via ``FEEDFORWARD_NETWORK``."""
    return _build_from_registry(FEEDFORWARD_NETWORK, cfg, default_args)


def build_transformer_layer(cfg, default_args=None):
    """Build a transformer layer from ``cfg`` via ``TRANSFORMER_LAYER``."""
    return _build_from_registry(TRANSFORMER_LAYER, cfg, default_args)


def build_transformer_layer_sequence(cfg, default_args=None):
    """Build a transformer encoder or decoder via
    ``TRANSFORMER_LAYER_SEQUENCE``."""
    return _build_from_registry(TRANSFORMER_LAYER_SEQUENCE, cfg, default_args)
class AdaptivePadding(nn.Module):
    """Zero-pad the input so a sliding filter covers it completely.

    The amount of padding is derived from the input size, so the same module
    can be applied to inputs of different spatial sizes. Two modes are
    supported:

    - ``"corner"``: all padding is added at the bottom and at the right.
    - ``"same"``: padding is split between both sides of each axis, like the
      "SAME" padding mode in TensorFlow; the odd extra pixel goes to the
      bottom/right.

    Args:
        kernel_size (int | tuple): Size of the kernel. Default: 1.
        stride (int | tuple): Stride of the filter. Default: 1.
        dilation (int | tuple): Spacing between kernel elements.
            Default: 1.
        padding (str): Either "same" or "corner". Default: "corner".

    Example:
        >>> kernel_size = 16
        >>> stride = 16
        >>> dilation = 1
        >>> input = torch.rand(1, 1, 15, 17)
        >>> adap_pad = AdaptivePadding(
        >>>     kernel_size=kernel_size,
        >>>     stride=stride,
        >>>     dilation=dilation,
        >>>     padding="corner")
        >>> out = adap_pad(input)
        >>> assert (out.shape[2], out.shape[3]) == (16, 32)
        >>> input = torch.rand(1, 1, 16, 17)
        >>> out = adap_pad(input)
        >>> assert (out.shape[2], out.shape[3]) == (16, 32)
    """

    def __init__(self, kernel_size=1, stride=1, dilation=1, padding='corner'):
        super().__init__()

        assert padding in ('same', 'corner')

        self.padding = padding
        self.kernel_size = to_2tuple(kernel_size)
        self.stride = to_2tuple(stride)
        self.dilation = to_2tuple(dilation)

    @staticmethod
    def _axis_padding(size, kernel, stride, dilation):
        """Padding needed along one axis so that the last window fits."""
        num_windows = math.ceil(size / stride)
        covered = (num_windows - 1) * stride + (kernel - 1) * dilation + 1
        return max(covered - size, 0)

    def get_pad_shape(self, input_shape):
        """Calculate the padding size of input.

        Args:
            input_shape (:obj:`torch.Size`): arrange as (H, W).

        Returns:
            Tuple[int]: The padding size along the
            original H and W directions
        """
        in_h, in_w = input_shape
        k_h, k_w = self.kernel_size
        s_h, s_w = self.stride
        d_h, d_w = self.dilation[0], self.dilation[1]
        return (self._axis_padding(in_h, k_h, s_h, d_h),
                self._axis_padding(in_w, k_w, s_w, d_w))

    def forward(self, x):
        """Add padding to `x`

        Args:
            x (Tensor): Input tensor has shape (B, C, H, W).

        Returns:
            Tensor: The tensor with adaptive padding
        """
        pad_h, pad_w = self.get_pad_shape(x.size()[-2:])
        if pad_h <= 0 and pad_w <= 0:
            # Already fully covered: hand the input back unchanged.
            return x

        # ``F.pad`` takes the amounts as (left, right, top, bottom).
        if self.padding == 'corner':
            return F.pad(x, [0, pad_w, 0, pad_h])
        if self.padding == 'same':
            left, top = pad_w // 2, pad_h // 2
            return F.pad(x, [left, pad_w - left, top, pad_h - top])
        return x
class PatchEmbed(BaseModule):
    """Image to Patch Embedding.

    The embedding is implemented with a single conv layer whose output is
    flattened into a sequence of patch tokens.

    Args:
        in_channels (int): The num of input channels. Default: 3
        embed_dims (int): The dimensions of embedding. Default: 768
        conv_type (str): The type of convolution
            to generate patch embedding. Default: "Conv2d".
        kernel_size (int): The kernel_size of embedding conv. Default: 16.
        stride (int): The slide stride of embedding conv. ``None`` means
            the same as ``kernel_size``. Default: 16.
        padding (int | tuple | string): The padding length of
            embedding conv. When it is a string, it means the mode
            of adaptive padding, support "same" and "corner" now.
            Default: "corner".
        dilation (int): The dilation rate of embedding conv. Default: 1.
        bias (bool): Bias of embed conv. Default: True.
        norm_cfg (dict, optional): Config dict for normalization layer.
            Default: None.
        input_size (int | tuple | None): The size of input, which will be
            used to precompute ``init_out_size``. Default: None.
        init_cfg (`mmcv.ConfigDict`, optional): The Config for
            initialization. Default: None.
    """

    def __init__(self,
                 in_channels=3,
                 embed_dims=768,
                 conv_type='Conv2d',
                 kernel_size=16,
                 stride=16,
                 padding='corner',
                 dilation=1,
                 bias=True,
                 norm_cfg=None,
                 input_size=None,
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)

        self.embed_dims = embed_dims

        kernel_size = to_2tuple(kernel_size)
        stride = kernel_size if stride is None else to_2tuple(stride)
        dilation = to_2tuple(dilation)

        if isinstance(padding, str):
            # A string selects adaptive padding, which is applied before
            # the conv; the conv itself then runs without padding.
            self.adaptive_padding = AdaptivePadding(
                kernel_size=kernel_size,
                stride=stride,
                dilation=dilation,
                padding=padding)
            conv_padding = to_2tuple(0)
        else:
            self.adaptive_padding = None
            conv_padding = to_2tuple(padding)

        self.projection = build_conv_layer(
            dict(type=conv_type),
            in_channels=in_channels,
            out_channels=embed_dims,
            kernel_size=kernel_size,
            stride=stride,
            padding=conv_padding,
            dilation=dilation,
            bias=bias)

        # ``build_norm_layer`` returns (name, layer); only the layer is kept.
        self.norm = (
            build_norm_layer(norm_cfg, embed_dims)[1]
            if norm_cfg is not None else None)

        if not input_size:
            self.init_input_size = None
            self.init_out_size = None
            return

        input_size = to_2tuple(input_size)
        # `init_out_size` would be used outside to
        # calculate the num_patches
        # e.g. when `use_abs_pos_embed` outside
        self.init_input_size = input_size
        if self.adaptive_padding:
            pad_h, pad_w = self.adaptive_padding.get_pad_shape(input_size)
            input_size = (input_size[0] + pad_h, input_size[1] + pad_w)

        # Output size formula of a conv, see
        # https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
        out_size = []
        for axis in (0, 1):
            span = dilation[axis] * (kernel_size[axis] - 1)
            out_size.append(
                (input_size[axis] + 2 * conv_padding[axis] - span - 1) //
                stride[axis] + 1)
        self.init_out_size = tuple(out_size)

    def forward(self, x):
        """
        Args:
            x (Tensor): Has shape (B, C, H, W). In most case, C is 3.

        Returns:
            tuple: Contains merged results and its spatial shape.

            - x (Tensor): Has shape (B, out_h * out_w, embed_dims)
            - out_size (tuple[int]): Spatial shape of x, arrange as
              (out_h, out_w).
        """
        if self.adaptive_padding:
            x = self.adaptive_padding(x)

        feat = self.projection(x)
        out_size = tuple(feat.shape[2:4])

        # (B, C, out_h, out_w) -> (B, out_h * out_w, C)
        tokens = feat.flatten(2).transpose(1, 2)
        if self.norm is not None:
            tokens = self.norm(tokens)
        return tokens, out_size
class PatchMerging(BaseModule):
    """Merge patch feature map.

    This layer groups feature map by kernel_size, and applies norm and linear
    layers to the grouped feature map (used in Swin Transformer).
    Our implementation uses `nn.Unfold` to
    merge patches, which is about 25% faster than the original
    implementation. However, we need to modify pretrained
    models for compatibility.

    Args:
        in_channels (int): The num of input channels.
        out_channels (int): The num of output channels.
        kernel_size (int | tuple, optional): the kernel size in the unfold
            layer. Defaults to 2.
        stride (int | tuple, optional): the stride of the sliding blocks in the
            unfold layer. Default: None. (Would be set as `kernel_size`)
        padding (int | tuple | string): The padding length of the unfold
            layer. When it is a string, it means the mode of adaptive
            padding, support "same" and "corner" now, so that the input
            gets fully covered by the filter and stride you specified.
            Default: "corner".
        dilation (int | tuple, optional): dilation parameter in the unfold
            layer. Default: 1.
        bias (bool, optional): Whether to add bias in linear layer or not.
            Defaults: False.
        norm_cfg (dict, optional): Config dict for normalization layer.
            Default: dict(type='LN').
        init_cfg (dict, optional): The extra config for initialization.
            Default: None.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=2,
                 stride=None,
                 padding='corner',
                 dilation=1,
                 bias=False,
                 norm_cfg=dict(type='LN'),
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        self.in_channels = in_channels
        self.out_channels = out_channels

        # A falsy stride (None or 0) means non-overlapping windows.
        stride = stride if stride else kernel_size

        kernel_size = to_2tuple(kernel_size)
        stride = to_2tuple(stride)
        dilation = to_2tuple(dilation)

        if isinstance(padding, str):
            self.adaptive_padding = AdaptivePadding(
                kernel_size=kernel_size,
                stride=stride,
                dilation=dilation,
                padding=padding)
            # disable the padding of unfold
            padding = 0
        else:
            self.adaptive_padding = None

        padding = to_2tuple(padding)
        self.sampler = nn.Unfold(
            kernel_size=kernel_size,
            dilation=dilation,
            padding=padding,
            stride=stride)

        # Every unfolded column holds one full window over all channels.
        sample_dim = kernel_size[0] * kernel_size[1] * in_channels

        if norm_cfg is not None:
            self.norm = build_norm_layer(norm_cfg, sample_dim)[1]
        else:
            self.norm = None

        self.reduction = nn.Linear(sample_dim, out_channels, bias=bias)

    def forward(self, x, input_size):
        """
        Args:
            x (Tensor): Has shape (B, H*W, C_in).
            input_size (tuple[int]): The spatial shape of x, arrange as (H, W).

        Returns:
            tuple: Contains merged results and its spatial shape.

            - x (Tensor): Has shape (B, Merged_H * Merged_W, C_out)
            - out_size (tuple[int]): Spatial shape of x, arrange as
              (Merged_H, Merged_W).
        """
        B, L, C = x.shape
        assert isinstance(input_size, Sequence), \
            f'Expect input_size is `Sequence` but get {input_size}'

        H, W = input_size
        assert L == H * W, 'input feature has wrong size'

        x = x.view(B, H, W, C).permute([0, 3, 1, 2])  # B, C, H, W

        if self.adaptive_padding:
            x = self.adaptive_padding(x)
            # Padding may have enlarged the map; the output size below must
            # be computed from the padded size.
            H, W = x.shape[-2:]

        # Use nn.Unfold to merge patch. About 25% faster than original method,
        # but need to modify pretrained model for compatibility
        # if kernel_size=2 and stride=2, x should has shape (B, 4*C, H/2*W/2)
        x = self.sampler(x)

        out_h = (H + 2 * self.sampler.padding[0] - self.sampler.dilation[0] *
                 (self.sampler.kernel_size[0] - 1) -
                 1) // self.sampler.stride[0] + 1
        out_w = (W + 2 * self.sampler.padding[1] - self.sampler.dilation[1] *
                 (self.sampler.kernel_size[1] - 1) -
                 1) // self.sampler.stride[1] + 1

        output_size = (out_h, out_w)
        x = x.transpose(1, 2)  # B, H/2*W/2, 4*C
        if self.norm is not None:
            x = self.norm(x)
        x = self.reduction(x)
        return x, output_size


@ATTENTION.register_module()
class MultiheadAttention(BaseModule):
    """A wrapper for ``torch.nn.MultiheadAttention``.

    This module implements MultiheadAttention with identity connection,
    and positional encoding is also passed as input.

    Args:
        embed_dims (int): The embedding dimension.
        num_heads (int): Parallel attention heads.
        attn_drop (float): A Dropout layer on attn_output_weights.
            Default: 0.0.
        proj_drop (float): A Dropout layer after `nn.MultiheadAttention`.
            Default: 0.0.
        dropout_layer (obj:`ConfigDict`): The dropout_layer used
            when adding the shortcut. The passed config is never modified.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
        batch_first (bool): When it is True, Key, Query and Value are shape of
            (batch, n, embed_dim), otherwise (n, batch, embed_dim).
            Default to False.
        **kwargs: Extra keyword arguments forwarded to
            ``nn.MultiheadAttention``. The deprecated ``dropout`` keyword is
            consumed here and overrides ``attn_drop`` and the ``drop_prob``
            of ``dropout_layer``.
    """

    def __init__(self,
                 embed_dims,
                 num_heads,
                 attn_drop=0.,
                 proj_drop=0.,
                 dropout_layer=dict(type='Dropout', drop_prob=0.),
                 init_cfg=None,
                 batch_first=False,
                 **kwargs):
        super(MultiheadAttention, self).__init__(init_cfg)
        if 'dropout' in kwargs:
            warnings.warn(
                'The arguments `dropout` in MultiheadAttention '
                'has been deprecated, now you can separately '
                'set `attn_drop`(float), proj_drop(float), '
                'and `dropout_layer`(dict) ', DeprecationWarning)
            legacy_dropout = kwargs.pop('dropout')
            attn_drop = legacy_dropout
            # Write into a private copy: ``dropout_layer`` may be the shared
            # default dict of this signature (or a config owned by the
            # caller), and mutating it would leak ``drop_prob`` into every
            # instance created afterwards.
            dropout_layer = copy.deepcopy(dropout_layer)
            dropout_layer['drop_prob'] = legacy_dropout

        self.embed_dims = embed_dims
        self.num_heads = num_heads
        self.batch_first = batch_first

        self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop,
                                          **kwargs)

        self.proj_drop = nn.Dropout(proj_drop)
        self.dropout_layer = build_dropout(
            dropout_layer) if dropout_layer else nn.Identity()

    @deprecated_api_warning({'residual': 'identity'},
                            cls_name='MultiheadAttention')
    def forward(self,
                query,
                key=None,
                value=None,
                identity=None,
                query_pos=None,
                key_pos=None,
                attn_mask=None,
                key_padding_mask=None,
                **kwargs):
        """Forward function for `MultiheadAttention`.

        **kwargs allow passing a more general data flow when combining
        with other operations in `transformerlayer`.

        Args:
            query (Tensor): The input query with shape [num_queries, bs,
                embed_dims] if self.batch_first is False, else
                [bs, num_queries embed_dims].
            key (Tensor): The key tensor with shape [num_keys, bs,
                embed_dims] if self.batch_first is False, else
                [bs, num_keys, embed_dims] .
                If None, the ``query`` will be used. Defaults to None.
            value (Tensor): The value tensor with same shape as `key`.
                Same in `nn.MultiheadAttention.forward`. Defaults to None.
                If None, the `key` will be used.
            identity (Tensor): This tensor, with the same shape as `query`,
                will be used for the identity link.
                If None, `query` will be used. Defaults to None.
            query_pos (Tensor): The positional encoding for query, with
                the same shape as `query`. If not None, it will
                be added to `query` before forward function. Defaults to None.
            key_pos (Tensor): The positional encoding for `key`, with the
                same shape as `key`. If not None, it will
                be added to `key` before forward function. If None, and
                `query_pos` has the same shape as `key`, then `query_pos`
                will be used for `key_pos`. Defaults to None.
            attn_mask (Tensor): ByteTensor mask with shape [num_queries,
                num_keys]. Same in `nn.MultiheadAttention.forward`.
                Defaults to None.
            key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys].
                Defaults to None.

        Returns:
            Tensor: forwarded results with shape
            [num_queries, bs, embed_dims]
            if self.batch_first is False, else
            [bs, num_queries embed_dims].
        """

        if key is None:
            key = query
        if value is None:
            value = key
        if identity is None:
            identity = query
        if key_pos is None:
            if query_pos is not None:
                # use query_pos if key_pos is not available
                if query_pos.shape == key.shape:
                    key_pos = query_pos
                else:
                    warnings.warn(f'position encoding of key is '
                                  f'missing in {self.__class__.__name__}.')
        # ``value`` was resolved above, so the positional encodings are only
        # added to the query and the key.
        if query_pos is not None:
            query = query + query_pos
        if key_pos is not None:
            key = key + key_pos

        # Because the dataflow('key', 'query', 'value') of
        # ``torch.nn.MultiheadAttention`` is (num_query, batch,
        # embed_dims), We should adjust the shape of dataflow from
        # batch_first (batch, num_query, embed_dims) to num_query_first
        # (num_query ,batch, embed_dims), and recover ``attn_output``
        # from num_query_first to batch_first.
        if self.batch_first:
            query = query.transpose(0, 1)
            key = key.transpose(0, 1)
            value = value.transpose(0, 1)

        # ``nn.MultiheadAttention`` returns (output, weights); keep the output.
        out = self.attn(
            query=query,
            key=key,
            value=value,
            attn_mask=attn_mask,
            key_padding_mask=key_padding_mask)[0]

        if self.batch_first:
            out = out.transpose(0, 1)

        return identity + self.dropout_layer(self.proj_drop(out))
@FEEDFORWARD_NETWORK.register_module()
class FFN(BaseModule):
    """Implements feed-forward networks (FFNs) with identity connection.

    Args:
        embed_dims (int): The feature dimension. Same as
            `MultiheadAttention`. Defaults: 256.
        feedforward_channels (int): The hidden dimension of FFNs.
            Defaults: 1024.
        num_fcs (int, optional): The number of fully-connected layers in
            FFNs. Must be at least 2. Default: 2.
        act_cfg (dict, optional): The activation config for FFNs.
            Default: dict(type='ReLU', inplace=True)
        ffn_drop (float, optional): Probability of an element to be
            zeroed in FFN. Default 0.0.
        add_identity (bool, optional): Whether to add the
            identity connection. Default: `True`.
        dropout_layer (obj:`ConfigDict`): The dropout_layer used
            when adding the shortcut.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
    """

    @deprecated_api_warning(
        {
            'dropout': 'ffn_drop',
            'add_residual': 'add_identity'
        },
        cls_name='FFN')
    def __init__(self,
                 embed_dims=256,
                 feedforward_channels=1024,
                 num_fcs=2,
                 act_cfg=dict(type='ReLU', inplace=True),
                 ffn_drop=0.,
                 dropout_layer=None,
                 add_identity=True,
                 init_cfg=None,
                 **kwargs):
        super(FFN, self).__init__(init_cfg)
        assert num_fcs >= 2, 'num_fcs should be no less ' \
            f'than 2. got {num_fcs}.'
        self.embed_dims = embed_dims
        self.feedforward_channels = feedforward_channels
        self.num_fcs = num_fcs
        self.act_cfg = act_cfg
        # A single activation module is built and reused by every hidden
        # layer below.
        self.activate = build_activation_layer(act_cfg)

        # ``num_fcs - 1`` hidden blocks (Linear -> activation -> dropout)
        # followed by the output projection back to ``embed_dims``.
        layers = []
        in_channels = embed_dims
        for _ in range(num_fcs - 1):
            layers.append(
                Sequential(
                    Linear(in_channels, feedforward_channels), self.activate,
                    nn.Dropout(ffn_drop)))
            in_channels = feedforward_channels
        layers.append(Linear(feedforward_channels, embed_dims))
        layers.append(nn.Dropout(ffn_drop))
        self.layers = Sequential(*layers)
        self.dropout_layer = build_dropout(
            dropout_layer) if dropout_layer else torch.nn.Identity()
        self.add_identity = add_identity

    @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN')
    def forward(self, x, identity=None):
        """Forward function for `FFN`.

        When ``add_identity`` is enabled the output is added to
        ``identity``; ``x`` is used as the identity if ``identity`` is None.
        """
        out = self.layers(x)
        if not self.add_identity:
            return self.dropout_layer(out)
        if identity is None:
            identity = x
        return identity + self.dropout_layer(out)


@TRANSFORMER_LAYER.register_module()
class BaseTransformerLayer(BaseModule):
    """Base `TransformerLayer` for vision transformer.

    It can be built from `mmcv.ConfigDict` and support more flexible
    customization, for example, using any number of `FFN or LN ` and
    use different kinds of `attention` by specifying a list of `ConfigDict`
    named `attn_cfgs`. It is worth mentioning that it supports `prenorm`
    when you specifying `norm` as the first element of `operation_order`.
    More details about the `prenorm`: `On Layer Normalization in the
    Transformer Architecture`.

    Args:
        attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):
            Configs for `self_attention` or `cross_attention` modules,
            The order of the configs in the list should be consistent with
            corresponding attentions in operation_order.
            If it is a dict, all of the attention modules in operation_order
            will be built with this config. Default: None.
        ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):
            Configs for FFN, The order of the configs in the list should be
            consistent with corresponding ffn in operation_order.
            If it is a dict, all of the ffn modules in operation_order
            will be built with this config.
        operation_order (tuple[str]): The execution order of operation
            in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm').
            Support `prenorm` when you specifying first element as `norm`.
            Required, although it defaults to None.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='LN').
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
        batch_first (bool): Key, Query and Value are shape
            of (batch, n, embed_dim)
            or (n, batch, embed_dim). Default to False.
        **kwargs: Only the deprecated keywords ``feedforward_channels``,
            ``ffn_dropout`` and ``ffn_num_fcs`` are read; they are copied
            into ``ffn_cfgs``.

    Raises:
        TypeError: If ``operation_order`` is None.
    """

    def __init__(self,
                 attn_cfgs=None,
                 ffn_cfgs=dict(
                     type='FFN',
                     embed_dims=256,
                     feedforward_channels=1024,
                     num_fcs=2,
                     ffn_drop=0.,
                     act_cfg=dict(type='ReLU', inplace=True),
                 ),
                 operation_order=None,
                 norm_cfg=dict(type='LN'),
                 init_cfg=None,
                 batch_first=False,
                 **kwargs):

        # ``ffn_cfgs`` may be the shared default dict of this signature or a
        # config owned by the caller. It is modified below (deprecated
        # keywords, ``embed_dims``), so work on a private copy to keep those
        # changes from leaking into other instances.
        ffn_cfgs = copy.deepcopy(ffn_cfgs)

        deprecated_args = dict(
            feedforward_channels='feedforward_channels',
            ffn_dropout='ffn_drop',
            ffn_num_fcs='num_fcs')
        for ori_name, new_name in deprecated_args.items():
            if ori_name in kwargs:
                warnings.warn(
                    f'The arguments `{ori_name}` in BaseTransformerLayer '
                    f'has been deprecated, now you should set `{new_name}` '
                    f'and other FFN related arguments '
                    f'to a dict named `ffn_cfgs`. ', DeprecationWarning)
                ffn_cfgs[new_name] = kwargs[ori_name]

        super(BaseTransformerLayer, self).__init__(init_cfg)

        self.batch_first = batch_first

        if operation_order is None:
            raise TypeError(
                f'`operation_order` of {self.__class__.__name__} must be '
                f'specified, but got None.')

        valid_operations = ['self_attn', 'norm', 'ffn', 'cross_attn']
        assert set(operation_order).issubset(valid_operations), \
            f'The operation_order of {self.__class__.__name__} should ' \
            f'only contain operation types from {valid_operations}'

        num_attn = operation_order.count('self_attn') + operation_order.count(
            'cross_attn')
        if isinstance(attn_cfgs, dict):
            attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)]
        else:
            assert num_attn == len(attn_cfgs), \
                f'The length of attn_cfgs {len(attn_cfgs)} is not ' \
                f'consistent with the number of attention {num_attn} ' \
                f'in operation_order {operation_order}.'
            # ``batch_first`` is written into each config below; copy them
            # so the caller's configs are left untouched.
            attn_cfgs = [copy.deepcopy(cfg) for cfg in attn_cfgs]

        self.num_attn = num_attn
        self.operation_order = operation_order
        self.norm_cfg = norm_cfg
        self.pre_norm = operation_order[0] == 'norm'
        self.attentions = ModuleList()

        index = 0
        for operation_name in operation_order:
            if operation_name in ['self_attn', 'cross_attn']:
                if 'batch_first' in attn_cfgs[index]:
                    assert self.batch_first == attn_cfgs[index]['batch_first']
                else:
                    attn_cfgs[index]['batch_first'] = self.batch_first
                attention = build_attention(attn_cfgs[index])
                # Some custom attentions used as `self_attn`
                # or `cross_attn` can have different behavior.
                attention.operation_name = operation_name
                self.attentions.append(attention)
                index += 1

        # The first attention defines the embedding size of the whole layer.
        self.embed_dims = self.attentions[0].embed_dims

        self.ffns = ModuleList()
        num_ffns = operation_order.count('ffn')
        if isinstance(ffn_cfgs, dict):
            ffn_cfgs = ConfigDict(ffn_cfgs)
        if isinstance(ffn_cfgs, dict):
            ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)]
        assert len(ffn_cfgs) == num_ffns
        for ffn_index in range(num_ffns):
            if 'embed_dims' not in ffn_cfgs[ffn_index]:
                ffn_cfgs[ffn_index]['embed_dims'] = self.embed_dims
            else:
                assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims
            self.ffns.append(
                build_feedforward_network(ffn_cfgs[ffn_index],
                                          dict(type='FFN')))

        self.norms = ModuleList()
        num_norms = operation_order.count('norm')
        for _ in range(num_norms):
            self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])

    def forward(self,
                query,
                key=None,
                value=None,
                query_pos=None,
                key_pos=None,
                attn_masks=None,
                query_key_padding_mask=None,
                key_padding_mask=None,
                **kwargs):
        """Forward function for `BaseTransformerLayer`.

        **kwargs contains some specific arguments of attentions.

        Args:
            query (Tensor): The input query with shape
                [num_queries, bs, embed_dims] if
                self.batch_first is False, else
                [bs, num_queries embed_dims].
            key (Tensor): The key tensor with shape [num_keys, bs,
                embed_dims] if self.batch_first is False, else
                [bs, num_keys, embed_dims] .
            value (Tensor): The value tensor with same shape as `key`.
            query_pos (Tensor): The positional encoding for `query`.
                Default: None.
            key_pos (Tensor): The positional encoding for `key`.
                Default: None.
            attn_masks (List[Tensor] | Tensor | None): 2D Tensor used in
                calculation of corresponding attention. The length of
                it should equal to the number of `attention` in
                `operation_order`. A single Tensor is copied for every
                attention. Default: None.
            query_key_padding_mask (Tensor): ByteTensor for `query`, with
                shape [bs, num_queries]. Only used in `self_attn` layer.
                Defaults to None.
            key_padding_mask (Tensor): ByteTensor for `key`, with
                shape [bs, num_keys]. Only used in `cross_attn` layer.
                Default: None.

        Returns:
            Tensor: forwarded results with shape [num_queries, bs, embed_dims].
        """

        norm_index = 0
        attn_index = 0
        ffn_index = 0
        identity = query
        if attn_masks is None:
            attn_masks = [None for _ in range(self.num_attn)]
        elif isinstance(attn_masks, torch.Tensor):
            attn_masks = [
                copy.deepcopy(attn_masks) for _ in range(self.num_attn)
            ]
            warnings.warn(f'Use same attn_mask in all attentions in '
                          f'{self.__class__.__name__} ')
        else:
            assert len(attn_masks) == self.num_attn, f'The length of ' \
                        f'attn_masks {len(attn_masks)} must be equal ' \
                        f'to the number of attention in ' \
                        f'operation_order {self.num_attn}'

        # In pre-norm mode the un-normalized tensor kept in ``identity`` is
        # passed as the shortcut; otherwise each module uses its own input.
        for layer in self.operation_order:
            if layer == 'self_attn':
                temp_key = temp_value = query
                query = self.attentions[attn_index](
                    query,
                    temp_key,
                    temp_value,
                    identity if self.pre_norm else None,
                    query_pos=query_pos,
                    key_pos=query_pos,
                    attn_mask=attn_masks[attn_index],
                    key_padding_mask=query_key_padding_mask,
                    **kwargs)
                attn_index += 1
                identity = query

            elif layer == 'norm':
                query = self.norms[norm_index](query)
                norm_index += 1

            elif layer == 'cross_attn':
                query = self.attentions[attn_index](
                    query,
                    key,
                    value,
                    identity if self.pre_norm else None,
                    query_pos=query_pos,
                    key_pos=key_pos,
                    attn_mask=attn_masks[attn_index],
                    key_padding_mask=key_padding_mask,
                    **kwargs)
                attn_index += 1
                identity = query

            elif layer == 'ffn':
                query = self.ffns[ffn_index](
                    query, identity if self.pre_norm else None)
                ffn_index += 1

        return query
@TRANSFORMER_LAYER_SEQUENCE.register_module()
class TransformerLayerSequence(BaseModule):
    """Base class for TransformerEncoder and TransformerDecoder in vision
    transformer.

    As base-class of Encoder and Decoder in vision transformer.
    Support customization such as specifying different kind
    of `transformer_layer` in `transformer_coder`.

    Args:
        transformerlayers (list[obj:`mmcv.ConfigDict`] |
            obj:`mmcv.ConfigDict`): Config of transformerlayer
            in TransformerCoder. If it is obj:`mmcv.ConfigDict`,
            it would be repeated `num_layer` times to a
            list[`mmcv.ConfigDict`]. Default: None.
        num_layers (int): The number of `TransformerLayer`. Default: None.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
    """

    def __init__(self, transformerlayers=None, num_layers=None, init_cfg=None):
        super(TransformerLayerSequence, self).__init__(init_cfg)
        if isinstance(transformerlayers, dict):
            # One independent copy of the config per layer.
            transformerlayers = [
                copy.deepcopy(transformerlayers) for _ in range(num_layers)
            ]
        else:
            assert isinstance(transformerlayers, list) and \
                   len(transformerlayers) == num_layers
        self.num_layers = num_layers
        self.layers = ModuleList()
        for i in range(num_layers):
            self.layers.append(build_transformer_layer(transformerlayers[i]))
        # The sequence exposes the settings of its first layer.
        self.embed_dims = self.layers[0].embed_dims
        self.pre_norm = self.layers[0].pre_norm

    def forward(self,
                query,
                key,
                value,
                query_pos=None,
                key_pos=None,
                attn_masks=None,
                query_key_padding_mask=None,
                key_padding_mask=None,
                **kwargs):
        """Forward function for `TransformerCoder`.

        Args:
            query (Tensor): Input query with shape
                `(num_queries, bs, embed_dims)`.
            key (Tensor): The key tensor with shape
                `(num_keys, bs, embed_dims)`.
            value (Tensor): The value tensor with shape
                `(num_keys, bs, embed_dims)`.
            query_pos (Tensor): The positional encoding for `query`.
                Default: None.
            key_pos (Tensor): The positional encoding for `key`.
                Default: None.
            attn_masks (List[Tensor], optional): Each element is 2D Tensor
                which is used in calculation of corresponding attention in
                operation_order. Default: None.
            query_key_padding_mask (Tensor): ByteTensor for `query`, with
                shape [bs, num_queries]. Only used in self-attention
                Default: None.
            key_padding_mask (Tensor): ByteTensor for `key`, with
                shape [bs, num_keys]. Default: None.

        Returns:
            Tensor:  results with shape [num_queries, bs, embed_dims].
        """
        # Each layer refines ``query``; key, value and the masks are passed
        # unchanged to every layer.
        for layer in self.layers:
            query = layer(
                query,
                key,
                value,
                query_pos=query_pos,
                key_pos=key_pos,
                attn_masks=attn_masks,
                query_key_padding_mask=query_key_padding_mask,
                key_padding_mask=key_padding_mask,
                **kwargs)
        return query
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/upsample.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F

from ..utils import xavier_init
from .registry import UPSAMPLE_LAYERS

# Both names map to ``nn.Upsample``; ``build_upsample_layer`` passes the
# registered name on as the ``mode`` argument.
UPSAMPLE_LAYERS.register_module('nearest', module=nn.Upsample)
UPSAMPLE_LAYERS.register_module('bilinear', module=nn.Upsample)


@UPSAMPLE_LAYERS.register_module(name='pixel_shuffle')
class PixelShufflePack(nn.Module):
    """Pixel Shuffle upsample layer.

    This module packs `F.pixel_shuffle()` and a nn.Conv2d module together to
    achieve a simple upsampling with pixel shuffle.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        scale_factor (int): Upsample ratio.
        upsample_kernel (int): Kernel size of the conv layer to expand the
            channels.
    """

    def __init__(self, in_channels, out_channels, scale_factor,
                 upsample_kernel):
        super(PixelShufflePack, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.scale_factor = scale_factor
        self.upsample_kernel = upsample_kernel
        # The conv produces ``scale_factor ** 2`` times the requested
        # channels; ``F.pixel_shuffle`` in ``forward`` is then applied with
        # the same ``scale_factor``.
        self.upsample_conv = nn.Conv2d(
            self.in_channels,
            self.out_channels * scale_factor * scale_factor,
            self.upsample_kernel,
            padding=(self.upsample_kernel - 1) // 2)
        self.init_weights()

    def init_weights(self):
        """Initialize the conv with ``xavier_init`` (uniform distribution)."""
        xavier_init(self.upsample_conv, distribution='uniform')

    def forward(self, x):
        """Expand the channels with the conv, then apply pixel shuffle."""
        x = self.upsample_conv(x)
        x = F.pixel_shuffle(x, self.scale_factor)
        return x
def build_upsample_layer(cfg, *args, **kwargs):
    """Build upsample layer.

    Args:
        cfg (dict): The upsample layer config, which should contain:

            - type (str): Layer type.
            - scale_factor (int): Upsample ratio, which is not applicable to
              deconv.
            - layer args: Args needed to instantiate a upsample layer.
        args (argument list): Arguments passed to the ``__init__``
            method of the corresponding conv layer.
        kwargs (keyword arguments): Keyword arguments passed to the
            ``__init__`` method of the corresponding conv layer.

    Returns:
        nn.Module: Created upsample layer.

    Raises:
        TypeError: If ``cfg`` is not a dict.
        KeyError: If ``cfg`` has no ``"type"`` key, or the type is not
            registered in ``UPSAMPLE_LAYERS``.
    """
    if not isinstance(cfg, dict):
        raise TypeError(f'cfg must be a dict, but got {type(cfg)}')
    if 'type' not in cfg:
        raise KeyError(
            f'the cfg dict must contain the key "type", but got {cfg}')
    # Pop ``type`` from a copy so the caller's config stays intact.
    cfg_ = cfg.copy()

    layer_type = cfg_.pop('type')
    if layer_type not in UPSAMPLE_LAYERS:
        raise KeyError(f'Unrecognized upsample type {layer_type}')
    else:
        upsample = UPSAMPLE_LAYERS.get(layer_type)

    # For ``nn.Upsample`` the registered name is passed as the ``mode``.
    if upsample is nn.Upsample:
        cfg_['mode'] = layer_type
    layer = upsample(*args, **kwargs, **cfg_)
    return layer
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/wrappers.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
r"""Modified from https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/wrappers.py  # noqa: E501

Wrap some nn modules to support empty tensor input. Currently, these wrappers
are mainly used in mask heads like fcn_mask_head and maskiou_heads since mask
heads are trained on only positive RoIs.
"""
import math

import torch
import torch.nn as nn
from torch.nn.modules.utils import _pair, _triple

from .registry import CONV_LAYERS, UPSAMPLE_LAYERS

if torch.__version__ == 'parrots':
    TORCH_VERSION = torch.__version__
else:
    # torch.__version__ could be 1.3.1+cu92, we only need the first two
    # for comparison
    TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2])


def obsolete_torch_version(torch_version, version_threshold):
    """Return True if ``torch_version`` is ``'parrots'`` or is less than or
    equal to ``version_threshold``."""
    return torch_version == 'parrots' or torch_version <= version_threshold


class NewEmptyTensorOp(torch.autograd.Function):
    """Autograd function that returns an uninitialized tensor of a given
    shape; its backward does the same with the shape of the input."""

    @staticmethod
    def forward(ctx, x, new_shape):
        # Remember the input shape so backward can produce a gradient of the
        # same shape.
        ctx.shape = x.shape
        return x.new_empty(new_shape)

    @staticmethod
    def backward(ctx, grad):
        shape = ctx.shape
        # ``new_shape`` is not a tensor input, hence the ``None`` gradient.
        return NewEmptyTensorOp.apply(grad, shape), None


@CONV_LAYERS.register_module('Conv', force=True)
class Conv2d(nn.Conv2d):
    """``nn.Conv2d`` with a dedicated path for empty inputs when
    ``obsolete_torch_version(TORCH_VERSION, (1, 4))`` is true."""

    def forward(self, x):
        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
            # Build the output shape by hand from the conv output-size
            # formula instead of calling the real convolution.
            out_shape = [x.shape[0], self.out_channels]
            for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size,
                                     self.padding, self.stride, self.dilation):
                o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1
                out_shape.append(o)
            empty = NewEmptyTensorOp.apply(x, out_shape)
            if self.training:
                # produce dummy gradient to avoid DDP warning.
                dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
                return empty + dummy
            else:
                return empty

        return super().forward(x)


@CONV_LAYERS.register_module('Conv3d', force=True)
class Conv3d(nn.Conv3d):
    """``nn.Conv3d`` with a dedicated path for empty inputs when
    ``obsolete_torch_version(TORCH_VERSION, (1, 4))`` is true."""

    def forward(self, x):
        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
            # Same as ``Conv2d.forward`` but over the last three dimensions.
            out_shape = [x.shape[0], self.out_channels]
            for i, k, p, s, d in zip(x.shape[-3:], self.kernel_size,
                                     self.padding, self.stride, self.dilation):
                o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1
                out_shape.append(o)
            empty = NewEmptyTensorOp.apply(x, out_shape)
            if self.training:
                # produce dummy gradient to avoid DDP warning.
                dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
                return empty + dummy
            else:
                return empty

        return super().forward(x)


@CONV_LAYERS.register_module()
@CONV_LAYERS.register_module('deconv')
@UPSAMPLE_LAYERS.register_module('deconv', force=True)
class ConvTranspose2d(nn.ConvTranspose2d):
    """``nn.ConvTranspose2d`` with a dedicated path for empty inputs when
    ``obsolete_torch_version(TORCH_VERSION, (1, 4))`` is true."""

    def forward(self, x):
        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
            # Output-size formula of a transposed conv, including
            # ``output_padding``.
            out_shape = [x.shape[0], self.out_channels]
            for i, k, p, s, d, op in zip(x.shape[-2:], self.kernel_size,
                                         self.padding, self.stride,
                                         self.dilation, self.output_padding):
                out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op)
            empty = NewEmptyTensorOp.apply(x, out_shape)
            if self.training:
                # produce dummy gradient to avoid DDP warning.
                dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
                return empty + dummy
            else:
                return empty

        return super().forward(x)


@CONV_LAYERS.register_module()
@CONV_LAYERS.register_module('deconv3d')
@UPSAMPLE_LAYERS.register_module('deconv3d', force=True)
class ConvTranspose3d(nn.ConvTranspose3d):

    def forward(self, x):
        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
            out_shape = [x.shape[0], self.out_channels]
            for i, k, p, s, d, op in zip(x.shape[-3:], self.kernel_size,
                                         self.padding, self.stride,
                                         self.dilation, self.output_padding):
                out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op)
            empty = NewEmptyTensorOp.apply(x, out_shape)
            if self.training:
                # produce dummy gradient to avoid DDP warning.
dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 return empty + dummy else: return empty return super().forward(x) class MaxPool2d(nn.MaxPool2d): def forward(self, x): # PyTorch 1.9 does not support empty tensor inference yet if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): out_shape = list(x.shape[:2]) for i, k, p, s, d in zip(x.shape[-2:], _pair(self.kernel_size), _pair(self.padding), _pair(self.stride), _pair(self.dilation)): o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 o = math.ceil(o) if self.ceil_mode else math.floor(o) out_shape.append(o) empty = NewEmptyTensorOp.apply(x, out_shape) return empty return super().forward(x) class MaxPool3d(nn.MaxPool3d): def forward(self, x): # PyTorch 1.9 does not support empty tensor inference yet if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): out_shape = list(x.shape[:2]) for i, k, p, s, d in zip(x.shape[-3:], _triple(self.kernel_size), _triple(self.padding), _triple(self.stride), _triple(self.dilation)): o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 o = math.ceil(o) if self.ceil_mode else math.floor(o) out_shape.append(o) empty = NewEmptyTensorOp.apply(x, out_shape) return empty return super().forward(x) class Linear(torch.nn.Linear): def forward(self, x): # empty tensor forward of Linear layer is supported in Pytorch 1.6 if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)): out_shape = [x.shape[0], self.out_features] empty = NewEmptyTensorOp.apply(x, out_shape) if self.training: # produce dummy gradient to avoid DDP warning. dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 return empty + dummy else: return empty return super().forward(x) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/builder.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
from ..runner import Sequential from ..utils import Registry, build_from_cfg def build_model_from_cfg(cfg, registry, default_args=None): """Build a PyTorch model from config dict(s). Different from ``build_from_cfg``, if cfg is a list, a ``nn.Sequential`` will be built. Args: cfg (dict, list[dict]): The config of modules, is is either a config dict or a list of config dicts. If cfg is a list, a the built modules will be wrapped with ``nn.Sequential``. registry (:obj:`Registry`): A registry the module belongs to. default_args (dict, optional): Default arguments to build the module. Defaults to None. Returns: nn.Module: A built nn module. """ if isinstance(cfg, list): modules = [ build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg ] return Sequential(*modules) else: return build_from_cfg(cfg, registry, default_args) MODELS = Registry('model', build_func=build_model_from_cfg) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/resnet.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
import logging

import torch.nn as nn
import torch.utils.checkpoint as cp

from .utils import constant_init, kaiming_init


def conv3x3(in_planes, out_planes, stride=1, dilation=1):
    """3x3 convolution with padding.

    The padding is set equal to ``dilation`` and the layer has no bias.
    """
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        bias=False)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    ``style`` is only validated here and ``with_cp`` must be False; neither
    alters how this block is built.
    """
    # Output channels of the block are ``planes * expansion``.
    expansion = 1

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False):
        super(BasicBlock, self).__init__()
        assert style in ['pytorch', 'caffe']
        # Only the first conv receives the stride and dilation.
        self.conv1 = conv3x3(inplanes, planes, stride, dilation)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride
        self.dilation = dilation
        # Checkpointing is not implemented for this block.
        assert not with_cp

    def forward(self, x):
        """Apply conv-bn-relu, conv-bn, add the shortcut, then relu."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        # Project the shortcut when a downsample module was supplied.
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    """Residual block made of 1x1, 3x3 and 1x1 convolutions."""
    # Output channels of the block are ``planes * expansion``.
    expansion = 4

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False):
        """Bottleneck block.

        If style is "pytorch", the stride-two layer is the 3x3 conv layer, if
        it is "caffe", the stride-two layer is the first 1x1 conv layer.
        """
        super(Bottleneck, self).__init__()
        assert style in ['pytorch', 'caffe']
        # Decide which of the first two convs carries the stride.
        if style == 'pytorch':
            conv1_stride = 1
            conv2_stride = stride
        else:
            conv1_stride = stride
            conv2_stride = 1
        self.conv1 = nn.Conv2d(
            inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False)
        self.conv2 = nn.Conv2d(
            planes,
            planes,
            kernel_size=3,
            stride=conv2_stride,
            padding=dilation,
            dilation=dilation,
            bias=False)

        self.bn1 = nn.BatchNorm2d(planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(
            planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.dilation = dilation
        self.with_cp = with_cp

    def forward(self, x):
        """Run the block, optionally through ``torch.utils.checkpoint``."""

        def _inner_forward(x):
            # Everything up to (but excluding) the final ReLU, so that this
            # part can be wrapped by checkpointing.
            residual = x

            out = self.conv1(x)
            out = self.bn1(out)
            out = self.relu(out)

            out = self.conv2(out)
            out = self.bn2(out)
            out = self.relu(out)

            out = self.conv3(out)
            out = self.bn3(out)

            if self.downsample is not None:
                residual = self.downsample(x)

            out += residual

            return out

        # Checkpointing is used only when enabled and the input needs grad.
        if self.with_cp and x.requires_grad:
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        out = self.relu(out)

        return out


def make_res_layer(block,
                   inplanes,
                   planes,
                   blocks,
                   stride=1,
                   dilation=1,
                   style='pytorch',
                   with_cp=False):
    """Stack ``blocks`` instances of ``block`` into an ``nn.Sequential``.

    Only the first block receives ``stride`` and the optional downsample
    shortcut; the remaining blocks use stride 1.
    """
    downsample = None
    # A 1x1 conv + BN shortcut is needed whenever the first block changes
    # the spatial size or the channel count.
    if stride != 1 or inplanes != planes * block.expansion:
        downsample = nn.Sequential(
            nn.Conv2d(
                inplanes,
                planes * block.expansion,
                kernel_size=1,
                stride=stride,
                bias=False),
            nn.BatchNorm2d(planes * block.expansion),
        )

    layers = []
    layers.append(
        block(
            inplanes,
            planes,
            stride,
            dilation,
            downsample,
            style=style,
            with_cp=with_cp))
    # Later blocks take the expanded channel count as input.
    inplanes = planes * block.expansion
    for _ in range(1, blocks):
        layers.append(
            block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp))

    return nn.Sequential(*layers)


class ResNet(nn.Module):
    """ResNet backbone.

    Args:
        depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
        num_stages (int): Resnet stages, normally 4.
        strides (Sequence[int]): Strides of the first block of each stage.
        dilations (Sequence[int]): Dilation of each stage.
        out_indices (Sequence[int]): Output from which stages.
        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
            layer is the 3x3 conv layer, otherwise the stride-two layer is
            the first 1x1 conv layer.
        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
            not freezing any parameters.
        bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze
            running stats (mean and var).
        bn_frozen (bool): Whether to freeze weight and bias of BN layers.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed.
    """

    # depth -> (block class, number of blocks in each of the four stages)
    arch_settings = {
        18: (BasicBlock, (2, 2, 2, 2)),
        34: (BasicBlock, (3, 4, 6, 3)),
        50: (Bottleneck, (3, 4, 6, 3)),
        101: (Bottleneck, (3, 4, 23, 3)),
        152: (Bottleneck, (3, 8, 36, 3))
    }

    def __init__(self,
                 depth,
                 num_stages=4,
                 strides=(1, 2, 2, 2),
                 dilations=(1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 style='pytorch',
                 frozen_stages=-1,
                 bn_eval=True,
                 bn_frozen=False,
                 with_cp=False):
        super(ResNet, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for resnet')
        assert num_stages >= 1 and num_stages <= 4
        block, stage_blocks = self.arch_settings[depth]
        # Keep only the block counts of the stages that will be built.
        stage_blocks = stage_blocks[:num_stages]
        assert len(strides) == len(dilations) == num_stages
        assert max(out_indices) < num_stages

        self.out_indices = out_indices
        self.style = style
        self.frozen_stages = frozen_stages
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen
        self.with_cp = with_cp

        # Stem: 7x7 stride-2 conv on a 3-channel input, BN, ReLU, max pool.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(
            3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stage names ('layer1', 'layer2', ...) in build order; the modules
        # themselves are registered through add_module below.
        self.res_layers = []
        for i, num_blocks in enumerate(stage_blocks):
            stride = strides[i]
            dilation = dilations[i]
            # Base width doubles with every stage.
            planes = 64 * 2**i
            res_layer = make_res_layer(
                block,
                self.inplanes,
                planes,
                num_blocks,
                stride=stride,
                dilation=dilation,
                style=self.style,
                with_cp=with_cp)
            self.inplanes = planes * block.expansion
            layer_name = f'layer{i + 1}'
            self.add_module(layer_name, res_layer)
            self.res_layers.append(layer_name)

        # Channel count produced by the last built stage.
        self.feat_dim = block.expansion * 64 * 2**(len(stage_blocks) - 1)

    def init_weights(self, pretrained=None):
        """Load a checkpoint or initialise conv and BN layers.

        Args:
            pretrained (str | None): Checkpoint passed to ``load_checkpoint``
                (non-strict). If None, every ``nn.Conv2d`` gets
                ``kaiming_init`` and every ``nn.BatchNorm2d`` gets
                ``constant_init(m, 1)``.

        Raises:
            TypeError: If ``pretrained`` is neither a str nor None.
        """
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            # Imported here rather than at module level.
            from ..runner import load_checkpoint
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, nn.BatchNorm2d):
                    constant_init(m, 1)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Run the stem and all stages, collecting the requested outputs.

        Returns the single feature map when exactly one stage index is
        selected by ``out_indices``, otherwise a tuple of feature maps.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        outs = []
        for i, layer_name in enumerate(self.res_layers):
            res_layer = getattr(self, layer_name)
            x = res_layer(x)
            if i in self.out_indices:
                outs.append(x)
        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)

    def train(self, mode=True):
        """Switch mode, then re-apply the BN-eval and frozen-stage settings.

        Args:
            mode (bool): Passed to ``nn.Module.train``. Default: True.
        """
        super(ResNet, self).train(mode)
        # With bn_eval set, BN layers are put in eval mode regardless of
        # ``mode``; bn_frozen additionally disables grads of their params.
        if self.bn_eval:
            for m in self.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()
                    if self.bn_frozen:
                        for params in m.parameters():
                            params.requires_grad = False
        # Freeze the stem and the first ``frozen_stages`` stages.
        if mode and self.frozen_stages >= 0:
            for param in self.conv1.parameters():
                param.requires_grad = False
            for param in self.bn1.parameters():
                param.requires_grad = False
            self.bn1.eval()
            self.bn1.weight.requires_grad = False
            self.bn1.bias.requires_grad = False
            for i in range(1, self.frozen_stages + 1):
                mod = getattr(self, f'layer{i}')
                mod.eval()
                for param in mod.parameters():
                    param.requires_grad = False


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/utils/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .flops_counter import get_model_complexity_info from .fuse_conv_bn import fuse_conv_bn from .sync_bn import revert_sync_batchnorm from .weight_init import (INITIALIZERS, Caffe2XavierInit, ConstantInit, KaimingInit, NormalInit, PretrainedInit, TruncNormalInit, UniformInit, XavierInit, bias_init_with_prob, caffe2_xavier_init, constant_init, initialize, kaiming_init, normal_init, trunc_normal_init, uniform_init, xavier_init) __all__ = [ 'get_model_complexity_info', 'bias_init_with_prob', 'caffe2_xavier_init', 'constant_init', 'kaiming_init', 'normal_init', 'trunc_normal_init', 'uniform_init', 'xavier_init', 'fuse_conv_bn', 'initialize', 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit', 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit', 'Caffe2XavierInit', 'revert_sync_batchnorm' ] ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/utils/flops_counter.py ================================================ # Modified from flops-counter.pytorch by Vladislav Sovrasov # original repo: https://github.com/sovrasov/flops-counter.pytorch # MIT License # Copyright (c) 2018 Vladislav Sovrasov # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import sys import warnings from functools import partial import numpy as np import torch import torch.nn as nn import mmcv def get_model_complexity_info(model, input_shape, print_per_layer_stat=True, as_strings=True, input_constructor=None, flush=False, ost=sys.stdout): """Get complexity information of a model. This method can calculate FLOPs and parameter counts of a model with corresponding input shape. It can also print complexity information for each layer in a model. Supported layers are listed as below: - Convolutions: ``nn.Conv1d``, ``nn.Conv2d``, ``nn.Conv3d``. - Activations: ``nn.ReLU``, ``nn.PReLU``, ``nn.ELU``, ``nn.LeakyReLU``, ``nn.ReLU6``. - Poolings: ``nn.MaxPool1d``, ``nn.MaxPool2d``, ``nn.MaxPool3d``, ``nn.AvgPool1d``, ``nn.AvgPool2d``, ``nn.AvgPool3d``, ``nn.AdaptiveMaxPool1d``, ``nn.AdaptiveMaxPool2d``, ``nn.AdaptiveMaxPool3d``, ``nn.AdaptiveAvgPool1d``, ``nn.AdaptiveAvgPool2d``, ``nn.AdaptiveAvgPool3d``. - BatchNorms: ``nn.BatchNorm1d``, ``nn.BatchNorm2d``, ``nn.BatchNorm3d``, ``nn.GroupNorm``, ``nn.InstanceNorm1d``, ``InstanceNorm2d``, ``InstanceNorm3d``, ``nn.LayerNorm``. - Linear: ``nn.Linear``. - Deconvolution: ``nn.ConvTranspose2d``. - Upsample: ``nn.Upsample``. Args: model (nn.Module): The model for complexity calculation. input_shape (tuple): Input shape used for calculation. print_per_layer_stat (bool): Whether to print complexity information for each layer in a model. Default: True. as_strings (bool): Output FLOPs and params counts in a string form. Default: True. input_constructor (None | callable): If specified, it takes a callable method that generates input. otherwise, it will generate a random tensor with input shape to calculate FLOPs. Default: None. 
flush (bool): same as that in :func:`print`. Default: False. ost (stream): same as ``file`` param in :func:`print`. Default: sys.stdout. Returns: tuple[float | str]: If ``as_strings`` is set to True, it will return FLOPs and parameter counts in a string format. otherwise, it will return those in a float number format. """ assert type(input_shape) is tuple assert len(input_shape) >= 1 assert isinstance(model, nn.Module) flops_model = add_flops_counting_methods(model) flops_model.eval() flops_model.start_flops_count() if input_constructor: input = input_constructor(input_shape) _ = flops_model(**input) else: try: batch = torch.ones(()).new_empty( (1, *input_shape), dtype=next(flops_model.parameters()).dtype, device=next(flops_model.parameters()).device) except StopIteration: # Avoid StopIteration for models which have no parameters, # like `nn.Relu()`, `nn.AvgPool2d`, etc. batch = torch.ones(()).new_empty((1, *input_shape)) _ = flops_model(batch) flops_count, params_count = flops_model.compute_average_flops_cost() if print_per_layer_stat: print_model_with_flops( flops_model, flops_count, params_count, ost=ost, flush=flush) flops_model.stop_flops_count() if as_strings: return flops_to_string(flops_count), params_to_string(params_count) return flops_count, params_count def flops_to_string(flops, units='GFLOPs', precision=2): """Convert FLOPs number into a string. Note that Here we take a multiply-add counts as one FLOP. Args: flops (float): FLOPs number to be converted. units (str | None): Converted FLOPs units. Options are None, 'GFLOPs', 'MFLOPs', 'KFLOPs', 'FLOPs'. If set to None, it will automatically choose the most suitable unit for FLOPs. Default: 'GFLOPs'. precision (int): Digit number after the decimal point. Default: 2. Returns: str: The converted FLOPs number with units. 
Examples: >>> flops_to_string(1e9) '1.0 GFLOPs' >>> flops_to_string(2e5, 'MFLOPs') '0.2 MFLOPs' >>> flops_to_string(3e-9, None) '3e-09 FLOPs' """ if units is None: if flops // 10**9 > 0: return str(round(flops / 10.**9, precision)) + ' GFLOPs' elif flops // 10**6 > 0: return str(round(flops / 10.**6, precision)) + ' MFLOPs' elif flops // 10**3 > 0: return str(round(flops / 10.**3, precision)) + ' KFLOPs' else: return str(flops) + ' FLOPs' else: if units == 'GFLOPs': return str(round(flops / 10.**9, precision)) + ' ' + units elif units == 'MFLOPs': return str(round(flops / 10.**6, precision)) + ' ' + units elif units == 'KFLOPs': return str(round(flops / 10.**3, precision)) + ' ' + units else: return str(flops) + ' FLOPs' def params_to_string(num_params, units=None, precision=2): """Convert parameter number into a string. Args: num_params (float): Parameter number to be converted. units (str | None): Converted FLOPs units. Options are None, 'M', 'K' and ''. If set to None, it will automatically choose the most suitable unit for Parameter number. Default: None. precision (int): Digit number after the decimal point. Default: 2. Returns: str: The converted parameter number with units. Examples: >>> params_to_string(1e9) '1000.0 M' >>> params_to_string(2e5) '200.0 k' >>> params_to_string(3e-9) '3e-09' """ if units is None: if num_params // 10**6 > 0: return str(round(num_params / 10**6, precision)) + ' M' elif num_params // 10**3: return str(round(num_params / 10**3, precision)) + ' k' else: return str(num_params) else: if units == 'M': return str(round(num_params / 10.**6, precision)) + ' ' + units elif units == 'K': return str(round(num_params / 10.**3, precision)) + ' ' + units else: return str(num_params) def print_model_with_flops(model, total_flops, total_params, units='GFLOPs', precision=3, ost=sys.stdout, flush=False): """Print a model with FLOPs for each layer. Args: model (nn.Module): The model to be printed. total_flops (float): Total FLOPs of the model. 
total_params (float): Total parameter counts of the model. units (str | None): Converted FLOPs units. Default: 'GFLOPs'. precision (int): Digit number after the decimal point. Default: 3. ost (stream): same as `file` param in :func:`print`. Default: sys.stdout. flush (bool): same as that in :func:`print`. Default: False. Example: >>> class ExampleModel(nn.Module): >>> def __init__(self): >>> super().__init__() >>> self.conv1 = nn.Conv2d(3, 8, 3) >>> self.conv2 = nn.Conv2d(8, 256, 3) >>> self.conv3 = nn.Conv2d(256, 8, 3) >>> self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) >>> self.flatten = nn.Flatten() >>> self.fc = nn.Linear(8, 1) >>> def forward(self, x): >>> x = self.conv1(x) >>> x = self.conv2(x) >>> x = self.conv3(x) >>> x = self.avg_pool(x) >>> x = self.flatten(x) >>> x = self.fc(x) >>> return x >>> model = ExampleModel() >>> x = (3, 16, 16) to print the complexity information state for each layer, you can use >>> get_model_complexity_info(model, x) or directly use >>> print_model_with_flops(model, 4579784.0, 37361) ExampleModel( 0.037 M, 100.000% Params, 0.005 GFLOPs, 100.000% FLOPs, (conv1): Conv2d(0.0 M, 0.600% Params, 0.0 GFLOPs, 0.959% FLOPs, 3, 8, kernel_size=(3, 3), stride=(1, 1)) # noqa: E501 (conv2): Conv2d(0.019 M, 50.020% Params, 0.003 GFLOPs, 58.760% FLOPs, 8, 256, kernel_size=(3, 3), stride=(1, 1)) (conv3): Conv2d(0.018 M, 49.356% Params, 0.002 GFLOPs, 40.264% FLOPs, 256, 8, kernel_size=(3, 3), stride=(1, 1)) (avg_pool): AdaptiveAvgPool2d(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.017% FLOPs, output_size=(1, 1)) (flatten): Flatten(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.000% FLOPs, ) (fc): Linear(0.0 M, 0.024% Params, 0.0 GFLOPs, 0.000% FLOPs, in_features=8, out_features=1, bias=True) ) """ def accumulate_params(self): if is_supported_instance(self): return self.__params__ else: sum = 0 for m in self.children(): sum += m.accumulate_params() return sum def accumulate_flops(self): if is_supported_instance(self): return self.__flops__ / model.__batch_counter__ 
else: sum = 0 for m in self.children(): sum += m.accumulate_flops() return sum def flops_repr(self): accumulated_num_params = self.accumulate_params() accumulated_flops_cost = self.accumulate_flops() return ', '.join([ params_to_string( accumulated_num_params, units='M', precision=precision), '{:.3%} Params'.format(accumulated_num_params / total_params), flops_to_string( accumulated_flops_cost, units=units, precision=precision), '{:.3%} FLOPs'.format(accumulated_flops_cost / total_flops), self.original_extra_repr() ]) def add_extra_repr(m): m.accumulate_flops = accumulate_flops.__get__(m) m.accumulate_params = accumulate_params.__get__(m) flops_extra_repr = flops_repr.__get__(m) if m.extra_repr != flops_extra_repr: m.original_extra_repr = m.extra_repr m.extra_repr = flops_extra_repr assert m.extra_repr != m.original_extra_repr def del_extra_repr(m): if hasattr(m, 'original_extra_repr'): m.extra_repr = m.original_extra_repr del m.original_extra_repr if hasattr(m, 'accumulate_flops'): del m.accumulate_flops model.apply(add_extra_repr) print(model, file=ost, flush=flush) model.apply(del_extra_repr) def get_model_parameters_number(model): """Calculate parameter number of a model. Args: model (nn.module): The model for parameter number calculation. Returns: float: Parameter number of the model. 
""" num_params = sum(p.numel() for p in model.parameters() if p.requires_grad) return num_params def add_flops_counting_methods(net_main_module): # adding additional methods to the existing module object, # this is done this way so that each function has access to self object net_main_module.start_flops_count = start_flops_count.__get__( net_main_module) net_main_module.stop_flops_count = stop_flops_count.__get__( net_main_module) net_main_module.reset_flops_count = reset_flops_count.__get__( net_main_module) net_main_module.compute_average_flops_cost = compute_average_flops_cost.__get__( # noqa: E501 net_main_module) net_main_module.reset_flops_count() return net_main_module def compute_average_flops_cost(self): """Compute average FLOPs cost. A method to compute average FLOPs cost, which will be available after `add_flops_counting_methods()` is called on a desired net object. Returns: float: Current mean flops consumption per image. """ batches_count = self.__batch_counter__ flops_sum = 0 for module in self.modules(): if is_supported_instance(module): flops_sum += module.__flops__ params_sum = get_model_parameters_number(self) return flops_sum / batches_count, params_sum def start_flops_count(self): """Activate the computation of mean flops consumption per image. A method to activate the computation of mean flops consumption per image. which will be available after ``add_flops_counting_methods()`` is called on a desired net object. It should be called before running the network. """ add_batch_counter_hook_function(self) def add_flops_counter_hook_function(module): if is_supported_instance(module): if hasattr(module, '__flops_handle__'): return else: handle = module.register_forward_hook( get_modules_mapping()[type(module)]) module.__flops_handle__ = handle self.apply(partial(add_flops_counter_hook_function)) def stop_flops_count(self): """Stop computing the mean flops consumption per image. 
A method to stop computing the mean flops consumption per image, which will be available after ``add_flops_counting_methods()`` is called on a desired net object. It can be called to pause the computation whenever. """ remove_batch_counter_hook_function(self) self.apply(remove_flops_counter_hook_function) def reset_flops_count(self): """Reset statistics computed so far. A method to Reset computed statistics, which will be available after `add_flops_counting_methods()` is called on a desired net object. """ add_batch_counter_variables_or_reset(self) self.apply(add_flops_counter_variable_or_reset) # ---- Internal functions def empty_flops_counter_hook(module, input, output): module.__flops__ += 0 def upsample_flops_counter_hook(module, input, output): output_size = output[0] batch_size = output_size.shape[0] output_elements_count = batch_size for val in output_size.shape[1:]: output_elements_count *= val module.__flops__ += int(output_elements_count) def relu_flops_counter_hook(module, input, output): active_elements_count = output.numel() module.__flops__ += int(active_elements_count) def linear_flops_counter_hook(module, input, output): input = input[0] output_last_dim = output.shape[ -1] # pytorch checks dimensions, so here we don't care much module.__flops__ += int(np.prod(input.shape) * output_last_dim) def pool_flops_counter_hook(module, input, output): input = input[0] module.__flops__ += int(np.prod(input.shape)) def norm_flops_counter_hook(module, input, output): input = input[0] batch_flops = np.prod(input.shape) if (getattr(module, 'affine', False) or getattr(module, 'elementwise_affine', False)): batch_flops *= 2 module.__flops__ += int(batch_flops) def deconv_flops_counter_hook(conv_module, input, output): # Can have multiple inputs, getting the first one input = input[0] batch_size = input.shape[0] input_height, input_width = input.shape[2:] kernel_height, kernel_width = conv_module.kernel_size in_channels = conv_module.in_channels out_channels = 
conv_module.out_channels groups = conv_module.groups filters_per_channel = out_channels // groups conv_per_position_flops = ( kernel_height * kernel_width * in_channels * filters_per_channel) active_elements_count = batch_size * input_height * input_width overall_conv_flops = conv_per_position_flops * active_elements_count bias_flops = 0 if conv_module.bias is not None: output_height, output_width = output.shape[2:] bias_flops = out_channels * batch_size * output_height * output_height overall_flops = overall_conv_flops + bias_flops conv_module.__flops__ += int(overall_flops) def conv_flops_counter_hook(conv_module, input, output): # Can have multiple inputs, getting the first one input = input[0] batch_size = input.shape[0] output_dims = list(output.shape[2:]) kernel_dims = list(conv_module.kernel_size) in_channels = conv_module.in_channels out_channels = conv_module.out_channels groups = conv_module.groups filters_per_channel = out_channels // groups conv_per_position_flops = int( np.prod(kernel_dims)) * in_channels * filters_per_channel active_elements_count = batch_size * int(np.prod(output_dims)) overall_conv_flops = conv_per_position_flops * active_elements_count bias_flops = 0 if conv_module.bias is not None: bias_flops = out_channels * active_elements_count overall_flops = overall_conv_flops + bias_flops conv_module.__flops__ += int(overall_flops) def batch_counter_hook(module, input, output): batch_size = 1 if len(input) > 0: # Can have multiple inputs, getting the first one input = input[0] batch_size = len(input) else: warnings.warn('No positional inputs found for a module, ' 'assuming batch size is 1.') module.__batch_counter__ += batch_size def add_batch_counter_variables_or_reset(module): module.__batch_counter__ = 0 def add_batch_counter_hook_function(module): if hasattr(module, '__batch_counter_handle__'): return handle = module.register_forward_hook(batch_counter_hook) module.__batch_counter_handle__ = handle def 
remove_batch_counter_hook_function(module): if hasattr(module, '__batch_counter_handle__'): module.__batch_counter_handle__.remove() del module.__batch_counter_handle__ def add_flops_counter_variable_or_reset(module): if is_supported_instance(module): if hasattr(module, '__flops__') or hasattr(module, '__params__'): warnings.warn('variables __flops__ or __params__ are already ' 'defined for the module' + type(module).__name__ + ' ptflops can affect your code!') module.__flops__ = 0 module.__params__ = get_model_parameters_number(module) def is_supported_instance(module): if type(module) in get_modules_mapping(): return True return False def remove_flops_counter_hook_function(module): if is_supported_instance(module): if hasattr(module, '__flops_handle__'): module.__flops_handle__.remove() del module.__flops_handle__ def get_modules_mapping(): return { # convolutions nn.Conv1d: conv_flops_counter_hook, nn.Conv2d: conv_flops_counter_hook, mmcv.cnn.bricks.Conv2d: conv_flops_counter_hook, nn.Conv3d: conv_flops_counter_hook, mmcv.cnn.bricks.Conv3d: conv_flops_counter_hook, # activations nn.ReLU: relu_flops_counter_hook, nn.PReLU: relu_flops_counter_hook, nn.ELU: relu_flops_counter_hook, nn.LeakyReLU: relu_flops_counter_hook, nn.ReLU6: relu_flops_counter_hook, # poolings nn.MaxPool1d: pool_flops_counter_hook, nn.AvgPool1d: pool_flops_counter_hook, nn.AvgPool2d: pool_flops_counter_hook, nn.MaxPool2d: pool_flops_counter_hook, mmcv.cnn.bricks.MaxPool2d: pool_flops_counter_hook, nn.MaxPool3d: pool_flops_counter_hook, mmcv.cnn.bricks.MaxPool3d: pool_flops_counter_hook, nn.AvgPool3d: pool_flops_counter_hook, nn.AdaptiveMaxPool1d: pool_flops_counter_hook, nn.AdaptiveAvgPool1d: pool_flops_counter_hook, nn.AdaptiveMaxPool2d: pool_flops_counter_hook, nn.AdaptiveAvgPool2d: pool_flops_counter_hook, nn.AdaptiveMaxPool3d: pool_flops_counter_hook, nn.AdaptiveAvgPool3d: pool_flops_counter_hook, # normalizations nn.BatchNorm1d: norm_flops_counter_hook, nn.BatchNorm2d: 
def _fuse_conv_bn(conv, bn):
    """Fuse conv and bn into one module.

    The BN running statistics, and its affine parameters when present, are
    folded into the weight and bias of ``conv``, which is modified in place.

    Args:
        conv (nn.Module): Conv to be fused.
        bn (nn.Module): BN to be fused. Its ``running_mean`` and
            ``running_var`` must not be None.

    Returns:
        nn.Module: Fused module (the same object as ``conv``).
    """
    conv_w = conv.weight
    conv_b = conv.bias if conv.bias is not None else torch.zeros_like(
        bn.running_mean)

    std = torch.sqrt(bn.running_var + bn.eps)
    # ``affine=False`` BN layers have ``weight``/``bias`` set to None, which
    # is equivalent to a scale of 1 and a shift of 0.
    factor = bn.weight / std if bn.weight is not None else 1.0 / std
    conv.weight = nn.Parameter(conv_w *
                               factor.reshape([conv.out_channels, 1, 1, 1]))
    fused_bias = (conv_b - bn.running_mean) * factor
    if bn.bias is not None:
        fused_bias = fused_bias + bn.bias
    conv.bias = nn.Parameter(fused_bias)
    return conv


def fuse_conv_bn(module):
    """Recursively fuse conv and bn in a module.

    During inference a batch norm layer no longer updates its statistics and
    only applies the stored per-channel mean and variance, so it can be
    folded into the preceding conv layer to save computation and simplify
    the network structure.

    Children are visited in ``named_children()`` order. A BN child is fused
    into the most recently seen ``nn.Conv2d`` child and replaced by
    ``nn.Identity``. BN layers without running statistics cannot be folded
    and are left in place together with their conv.

    Args:
        module (nn.Module): Module to be fused.

    Returns:
        nn.Module: Fused module (``module`` itself, modified in place).
    """
    last_conv = None
    last_conv_name = None

    for name, child in module.named_children():
        if isinstance(child,
                      (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)):
            if last_conv is None:  # only fuse BN that is after Conv
                continue
            if child.running_mean is None or child.running_var is None:
                # No stored statistics to fold; keep both layers and do not
                # pair this conv with a later BN.
                last_conv = None
                continue
            fused_conv = _fuse_conv_bn(last_conv, child)
            module._modules[last_conv_name] = fused_conv
            # To reduce changes, set BN as Identity instead of deleting it.
            module._modules[name] = nn.Identity()
            last_conv = None
        elif isinstance(child, nn.Conv2d):
            last_conv = child
            last_conv_name = name
        else:
            fuse_conv_bn(child)
    return module


class _BatchNormXd(torch.nn.modules.batchnorm._BatchNorm):
    """A general BatchNorm layer without input dimension check.

    Reproduced from @kapily's work:
    (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547)
    The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc
    is `_check_input_dim` that is designed for tensor sanity checks.
    The check has been bypassed in this class for the convenience of
    converting SyncBatchNorm.
    """

    def _check_input_dim(self, input):
        return


def revert_sync_batchnorm(module):
    """Helper function to convert all `SyncBatchNorm` (SyncBN) and
    `mmcv.ops.sync_bn.SyncBatchNorm`(MMSyncBN) layers in the model to
    `BatchNormXd` layers.

    Adapted from @kapily's work:
    (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547)

    Args:
        module (nn.Module): The module containing `SyncBatchNorm` layers.

    Returns:
        module_output: The converted module with `BatchNormXd` layers.
    """
    module_output = module
    module_checklist = [torch.nn.modules.batchnorm.SyncBatchNorm]
    # the mmcv variant is only checked when ``mmcv.ops`` is available
    if hasattr(mmcv, 'ops'):
        module_checklist.append(mmcv.ops.SyncBatchNorm)
    if isinstance(module, tuple(module_checklist)):
        module_output = _BatchNormXd(module.num_features, module.eps,
                                     module.momentum, module.affine,
                                     module.track_running_stats)
        if module.affine:
            # no_grad() may not be needed here but
            # just to be consistent with `convert_sync_batchnorm()`
            with torch.no_grad():
                module_output.weight = module.weight
                module_output.bias = module.bias
        module_output.running_mean = module.running_mean
        module_output.running_var = module.running_var
        module_output.num_batches_tracked = module.num_batches_tracked
        module_output.training = module.training
        # qconfig exists in quantized models
        if hasattr(module, 'qconfig'):
            module_output.qconfig = module.qconfig
    # convert the children recursively and re-attach them under their names
    for name, child in module.named_children():
        module_output.add_module(name, revert_sync_batchnorm(child))
    del module
    return module_output


def update_init_info(module, init_info):
    """Update the `_params_init_info` in the module if the value of parameters
    are changed.

    A parameter counts as changed when its current mean differs from the
    recorded ``tmp_mean_value``.

    Args:
        module (obj:`nn.Module`): The module of PyTorch with a user-defined
            attribute `_params_init_info` which records the initialization
            information.
        init_info (str): The string that describes the initialization.
    """
    assert hasattr(
        module,
        '_params_init_info'), f'Can not find `_params_init_info` in {module}'
    for name, param in module.named_parameters():

        assert param in module._params_init_info, (
            f'Find a new :obj:`Parameter` '
            f'named `{name}` during executing the '
            f'`init_weights` of '
            f'`{module.__class__.__name__}`. '
            f'Please do not add or '
            f'replace parameters during executing '
            f'the `init_weights`. ')

        # The parameter has been changed during executing the
        # `init_weights` of module
        mean_value = param.data.mean()
        if module._params_init_info[param]['tmp_mean_value'] != mean_value:
            module._params_init_info[param]['init_info'] = init_info
            module._params_init_info[param]['tmp_mean_value'] = mean_value


def constant_init(module, val, bias=0):
    """Fill ``module.weight`` with ``val`` and ``module.bias`` with ``bias``.

    An attribute that is missing or None is skipped.
    """
    if hasattr(module, 'weight') and module.weight is not None:
        nn.init.constant_(module.weight, val)
    if hasattr(module, 'bias') and module.bias is not None:
        nn.init.constant_(module.bias, bias)


def xavier_init(module, gain=1, bias=0, distribution='normal'):
    """Xavier-initialize ``module.weight`` and fill ``module.bias`` with
    ``bias``.

    ``distribution`` must be ``'uniform'`` or ``'normal'``. An attribute that
    is missing or None is skipped.
    """
    assert distribution in ['uniform', 'normal']
    if hasattr(module, 'weight') and module.weight is not None:
        if distribution == 'uniform':
            nn.init.xavier_uniform_(module.weight, gain=gain)
        else:
            nn.init.xavier_normal_(module.weight, gain=gain)
    if hasattr(module, 'bias') and module.bias is not None:
        nn.init.constant_(module.bias, bias)


def normal_init(module, mean=0, std=1, bias=0):
    """Draw ``module.weight`` from a normal distribution and fill
    ``module.bias`` with ``bias``.

    An attribute that is missing or None is skipped.
    """
    if hasattr(module, 'weight') and module.weight is not None:
        nn.init.normal_(module.weight, mean, std)
    if hasattr(module, 'bias') and module.bias is not None:
        nn.init.constant_(module.bias, bias)


def trunc_normal_init(module: nn.Module,
                      mean: float = 0,
                      std: float = 1,
                      a: float = -2,
                      b: float = 2,
                      bias: float = 0) -> None:
    """Initialize ``module.weight`` with ``trunc_normal_`` and fill
    ``module.bias`` with ``bias``.

    An attribute that is missing or None is skipped.
    """
    if hasattr(module, 'weight') and module.weight is not None:
        trunc_normal_(module.weight, mean, std, a, b)  # type: ignore
    if hasattr(module, 'bias') and module.bias is not None:
        nn.init.constant_(module.bias, bias)  # type: ignore
'weight') and module.weight is not None: nn.init.uniform_(module.weight, a, b) if hasattr(module, 'bias') and module.bias is not None: nn.init.constant_(module.bias, bias) def kaiming_init(module, a=0, mode='fan_out', nonlinearity='relu', bias=0, distribution='normal'): assert distribution in ['uniform', 'normal'] if hasattr(module, 'weight') and module.weight is not None: if distribution == 'uniform': nn.init.kaiming_uniform_( module.weight, a=a, mode=mode, nonlinearity=nonlinearity) else: nn.init.kaiming_normal_( module.weight, a=a, mode=mode, nonlinearity=nonlinearity) if hasattr(module, 'bias') and module.bias is not None: nn.init.constant_(module.bias, bias) def caffe2_xavier_init(module, bias=0): # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch # Acknowledgment to FAIR's internal code kaiming_init( module, a=1, mode='fan_in', nonlinearity='leaky_relu', bias=bias, distribution='uniform') def bias_init_with_prob(prior_prob): """initialize conv/fc bias value according to a given probability value.""" bias_init = float(-np.log((1 - prior_prob) / prior_prob)) return bias_init def _get_bases_name(m): return [b.__name__ for b in m.__class__.__bases__] class BaseInit(object): def __init__(self, *, bias=0, bias_prob=None, layer=None): self.wholemodule = False if not isinstance(bias, (int, float)): raise TypeError(f'bias must be a number, but got a {type(bias)}') if bias_prob is not None: if not isinstance(bias_prob, float): raise TypeError(f'bias_prob type must be float, \ but got {type(bias_prob)}') if layer is not None: if not isinstance(layer, (str, list)): raise TypeError(f'layer must be a str or a list of str, \ but got a {type(layer)}') else: layer = [] if bias_prob is not None: self.bias = bias_init_with_prob(bias_prob) else: self.bias = bias self.layer = [layer] if isinstance(layer, str) else layer def _get_init_info(self): info = f'{self.__class__.__name__}, bias={self.bias}' return info @INITIALIZERS.register_module(name='Constant') 
@INITIALIZERS.register_module(name='Constant')
class ConstantInit(BaseInit):
    """Initializer that fills weights and biases with constants.

    Args:
        val (int | float): Value written into the weights.
        bias (int | float): Value written into the bias. Defaults to 0.
        bias_prob (float, optional): Probability used to derive the bias
            value. Defaults to None.
        layer (str | list[str], optional): Names of the layers to
            initialize. Defaults to None.
    """

    def __init__(self, val, **kwargs):
        super().__init__(**kwargs)
        self.val = val

    def __call__(self, module):

        def _visit(m):
            # ``wholemodule`` selects every sub-module; otherwise the class
            # name or a direct base-class name must be listed in ``layer``.
            chosen = self.wholemodule or bool(
                set(self.layer) & {m.__class__.__name__, *_get_bases_name(m)})
            if chosen:
                constant_init(m, self.val, self.bias)

        module.apply(_visit)
        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return f'{self.__class__.__name__}: val={self.val}, bias={self.bias}'


@INITIALIZERS.register_module(name='Xavier')
class XavierInit(BaseInit):
    r"""Xavier (Glorot) initializer, after "Understanding the difficulty of
    training deep feedforward neural networks" - Glorot, X. & Bengio, Y.
    (2010).

    Args:
        gain (int | float): Optional scaling factor. Defaults to 1.
        bias (int | float): Value written into the bias. Defaults to 0.
        bias_prob (float, optional): Probability used to derive the bias
            value. Defaults to None.
        distribution (str): Either ``'normal'`` or ``'uniform'``.
            Defaults to ``'normal'``.
        layer (str | list[str], optional): Names of the layers to
            initialize. Defaults to None.
    """

    def __init__(self, gain=1, distribution='normal', **kwargs):
        super().__init__(**kwargs)
        self.gain = gain
        self.distribution = distribution

    def __call__(self, module):

        def _visit(m):
            chosen = self.wholemodule or bool(
                set(self.layer) & {m.__class__.__name__, *_get_bases_name(m)})
            if chosen:
                xavier_init(m, self.gain, self.bias, self.distribution)

        module.apply(_visit)
        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return (f'{self.__class__.__name__}: gain={self.gain}, '
                f'distribution={self.distribution}, bias={self.bias}')


@INITIALIZERS.register_module(name='Normal')
class NormalInit(BaseInit):
    r"""Initializer drawing weights from the normal distribution
    :math:`\mathcal{N}(\text{mean}, \text{std}^2)`.

    Args:
        mean (int | float): Mean of the normal distribution. Defaults to 0.
        std (int | float): Standard deviation of the normal distribution.
            Defaults to 1.
        bias (int | float): Value written into the bias. Defaults to 0.
        bias_prob (float, optional): Probability used to derive the bias
            value. Defaults to None.
        layer (str | list[str], optional): Names of the layers to
            initialize. Defaults to None.
    """

    def __init__(self, mean=0, std=1, **kwargs):
        super().__init__(**kwargs)
        self.mean = mean
        self.std = std

    def __call__(self, module):

        def _visit(m):
            chosen = self.wholemodule or bool(
                set(self.layer) & {m.__class__.__name__, *_get_bases_name(m)})
            if chosen:
                normal_init(m, self.mean, self.std, self.bias)

        module.apply(_visit)
        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return (f'{self.__class__.__name__}: mean={self.mean},'
                f' std={self.std}, bias={self.bias}')


@INITIALIZERS.register_module(name='TruncNormal')
class TruncNormalInit(BaseInit):
    r"""Initializer drawing weights from the normal distribution
    :math:`\mathcal{N}(\text{mean}, \text{std}^2)` truncated to
    :math:`[a, b]`.

    Args:
        mean (float): Mean of the normal distribution. Defaults to 0.
        std (float): Standard deviation of the normal distribution.
            Defaults to 1.
        a (float): The minimum cutoff value. Defaults to -2.
        b (float): The maximum cutoff value. Defaults to 2.
        bias (float): Value written into the bias. Defaults to 0.
        bias_prob (float, optional): Probability used to derive the bias
            value. Defaults to None.
        layer (str | list[str], optional): Names of the layers to
            initialize. Defaults to None.
    """

    def __init__(self,
                 mean: float = 0,
                 std: float = 1,
                 a: float = -2,
                 b: float = 2,
                 **kwargs) -> None:
        super().__init__(**kwargs)
        self.mean = mean
        self.std = std
        self.a = a
        self.b = b

    def __call__(self, module: nn.Module) -> None:

        def _visit(m):
            chosen = self.wholemodule or bool(
                set(self.layer) & {m.__class__.__name__, *_get_bases_name(m)})
            if chosen:
                trunc_normal_init(m, self.mean, self.std, self.a, self.b,
                                  self.bias)

        module.apply(_visit)
        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return (f'{self.__class__.__name__}: a={self.a}, b={self.b},'
                f' mean={self.mean}, std={self.std}, bias={self.bias}')


@INITIALIZERS.register_module(name='Uniform')
class UniformInit(BaseInit):
    r"""Initializer drawing weights from the uniform distribution
    :math:`\mathcal{U}(a, b)`.

    Args:
        a (int | float): Lower bound of the distribution. Defaults to 0.
        b (int | float): Upper bound of the distribution. Defaults to 1.
        bias (int | float): Value written into the bias. Defaults to 0.
        bias_prob (float, optional): Probability used to derive the bias
            value. Defaults to None.
        layer (str | list[str], optional): Names of the layers to
            initialize. Defaults to None.
    """

    def __init__(self, a=0, b=1, **kwargs):
        super().__init__(**kwargs)
        self.a = a
        self.b = b

    def __call__(self, module):

        def _visit(m):
            chosen = self.wholemodule or bool(
                set(self.layer) & {m.__class__.__name__, *_get_bases_name(m)})
            if chosen:
                uniform_init(m, self.a, self.b, self.bias)

        module.apply(_visit)
        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return (f'{self.__class__.__name__}: a={self.a},'
                f' b={self.b}, bias={self.bias}')


@INITIALIZERS.register_module(name='Kaiming')
class KaimingInit(BaseInit):
    r"""Kaiming (He) initializer, after "Delving deep into rectifiers:
    Surpassing human-level performance on ImageNet classification" - He, K.
    et al. (2015).

    Args:
        a (int | float): Negative slope of the rectifier used after this
            layer (only used with ``'leaky_relu'``). Defaults to 0.
        mode (str): Either ``'fan_in'`` or ``'fan_out'``. ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass, ``'fan_out'`` preserves the magnitudes in the
            backward pass. Defaults to ``'fan_out'``.
        nonlinearity (str): The non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'``.
            Defaults to 'relu'.
        bias (int | float): Value written into the bias. Defaults to 0.
        bias_prob (float, optional): Probability used to derive the bias
            value. Defaults to None.
        distribution (str): Either ``'normal'`` or ``'uniform'``.
            Defaults to ``'normal'``.
        layer (str | list[str], optional): Names of the layers to
            initialize. Defaults to None.
    """

    def __init__(self,
                 a=0,
                 mode='fan_out',
                 nonlinearity='relu',
                 distribution='normal',
                 **kwargs):
        super().__init__(**kwargs)
        self.a = a
        self.mode = mode
        self.nonlinearity = nonlinearity
        self.distribution = distribution

    def __call__(self, module):

        def _visit(m):
            chosen = self.wholemodule or bool(
                set(self.layer) & {m.__class__.__name__, *_get_bases_name(m)})
            if chosen:
                kaiming_init(m, self.a, self.mode, self.nonlinearity,
                             self.bias, self.distribution)

        module.apply(_visit)
        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return (f'{self.__class__.__name__}: a={self.a}, mode={self.mode}, '
                f'nonlinearity={self.nonlinearity}, '
                f'distribution ={self.distribution}, bias={self.bias}')


@INITIALIZERS.register_module(name='Caffe2Xavier')
class Caffe2XavierInit(KaimingInit):
    """``KaimingInit`` preset matching Caffe2's ``XavierFill``."""

    # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch
    # Acknowledgment to FAIR's internal code
    def __init__(self, **kwargs):
        super().__init__(
            a=1,
            mode='fan_in',
            nonlinearity='leaky_relu',
            distribution='uniform',
            **kwargs)

    def __call__(self, module):
        super().__call__(module)


@INITIALIZERS.register_module(name='Pretrained')
class PretrainedInit(object):
    """Initializer that loads weights from a pretrained checkpoint.

    Args:
        checkpoint (str): The checkpoint file of the pretrained model to
            load.
        prefix (str, optional): Prefix of a sub-module in the pretrained
            model, used to load only a part of it. For example, to load only
            the backbone of a detector model, set ``prefix='backbone.'``.
            Defaults to None.
        map_location (str): Where to map the loaded tensors. Defaults to
            None.
    """

    def __init__(self, checkpoint, prefix=None, map_location=None):
        self.checkpoint = checkpoint
        self.prefix = prefix
        self.map_location = map_location

    def __call__(self, module):
        # imported lazily, at call time
        from mmcv.runner import (_load_checkpoint_with_prefix, load_checkpoint,
                                 load_state_dict)
        logger = get_logger()
        if self.prefix is not None:
            print_log(
                f'load {self.prefix} in model from: {self.checkpoint}',
                logger=logger)
            state_dict = _load_checkpoint_with_prefix(
                self.prefix, self.checkpoint, map_location=self.map_location)
            load_state_dict(module, state_dict, strict=False, logger=logger)
        else:
            print_log(f'load model from: {self.checkpoint}', logger=logger)
            load_checkpoint(
                module,
                self.checkpoint,
                map_location=self.map_location,
                strict=False,
                logger=logger)

        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return f'{self.__class__.__name__}: load from {self.checkpoint}'
func.wholemodule = wholemodule func(module) def _initialize_override(module, override, cfg): if not isinstance(override, (dict, list)): raise TypeError(f'override must be a dict or a list of dict, \ but got {type(override)}') override = [override] if isinstance(override, dict) else override for override_ in override: cp_override = copy.deepcopy(override_) name = cp_override.pop('name', None) if name is None: raise ValueError('`override` must contain the key "name",' f'but got {cp_override}') # if override only has name key, it means use args in init_cfg if not cp_override: cp_override.update(cfg) # if override has name key and other args except type key, it will # raise error elif 'type' not in cp_override.keys(): raise ValueError( f'`override` need "type" key, but got {cp_override}') if hasattr(module, name): _initialize(getattr(module, name), cp_override, wholemodule=True) else: raise RuntimeError(f'module did not have attribute {name}, ' f'but init_cfg is {cp_override}.') def initialize(module, init_cfg): r"""Initialize a module. Args: module (``torch.nn.Module``): the module will be initialized. init_cfg (dict | list[dict]): initialization configuration dict to define initializer. OpenMMLab has implemented 6 initializers including ``Constant``, ``Xavier``, ``Normal``, ``Uniform``, ``Kaiming``, and ``Pretrained``. 
Example: >>> module = nn.Linear(2, 3, bias=True) >>> init_cfg = dict(type='Constant', layer='Linear', val =1 , bias =2) >>> initialize(module, init_cfg) >>> module = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2)) >>> # define key ``'layer'`` for initializing layer with different >>> # configuration >>> init_cfg = [dict(type='Constant', layer='Conv1d', val=1), dict(type='Constant', layer='Linear', val=2)] >>> initialize(module, init_cfg) >>> # define key``'override'`` to initialize some specific part in >>> # module >>> class FooNet(nn.Module): >>> def __init__(self): >>> super().__init__() >>> self.feat = nn.Conv2d(3, 16, 3) >>> self.reg = nn.Conv2d(16, 10, 3) >>> self.cls = nn.Conv2d(16, 5, 3) >>> model = FooNet() >>> init_cfg = dict(type='Constant', val=1, bias=2, layer='Conv2d', >>> override=dict(type='Constant', name='reg', val=3, bias=4)) >>> initialize(model, init_cfg) >>> model = ResNet(depth=50) >>> # Initialize weights with the pretrained model. >>> init_cfg = dict(type='Pretrained', checkpoint='torchvision://resnet50') >>> initialize(model, init_cfg) >>> # Initialize weights of a sub-module with the specific part of >>> # a pretrained model by using "prefix". 
>>> url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\ >>> 'retinanet_r50_fpn_1x_coco/'\ >>> 'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' >>> init_cfg = dict(type='Pretrained', checkpoint=url, prefix='backbone.') """ if not isinstance(init_cfg, (dict, list)): raise TypeError(f'init_cfg must be a dict or a list of dict, \ but got {type(init_cfg)}') if isinstance(init_cfg, dict): init_cfg = [init_cfg] for cfg in init_cfg: # should deeply copy the original config because cfg may be used by # other modules, e.g., one init_cfg shared by multiple bottleneck # blocks, the expected cfg will be changed after pop and will change # the initialization behavior of other modules cp_cfg = copy.deepcopy(cfg) override = cp_cfg.pop('override', None) _initialize(module, cp_cfg) if override is not None: cp_cfg.pop('layer', None) _initialize_override(module, override, cp_cfg) else: # All attributes in module have same initialization. pass def _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float, b: float) -> Tensor: # Method based on # https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf # Modified from # https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py def norm_cdf(x): # Computes standard normal cumulative distribution function return (1. + math.erf(x / math.sqrt(2.))) / 2. if (mean < a - 2 * std) or (mean > b + 2 * std): warnings.warn( 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' 'The distribution of values may be incorrect.', stacklevel=2) with torch.no_grad(): # Values are generated by using a truncated uniform distribution and # then using the inverse CDF for the normal distribution. # Get upper and lower cdf values lower = norm_cdf((a - mean) / std) upper = norm_cdf((b - mean) / std) # Uniformly fill tensor with values from [lower, upper], then translate # to [2lower-1, 2upper-1]. 
def _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float,
                           b: float) -> Tensor:
    """Fill ``tensor`` in place with truncated-normal samples, without
    recording gradients, and return it."""
    # Method based on
    # https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
    # Modified from
    # https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py
    root_two = math.sqrt(2.)

    def _std_normal_cdf(value):
        # cumulative distribution function of the standard normal
        return (1. + math.erf(value / root_two)) / 2.

    too_low = mean < a - 2 * std
    too_high = mean > b + 2 * std
    if too_low or too_high:
        warnings.warn(
            'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. '
            'The distribution of values may be incorrect.',
            stacklevel=2)

    with torch.no_grad():
        # Sample uniformly between the CDF values of the two cut-offs and map
        # the samples back through the inverse normal CDF.
        cdf_low = _std_normal_cdf((a - mean) / std)
        cdf_high = _std_normal_cdf((b - mean) / std)

        # [cdf_low, cdf_high] rescaled to [2*cdf_low - 1, 2*cdf_high - 1],
        # the domain expected by erfinv
        tensor.uniform_(2 * cdf_low - 1, 2 * cdf_high - 1)

        # inverse CDF transform -> truncated standard normal
        tensor.erfinv_()

        # shift and scale to the requested mean / std
        tensor.mul_(std * math.sqrt(2.))
        tensor.add_(mean)

        # guard against values that ended up just outside the range
        tensor.clamp_(min=a, max=b)
        return tensor


def trunc_normal_(tensor: Tensor,
                  mean: float = 0.,
                  std: float = 1.,
                  a: float = -2.,
                  b: float = 2.) -> Tensor:
    r"""Fill ``tensor`` in place with values drawn from a truncated normal
    distribution and return it.

    The values are effectively drawn from
    :math:`\mathcal{N}(\text{mean}, \text{std}^2)` restricted to
    :math:`[a, b]`. The sampling method works best when
    :math:`a \leq \text{mean} \leq b`.

    Modified from
    https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py

    Args:
        tensor (``torch.Tensor``): an n-dimensional `torch.Tensor`.
        mean (float): the mean of the normal distribution.
        std (float): the standard deviation of the normal distribution.
        a (float): the minimum cutoff value.
        b (float): the maximum cutoff value.
    """
    return _no_grad_trunc_normal_(tensor, mean, std, a, b)
def conv3x3(in_planes, out_planes, dilation=1):
    """3x3 convolution with padding.

    The padding equals the dilation, so the spatial size is preserved.
    """
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        padding=dilation,
        dilation=dilation)


def make_vgg_layer(inplanes,
                   planes,
                   num_blocks,
                   dilation=1,
                   with_bn=False,
                   ceil_mode=False):
    """Build the modules of one VGG stage.

    Returns:
        list[nn.Module]: ``num_blocks`` groups of conv3x3, an optional
        BatchNorm2d and an in-place ReLU, followed by one 2x2 max pooling
        with stride 2.
    """
    layers = []
    for _ in range(num_blocks):
        layers.append(conv3x3(inplanes, planes, dilation))
        if with_bn:
            layers.append(nn.BatchNorm2d(planes))
        layers.append(nn.ReLU(inplace=True))
        # every block after the first maps planes -> planes
        inplanes = planes
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode))

    return layers


class VGG(nn.Module):
    """VGG backbone.

    Args:
        depth (int): Depth of vgg, from {11, 13, 16, 19}.
        with_bn (bool): Use BatchNorm or not.
        num_classes (int): number of classes for classification. A
            classifier head is only built when this is positive.
        num_stages (int): VGG stages, normally 5.
        dilations (Sequence[int]): Dilation of each stage.
        out_indices (Sequence[int]): Output from which stages.
        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
            not freezing any parameters.
        bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze
            running stats (mean and var).
        bn_frozen (bool): Whether to freeze weight and bias of BN layers.
            Only takes effect together with ``bn_eval``.
        ceil_mode (bool): ``ceil_mode`` of the max pooling layers.
        with_last_pool (bool): Whether to keep the pooling layer of the last
            stage.
    """

    arch_settings = {
        11: (1, 1, 2, 2, 2),
        13: (2, 2, 2, 2, 2),
        16: (2, 2, 3, 3, 3),
        19: (2, 2, 4, 4, 4)
    }

    def __init__(self,
                 depth,
                 with_bn=False,
                 num_classes=-1,
                 num_stages=5,
                 dilations=(1, 1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3, 4),
                 frozen_stages=-1,
                 bn_eval=True,
                 bn_frozen=False,
                 ceil_mode=False,
                 with_last_pool=True):
        super(VGG, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for vgg')
        assert num_stages >= 1 and num_stages <= 5
        stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        assert len(dilations) == num_stages
        assert max(out_indices) <= num_stages

        self.num_classes = num_classes
        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen

        self.inplanes = 3
        start_idx = 0
        vgg_layers = []
        # [start, end) index of each stage inside the flat ``features``
        # sequence
        self.range_sub_modules = []
        for i, num_blocks in enumerate(self.stage_blocks):
            # conv + relu (+ bn) per block, plus one pooling layer
            num_modules = num_blocks * (2 + with_bn) + 1
            end_idx = start_idx + num_modules
            dilation = dilations[i]
            planes = 64 * 2**i if i < 4 else 512
            vgg_layer = make_vgg_layer(
                self.inplanes,
                planes,
                num_blocks,
                dilation=dilation,
                with_bn=with_bn,
                ceil_mode=ceil_mode)
            vgg_layers.extend(vgg_layer)
            self.inplanes = planes
            self.range_sub_modules.append([start_idx, end_idx])
            start_idx = end_idx
        if not with_last_pool:
            vgg_layers.pop(-1)
            self.range_sub_modules[-1][1] -= 1
        self.module_name = 'features'
        self.add_module(self.module_name, nn.Sequential(*vgg_layers))

        if self.num_classes > 0:
            self.classifier = nn.Sequential(
                nn.Linear(512 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, num_classes),
            )

    def init_weights(self, pretrained=None):
        """Load ``pretrained`` weights or initialize from scratch.

        Args:
            pretrained (str, optional): Checkpoint to load (non-strict). When
                None, Conv2d layers get Kaiming init, BatchNorm2d layers a
                constant weight of 1 and Linear layers a normal init with
                std 0.01.

        Raises:
            TypeError: If ``pretrained`` is neither a str nor None.
        """
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            from ..runner import load_checkpoint
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, nn.BatchNorm2d):
                    constant_init(m, 1)
                elif isinstance(m, nn.Linear):
                    normal_init(m, std=0.01)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Run the stages and collect the requested outputs.

        Returns:
            The outputs of the stages listed in ``out_indices``, followed by
            the classifier output when ``num_classes > 0``. A single output
            is returned as is, several outputs as a tuple.
        """
        outs = []
        vgg_layers = getattr(self, self.module_name)
        for i in range(len(self.stage_blocks)):
            for j in range(*self.range_sub_modules[i]):
                vgg_layer = vgg_layers[j]
                x = vgg_layer(x)
            if i in self.out_indices:
                outs.append(x)
        if self.num_classes > 0:
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
            outs.append(x)
        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)

    def train(self, mode=True):
        """Switch the training mode while honouring ``bn_eval``,
        ``bn_frozen`` and ``frozen_stages``.

        Returns:
            VGG: ``self``, as ``nn.Module.train`` does, so that
            ``model.train()`` and ``model.eval()`` can be chained.
        """
        super(VGG, self).train(mode)
        if self.bn_eval:
            for m in self.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()
                    if self.bn_frozen:
                        for params in m.parameters():
                            params.requires_grad = False
        vgg_layers = getattr(self, self.module_name)
        if mode and self.frozen_stages >= 0:
            for i in range(self.frozen_stages):
                for j in range(*self.range_sub_modules[i]):
                    mod = vgg_layers[j]
                    mod.eval()
                    for param in mod.parameters():
                        param.requires_grad = False
        # Fixed: the override used to return None, so ``model.eval()`` (which
        # returns ``self.train(False)``) also returned None.
        return self
def single_gpu_test(model, data_loader):
    """Test model with a single gpu.

    This method tests model with a single gpu and displays test progress bar.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.

    Returns:
        list: The prediction results.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for data in data_loader:
        with torch.no_grad():
            result = model(return_loss=False, **data)
        results.extend(result)

        # Assume result has the same length of batch_size
        # refer to https://github.com/open-mmlab/mmcv/issues/985
        batch_size = len(result)
        for _ in range(batch_size):
            prog_bar.update()
    return results


def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
    """Test model with multiple gpus.

    This method tests model with multiple gpus and collects the results
    under two different modes: gpu and cpu modes. By setting
    ``gpu_collect=True``, it encodes results to gpu tensors and use gpu
    communication for results collection. On cpu mode it saves the results on
    different gpus to ``tmpdir`` and collects them by the rank 0 worker.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        tmpdir (str): Path of directory to save the temporary results from
            different gpus under cpu mode.
        gpu_collect (bool): Option to use either gpu or cpu to collect results.

    Returns:
        list: The prediction results.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    time.sleep(2)  # This line can prevent deadlock problem in some cases.
    for data in data_loader:
        with torch.no_grad():
            result = model(return_loss=False, **data)
        results.extend(result)

        if rank == 0:
            # Rank 0 advances the bar on behalf of all ranks, capped so the
            # bar never runs past the dataset length.
            batch_size = len(result)
            batch_size_all = batch_size * world_size
            if batch_size_all + prog_bar.completed > len(dataset):
                batch_size_all = len(dataset) - prog_bar.completed
            for _ in range(batch_size_all):
                prog_bar.update()

    # collect results from all ranks
    if gpu_collect:
        results = collect_results_gpu(results, len(dataset))
    else:
        results = collect_results_cpu(results, len(dataset), tmpdir)
    return results


def collect_results_cpu(result_part, size, tmpdir=None):
    """Collect results under cpu mode.

    On cpu mode, this function will save the results on different gpus to
    ``tmpdir`` and collect them by the rank 0 worker.

    Args:
        result_part (list): Result list containing result parts
            to be collected.
        size (int): Size of the results, commonly equal to length of
            the results.
        tmpdir (str | None): temporal directory for collected results to
            store. If set to None, it will create a random temporal directory
            for it.

    Returns:
        list | None: The collected results on rank 0, interleaved across the
        parts and cut to ``size``; None on every other rank.
    """
    from itertools import zip_longest

    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            mmcv.mkdir_or_exist('.dist_test')
            tmpdir = tempfile.mkdtemp(dir='.dist_test')
            tmpdir = torch.tensor(
                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        # share the directory name chosen by rank 0 with all ranks
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None

    # load results of all parts from tmp dir
    part_list = []
    for i in range(world_size):
        part_file = osp.join(tmpdir, f'part_{i}.pkl')
        part_result = mmcv.load(part_file)
        # When data is severely insufficient, an empty part_result
        # on a certain gpu could makes the overall outputs empty.
        if part_result:
            part_list.append(part_result)
    # Interleave the parts. Fixed: plain ``zip`` stopped at the shortest part
    # and silently dropped the tail of longer ones. A private sentinel is
    # used as padding so that legitimate ``None`` results are kept.
    padding = object()
    ordered_results = [
        item for group in zip_longest(*part_list, fillvalue=padding)
        for item in group if item is not padding
    ]
    # the dataloader may pad some samples
    ordered_results = ordered_results[:size]
    # remove tmp dir
    shutil.rmtree(tmpdir)
    return ordered_results
""" rank, world_size = get_dist_info() # dump result part to tensor with pickle part_tensor = torch.tensor( bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') # gather all result part tensor shape shape_tensor = torch.tensor(part_tensor.shape, device='cuda') shape_list = [shape_tensor.clone() for _ in range(world_size)] dist.all_gather(shape_list, shape_tensor) # padding result part tensor to max length shape_max = torch.tensor(shape_list).max() part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') part_send[:shape_tensor[0]] = part_tensor part_recv_list = [ part_tensor.new_zeros(shape_max) for _ in range(world_size) ] # gather all result part dist.all_gather(part_recv_list, part_send) if rank == 0: part_list = [] for recv, shape in zip(part_recv_list, shape_list): part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()) # When data is severely insufficient, an empty part_result # on a certain gpu could makes the overall outputs empty. if part_result: part_list.append(part_result) # sort the results ordered_results = [] for res in zip(*part_list): ordered_results.extend(list(res)) # the dataloader may pad some samples ordered_results = ordered_results[:size] return ordered_results ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/fileio/__init__.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
class BaseStorageBackend(metaclass=ABCMeta):
    """Abstract class of storage backends.

    All backends need to implement two apis: ``get()`` and ``get_text()``.
    ``get()`` reads the file as a byte stream and ``get_text()`` reads the
    file as texts.
    """

    # a flag to indicate whether the backend can create a symlink for a file
    _allow_symlink = False

    @property
    def name(self):
        """str: Class name of the concrete backend."""
        return self.__class__.__name__

    @property
    def allow_symlink(self):
        """bool: Value of the class-level ``_allow_symlink`` flag."""
        return self._allow_symlink

    @abstractmethod
    def get(self, filepath):
        pass

    @abstractmethod
    def get_text(self, filepath):
        pass


class CephBackend(BaseStorageBackend):
    """Ceph storage backend (for internal use).

    Args:
        path_mapping (dict|None): path mapping dict from local path to Petrel
            path. When ``path_mapping={'src': 'dst'}``, ``src`` in ``filepath``
            will be replaced by ``dst``. Default: None.

    .. warning::
        :class:`mmcv.fileio.file_client.CephBackend` will be deprecated,
        please use :class:`mmcv.fileio.file_client.PetrelBackend` instead.
    """

    def __init__(self, path_mapping=None):
        try:
            import ceph
        except ImportError:
            raise ImportError('Please install ceph to enable CephBackend.')

        warnings.warn(
            'CephBackend will be deprecated, please use PetrelBackend instead',
            DeprecationWarning)
        self._client = ceph.S3Client()
        assert isinstance(path_mapping, dict) or path_mapping is None
        self.path_mapping = path_mapping

    def get(self, filepath):
        """Read ``filepath`` (after applying ``path_mapping``) and return a
        memoryview of the value handed back by the client."""
        filepath = str(filepath)
        if self.path_mapping is not None:
            for k, v in self.path_mapping.items():
                filepath = filepath.replace(k, v)
        value = self._client.Get(filepath)
        value_buf = memoryview(value)
        return value_buf

    def get_text(self, filepath, encoding=None):
        raise NotImplementedError


class PetrelBackend(BaseStorageBackend):
    """Petrel storage backend (for internal use).

    PetrelBackend supports reading and writing data to multiple clusters.
    If the file path contains the cluster name, PetrelBackend will read data
    from specified cluster or write data to it. Otherwise, PetrelBackend will
    access the default cluster.

    Args:
        path_mapping (dict, optional): Path mapping dict from local path to
            Petrel path. When ``path_mapping={'src': 'dst'}``, ``src`` in
            ``filepath`` will be replaced by ``dst``. Default: None.
        enable_mc (bool, optional): Whether to enable memcached support.
            Default: True.

    Examples:
        >>> filepath1 = 's3://path/of/file'
        >>> filepath2 = 'cluster-name:s3://path/of/file'
        >>> client = PetrelBackend()
        >>> client.get(filepath1)  # get data from default cluster
        >>> client.get(filepath2)  # get data from 'cluster-name' cluster
    """

    def __init__(self,
                 path_mapping: Optional[dict] = None,
                 enable_mc: bool = True):
        try:
            from petrel_client import client
        except ImportError:
            raise ImportError('Please install petrel_client to enable '
                              'PetrelBackend.')

        self._client = client.Client(enable_mc=enable_mc)
        assert isinstance(path_mapping, dict) or path_mapping is None
        self.path_mapping = path_mapping

    def _map_path(self, filepath: Union[str, Path]) -> str:
        """Map ``filepath`` to a string path whose prefix will be replaced by
        :attr:`self.path_mapping`.

        Note:
            The mapping is a plain ``str.replace`` per entry, so applying it
            twice to the same path is only safe when no replacement contains
            its own key. Public methods therefore map a path exactly once.

        Args:
            filepath (str): Path to be mapped.
        """
        filepath = str(filepath)
        if self.path_mapping is not None:
            for k, v in self.path_mapping.items():
                filepath = filepath.replace(k, v)
        return filepath

    def _format_path(self, filepath: str) -> str:
        """Convert a ``filepath`` to standard format of petrel oss.

        If the ``filepath`` is concatenated by ``os.path.join``, in a Windows
        environment, the ``filepath`` will be the format of
        's3://bucket_name\\image.jpg'. By invoking :meth:`_format_path`, the
        above ``filepath`` will be converted to 's3://bucket_name/image.jpg'.

        Args:
            filepath (str): Path to be formatted.
        """
        return re.sub(r'\\+', '/', filepath)

    def get(self, filepath: Union[str, Path]) -> memoryview:
        """Read data from a given ``filepath`` with 'rb' mode.

        Args:
            filepath (str or Path): Path to read data.

        Returns:
            memoryview: A memory view of expected bytes object to avoid
                copying. The memoryview object can be converted to bytes by
                ``value_buf.tobytes()``.
        """
        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        value = self._client.Get(filepath)
        value_buf = memoryview(value)
        return value_buf

    def get_text(self,
                 filepath: Union[str, Path],
                 encoding: str = 'utf-8') -> str:
        """Read data from a given ``filepath`` with 'r' mode.

        Args:
            filepath (str or Path): Path to read data.
            encoding (str): The encoding format used to open the ``filepath``.
                Default: 'utf-8'.

        Returns:
            str: Expected text reading from ``filepath``.
        """
        return str(self.get(filepath), encoding=encoding)

    def put(self, obj: bytes, filepath: Union[str, Path]) -> None:
        """Save data to a given ``filepath``.

        Args:
            obj (bytes): Data to be saved.
            filepath (str or Path): Path to write data.
        """
        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        self._client.put(filepath, obj)

    def put_text(self,
                 obj: str,
                 filepath: Union[str, Path],
                 encoding: str = 'utf-8') -> None:
        """Save data to a given ``filepath``.

        Args:
            obj (str): Data to be written.
            filepath (str or Path): Path to write data.
            encoding (str): The encoding format used to encode the ``obj``.
                Default: 'utf-8'.
        """
        self.put(bytes(obj, encoding=encoding), filepath)

    def remove(self, filepath: Union[str, Path]) -> None:
        """Remove a file.

        Args:
            filepath (str or Path): Path to be removed.

        Raises:
            NotImplementedError: If the client has no ``delete`` method.
        """
        if not has_method(self._client, 'delete'):
            raise NotImplementedError(
                ('Current version of Petrel Python SDK has not supported '
                 'the `delete` method, please use a higher version or dev'
                 ' branch instead.'))

        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        self._client.delete(filepath)

    def exists(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path exists.

        Args:
            filepath (str or Path): Path to be checked whether exists.

        Returns:
            bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise.

        Raises:
            NotImplementedError: If the client lacks ``contains`` or
                ``isdir``.
        """
        if not (has_method(self._client, 'contains')
                and has_method(self._client, 'isdir')):
            # The original literal pieces were joined without a separating
            # space ("higherversion"); the space is restored here.
            raise NotImplementedError(
                ('Current version of Petrel Python SDK has not supported '
                 'the `contains` and `isdir` methods, please use a higher '
                 'version or dev branch instead.'))

        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        return self._client.contains(filepath) or self._client.isdir(filepath)

    def isdir(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a directory.

        Args:
            filepath (str or Path): Path to be checked whether it is a
                directory.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a directory,
                ``False`` otherwise.

        Raises:
            NotImplementedError: If the client has no ``isdir`` method.
        """
        if not has_method(self._client, 'isdir'):
            raise NotImplementedError(
                ('Current version of Petrel Python SDK has not supported '
                 'the `isdir` method, please use a higher version or dev'
                 ' branch instead.'))

        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        return self._client.isdir(filepath)

    def isfile(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a file.

        Args:
            filepath (str or Path): Path to be checked whether it is a file.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a file, ``False``
                otherwise.

        Raises:
            NotImplementedError: If the client has no ``contains`` method.
        """
        if not has_method(self._client, 'contains'):
            raise NotImplementedError(
                ('Current version of Petrel Python SDK has not supported '
                 'the `contains` method, please use a higher version or '
                 'dev branch instead.'))

        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        return self._client.contains(filepath)

    def join_path(self, filepath: Union[str, Path],
                  *filepaths: Union[str, Path]) -> str:
        """Concatenate all file paths.

        Every component is passed through :meth:`_map_path` and
        :meth:`_format_path`; one trailing '/' of ``filepath`` is dropped
        before the components are joined with '/'.

        Args:
            filepath (str or Path): Path to be concatenated.

        Returns:
            str: The result after concatenation.
        """
        filepath = self._format_path(self._map_path(filepath))
        if filepath.endswith('/'):
            filepath = filepath[:-1]
        formatted_paths = [filepath]
        for path in filepaths:
            formatted_paths.append(self._format_path(self._map_path(path)))
        return '/'.join(formatted_paths)

    @contextmanager
    def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]:
        """Download a file from ``filepath`` and return a temporary path.

        ``get_local_path`` is decorated by :meth:`contextlib.contextmanager`.
        It can be called with ``with`` statement, and when exiting from the
        ``with`` statement, the temporary path will be released.

        Args:
            filepath (str | Path): Download a file from ``filepath``.

        Examples:
            >>> client = PetrelBackend()
            >>> # After exiting from the ``with`` clause,
            >>> # the path will be removed
            >>> with client.get_local_path('s3://path/of/your/file') as path:
            ...     # do something here

        Yields:
            Iterable[str]: Only yield one temporary path.
        """
        # ``isfile`` and ``get`` map and format the path themselves. Mapping
        # it here as well would apply ``path_mapping`` twice and corrupt the
        # key whenever a replacement contains its own source string.
        assert self.isfile(filepath)
        # Create the file before ``try`` so ``f`` is always bound in
        # ``finally``.
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            f.write(self.get(filepath))
            f.close()
            yield f.name
        finally:
            # closing twice is harmless; this releases the handle when the
            # download failed before the explicit close above
            f.close()
            os.remove(f.name)

    def list_dir_or_file(self,
                         dir_path: Union[str, Path],
                         list_dir: bool = True,
                         list_file: bool = True,
                         suffix: Optional[Union[str, Tuple[str]]] = None,
                         recursive: bool = False) -> Iterator[str]:
        """Scan a directory to find the interested directories or files in
        arbitrary order.

        Note:
            Petrel has no concept of directories but it simulates the directory
            hierarchy in the filesystem through public prefixes. In addition,
            if the returned path ends with '/', it means the path is a public
            prefix which is a logical directory.

        Note:
            :meth:`list_dir_or_file` returns the path relative to ``dir_path``.
            In addition, the returned path of directory will not contains the
            suffix '/' which is consistent with other backends.

        Args:
            dir_path (str | Path): Path of the directory.
            list_dir (bool): List the directories. Default: True.
            list_file (bool): List the path of files. Default: True.
            suffix (str or tuple[str], optional):  File suffix
                that we are interested in. Default: None.
            recursive (bool): If set to True, recursively scan the
                directory. Default: False.

        Yields:
            Iterable[str]: A relative path to ``dir_path``.

        Raises:
            NotImplementedError: If the client has no ``list`` method.
            TypeError: If ``suffix`` is given while ``list_dir`` is True, or
                if ``suffix`` is neither a string nor a tuple.
        """
        if not has_method(self._client, 'list'):
            raise NotImplementedError(
                ('Current version of Petrel Python SDK has not supported '
                 'the `list` method, please use a higher version or dev'
                 ' branch instead.'))

        dir_path = self._map_path(dir_path)
        dir_path = self._format_path(dir_path)
        if list_dir and suffix is not None:
            raise TypeError(
                '`list_dir` should be False when `suffix` is not None')

        if (suffix is not None) and not isinstance(suffix, (str, tuple)):
            raise TypeError('`suffix` must be a string or tuple of strings')

        # Petrel's simulated directory hierarchy assumes that directory paths
        # should end with `/`
        if not dir_path.endswith('/'):
            dir_path += '/'

        root = dir_path

        def _list_dir_or_file(dir_path, list_dir, list_file, suffix,
                              recursive):
            for path in self._client.list(dir_path):
                # ``dir_path`` is already mapped and always ends with '/',
                # so the child path is built by plain concatenation. Going
                # through ``join_path`` would re-apply ``path_mapping`` to
                # the mapped prefix.
                child_path = dir_path + self._format_path(path)
                # the `self.isdir` is not used here to determine whether path
                # is a directory, because `self.isdir` relies on
                # `self._client.list`
                if path.endswith('/'):  # a directory path
                    if list_dir:
                        # get the relative path and exclude the last
                        # character '/'
                        rel_dir = child_path[len(root):-1]
                        yield rel_dir
                    if recursive:
                        yield from _list_dir_or_file(child_path, list_dir,
                                                     list_file, suffix,
                                                     recursive)
                else:  # a file path
                    rel_path = child_path[len(root):]
                    if (suffix is None
                            or rel_path.endswith(suffix)) and list_file:
                        yield rel_path

        return _list_dir_or_file(dir_path, list_dir, list_file, suffix,
                                 recursive)


class MemcachedBackend(BaseStorageBackend):
    """Memcached storage backend.

    Attributes:
        server_list_cfg (str): Config file for memcached server list.
        client_cfg (str): Config file for memcached client.
        sys_path (str | None): Additional path to be appended to `sys.path`.
            Default: None.
    """

    def __init__(self, server_list_cfg, client_cfg, sys_path=None):
        if sys_path is not None:
            import sys
            sys.path.append(sys_path)
        try:
            import mc
        except ImportError:
            raise ImportError(
                'Please install memcached to enable MemcachedBackend.')

        self.server_list_cfg = server_list_cfg
        self.client_cfg = client_cfg
        self._client = mc.MemcachedClient.GetInstance(self.server_list_cfg,
                                                      self.client_cfg)
        # mc.pyvector servers as a point which points to a memory cache
        self._mc_buffer = mc.pyvector()

    def get(self, filepath):
        """Fetch ``filepath`` into the shared buffer and return the result
        of ``mc.ConvertBuffer`` on it."""
        filepath = str(filepath)
        import mc
        self._client.Get(filepath, self._mc_buffer)
        value_buf = mc.ConvertBuffer(self._mc_buffer)
        return value_buf

    def get_text(self, filepath, encoding=None):
        raise NotImplementedError


class LmdbBackend(BaseStorageBackend):
    """Lmdb storage backend.

    Args:
        db_path (str): Lmdb database path.
        readonly (bool, optional): Lmdb environment parameter. If True,
            disallow any write operations. Default: True.
        lock (bool, optional): Lmdb environment parameter. If False, when
            concurrent access occurs, do not lock the database. Default: False.
        readahead (bool, optional): Lmdb environment parameter. If False,
            disable the OS filesystem readahead mechanism, which may improve
            random read performance when a database is larger than RAM.
            Default: False.

    Attributes:
        db_path (str): Lmdb database path.
    """

    def __init__(self,
                 db_path,
                 readonly=True,
                 lock=False,
                 readahead=False,
                 **kwargs):
        try:
            import lmdb
        except ImportError:
            raise ImportError('Please install lmdb to enable LmdbBackend.')

        self.db_path = str(db_path)
        self._client = lmdb.open(
            self.db_path,
            readonly=readonly,
            lock=lock,
            readahead=readahead,
            **kwargs)

    def get(self, filepath):
        """Get values according to the filepath.

        Args:
            filepath (str | obj:`Path`): Here, filepath is the lmdb key.
                It is encoded as ASCII before the lookup.
        """
        filepath = str(filepath)
        with self._client.begin(write=False) as txn:
            value_buf = txn.get(filepath.encode('ascii'))
        return value_buf

    def get_text(self, filepath, encoding=None):
        raise NotImplementedError


class HardDiskBackend(BaseStorageBackend):
    """Raw hard disks storage backend."""

    _allow_symlink = True

    def get(self, filepath: Union[str, Path]) -> bytes:
        """Read data from a given ``filepath`` with 'rb' mode.

        Args:
            filepath (str or Path): Path to read data.

        Returns:
            bytes: Expected bytes object.
        """
        with open(filepath, 'rb') as f:
            value_buf = f.read()
        return value_buf

    def get_text(self,
                 filepath: Union[str, Path],
                 encoding: str = 'utf-8') -> str:
        """Read data from a given ``filepath`` with 'r' mode.

        Args:
            filepath (str or Path): Path to read data.
            encoding (str): The encoding format used to open the ``filepath``.
                Default: 'utf-8'.

        Returns:
            str: Expected text reading from ``filepath``.
        """
        with open(filepath, 'r', encoding=encoding) as f:
            value_buf = f.read()
        return value_buf

    def put(self, obj: bytes, filepath: Union[str, Path]) -> None:
        """Write data to a given ``filepath`` with 'wb' mode.

        Note:
            ``put`` will create a directory if the directory of ``filepath``
            does not exist.

        Args:
            obj (bytes): Data to be written.
            filepath (str or Path): Path to write data.
        """
        mmcv.mkdir_or_exist(osp.dirname(filepath))
        with open(filepath, 'wb') as f:
            f.write(obj)

    def put_text(self,
                 obj: str,
                 filepath: Union[str, Path],
                 encoding: str = 'utf-8') -> None:
        """Write data to a given ``filepath`` with 'w' mode.

        Note:
            ``put_text`` will create a directory if the directory of
            ``filepath`` does not exist.

        Args:
            obj (str): Data to be written.
            filepath (str or Path): Path to write data.
            encoding (str): The encoding format used to open the ``filepath``.
                Default: 'utf-8'.
        """
        mmcv.mkdir_or_exist(osp.dirname(filepath))
        with open(filepath, 'w', encoding=encoding) as f:
            f.write(obj)

    def remove(self, filepath: Union[str, Path]) -> None:
        """Remove a file.

        Args:
            filepath (str or Path): Path to be removed.
        """
        os.remove(filepath)

    def exists(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path exists.

        Args:
            filepath (str or Path): Path to be checked whether exists.

        Returns:
            bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise.
        """
        return osp.exists(filepath)

    def isdir(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a directory.

        Args:
            filepath (str or Path): Path to be checked whether it is a
                directory.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a directory,
                ``False`` otherwise.
        """
        return osp.isdir(filepath)

    def isfile(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a file.

        Args:
            filepath (str or Path): Path to be checked whether it is a file.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a file, ``False``
                otherwise.
        """
        return osp.isfile(filepath)

    def join_path(self, filepath: Union[str, Path],
                  *filepaths: Union[str, Path]) -> str:
        """Concatenate all file paths.

        Join one or more filepath components intelligently. The return value
        is the concatenation of filepath and any members of *filepaths.

        Args:
            filepath (str or Path): Path to be concatenated.

        Returns:
            str: The result of concatenation.
        """
        return osp.join(filepath, *filepaths)

    @contextmanager
    def get_local_path(
            self, filepath: Union[str, Path]) -> Iterable[Union[str, Path]]:
        """Only for unified API and do nothing."""
        yield filepath

    def list_dir_or_file(self,
                         dir_path: Union[str, Path],
                         list_dir: bool = True,
                         list_file: bool = True,
                         suffix: Optional[Union[str, Tuple[str]]] = None,
                         recursive: bool = False) -> Iterator[str]:
        """Scan a directory to find the interested directories or files in
        arbitrary order.

        Note:
            :meth:`list_dir_or_file` returns the path relative to ``dir_path``.

        Args:
            dir_path (str | Path): Path of the directory.
            list_dir (bool): List the directories. Default: True.
            list_file (bool): List the path of files. Default: True.
            suffix (str or tuple[str], optional):  File suffix
                that we are interested in. Default: None.
            recursive (bool): If set to True, recursively scan the
                directory. Default: False.

        Yields:
            Iterable[str]: A relative path to ``dir_path``.

        Raises:
            TypeError: If ``suffix`` is given while ``list_dir`` is True, or
                if ``suffix`` is neither a string nor a tuple.
        """
        if list_dir and suffix is not None:
            raise TypeError('`suffix` should be None when `list_dir` is True')

        if (suffix is not None) and not isinstance(suffix, (str, tuple)):
            raise TypeError('`suffix` must be a string or tuple of strings')

        root = dir_path

        def _list_dir_or_file(dir_path, list_dir, list_file, suffix,
                              recursive):
            for entry in os.scandir(dir_path):
                # files whose name starts with '.' are never reported
                if not entry.name.startswith('.') and entry.is_file():
                    rel_path = osp.relpath(entry.path, root)
                    if (suffix is None
                            or rel_path.endswith(suffix)) and list_file:
                        yield rel_path
                elif osp.isdir(entry.path):
                    if list_dir:
                        rel_dir = osp.relpath(entry.path, root)
                        yield rel_dir
                    if recursive:
                        yield from _list_dir_or_file(entry.path, list_dir,
                                                     list_file, suffix,
                                                     recursive)

        return _list_dir_or_file(dir_path, list_dir, list_file, suffix,
                                 recursive)


class HTTPBackend(BaseStorageBackend):
    """HTTP and HTTPS storage backend."""

    def get(self, filepath):
        """Download ``filepath`` with ``urlopen`` and return the raw bytes."""
        # use the response as a context manager so it is closed after reading
        with urlopen(filepath) as response:
            value_buf = response.read()
        return value_buf

    def get_text(self, filepath, encoding='utf-8'):
        """Download ``filepath`` and decode the payload with ``encoding``."""
        with urlopen(filepath) as response:
            value_buf = response.read()
        return value_buf.decode(encoding)

    @contextmanager
    def get_local_path(self, filepath: str) -> Iterable[str]:
        """Download a file from ``filepath``.

        ``get_local_path`` is decorated by :meth:`contextlib.contextmanager`.
        It can be called with ``with`` statement, and when exiting from the
        ``with`` statement, the temporary path will be released.

        Args:
            filepath (str): Download a file from ``filepath``.

        Examples:
            >>> client = HTTPBackend()
            >>> # After exiting from the ``with`` clause,
            >>> # the path will be removed
            >>> with client.get_local_path('http://path/of/your/file') as path:
            ...     # do something here
        """
        # Create the file before ``try`` so ``f`` is always bound in
        # ``finally``.
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            f.write(self.get(filepath))
            f.close()
            yield f.name
        finally:
            # closing twice is harmless; this releases the handle when the
            # download failed before the explicit close above
            f.close()
            os.remove(f.name)
""" _backends = { 'disk': HardDiskBackend, 'ceph': CephBackend, 'memcached': MemcachedBackend, 'lmdb': LmdbBackend, 'petrel': PetrelBackend, 'http': HTTPBackend, } # This collection is used to record the overridden backends, and when a # backend appears in the collection, the singleton pattern is disabled for # that backend, because if the singleton pattern is used, then the object # returned will be the backend before overwriting _overridden_backends = set() _prefix_to_backends = { 's3': PetrelBackend, 'http': HTTPBackend, 'https': HTTPBackend, } _overridden_prefixes = set() _instances = {} def __new__(cls, backend=None, prefix=None, **kwargs): if backend is None and prefix is None: backend = 'disk' if backend is not None and backend not in cls._backends: raise ValueError( f'Backend {backend} is not supported. Currently supported ones' f' are {list(cls._backends.keys())}') if prefix is not None and prefix not in cls._prefix_to_backends: raise ValueError( f'prefix {prefix} is not supported. 
Currently supported ones ' f'are {list(cls._prefix_to_backends.keys())}') # concatenate the arguments to a unique key for determining whether # objects with the same arguments were created arg_key = f'{backend}:{prefix}' for key, value in kwargs.items(): arg_key += f':{key}:{value}' # if a backend was overridden, it will create a new object if (arg_key in cls._instances and backend not in cls._overridden_backends and prefix not in cls._overridden_prefixes): _instance = cls._instances[arg_key] else: # create a new object and put it to _instance _instance = super().__new__(cls) if backend is not None: _instance.client = cls._backends[backend](**kwargs) else: _instance.client = cls._prefix_to_backends[prefix](**kwargs) cls._instances[arg_key] = _instance return _instance @property def name(self): return self.client.name @property def allow_symlink(self): return self.client.allow_symlink @staticmethod def parse_uri_prefix(uri: Union[str, Path]) -> Optional[str]: """Parse the prefix of a uri. Args: uri (str | Path): Uri to be parsed that contains the file prefix. Examples: >>> FileClient.parse_uri_prefix('s3://path/of/your/file') 's3' Returns: str | None: Return the prefix of uri if the uri contains '://' else ``None``. """ assert is_filepath(uri) uri = str(uri) if '://' not in uri: return None else: prefix, _ = uri.split('://') # In the case of PetrelBackend, the prefix may contains the cluster # name like clusterName:s3 if ':' in prefix: _, prefix = prefix.split(':') return prefix @classmethod def infer_client(cls, file_client_args: Optional[dict] = None, uri: Optional[Union[str, Path]] = None) -> 'FileClient': """Infer a suitable file client based on the URI and arguments. Args: file_client_args (dict, optional): Arguments to instantiate a FileClient. Default: None. uri (str | Path, optional): Uri to be parsed that contains the file prefix. Default: None. 
Examples: >>> uri = 's3://path/of/your/file' >>> file_client = FileClient.infer_client(uri=uri) >>> file_client_args = {'backend': 'petrel'} >>> file_client = FileClient.infer_client(file_client_args) Returns: FileClient: Instantiated FileClient object. """ assert file_client_args is not None or uri is not None if file_client_args is None: file_prefix = cls.parse_uri_prefix(uri) # type: ignore return cls(prefix=file_prefix) else: return cls(**file_client_args) @classmethod def _register_backend(cls, name, backend, force=False, prefixes=None): if not isinstance(name, str): raise TypeError('the backend name should be a string, ' f'but got {type(name)}') if not inspect.isclass(backend): raise TypeError( f'backend should be a class but got {type(backend)}') if not issubclass(backend, BaseStorageBackend): raise TypeError( f'backend {backend} is not a subclass of BaseStorageBackend') if not force and name in cls._backends: raise KeyError( f'{name} is already registered as a storage backend, ' 'add "force=True" if you want to override it') if name in cls._backends and force: cls._overridden_backends.add(name) cls._backends[name] = backend if prefixes is not None: if isinstance(prefixes, str): prefixes = [prefixes] else: assert isinstance(prefixes, (list, tuple)) for prefix in prefixes: if prefix not in cls._prefix_to_backends: cls._prefix_to_backends[prefix] = backend elif (prefix in cls._prefix_to_backends) and force: cls._overridden_prefixes.add(prefix) cls._prefix_to_backends[prefix] = backend else: raise KeyError( f'{prefix} is already registered as a storage backend,' ' add "force=True" if you want to override it') @classmethod def register_backend(cls, name, backend=None, force=False, prefixes=None): """Register a backend to FileClient. This method can be used as a normal class method or a decorator. .. 
code-block:: python class NewBackend(BaseStorageBackend): def get(self, filepath): return filepath def get_text(self, filepath): return filepath FileClient.register_backend('new', NewBackend) or .. code-block:: python @FileClient.register_backend('new') class NewBackend(BaseStorageBackend): def get(self, filepath): return filepath def get_text(self, filepath): return filepath Args: name (str): The name of the registered backend. backend (class, optional): The backend class to be registered, which must be a subclass of :class:`BaseStorageBackend`. When this method is used as a decorator, backend is None. Defaults to None. force (bool, optional): Whether to override the backend if the name has already been registered. Defaults to False. prefixes (str or list[str] or tuple[str], optional): The prefixes of the registered storage backend. Default: None. `New in version 1.3.15.` """ if backend is not None: cls._register_backend( name, backend, force=force, prefixes=prefixes) return def _register(backend_cls): cls._register_backend( name, backend_cls, force=force, prefixes=prefixes) return backend_cls return _register def get(self, filepath: Union[str, Path]) -> Union[bytes, memoryview]: """Read data from a given ``filepath`` with 'rb' mode. Note: There are two types of return values for ``get``, one is ``bytes`` and the other is ``memoryview``. The advantage of using memoryview is that you can avoid copying, and if you want to convert it to ``bytes``, you can use ``.tobytes()``. Args: filepath (str or Path): Path to read data. Returns: bytes | memoryview: Expected bytes object or a memory view of the bytes object. """ return self.client.get(filepath) def get_text(self, filepath: Union[str, Path], encoding='utf-8') -> str: """Read data from a given ``filepath`` with 'r' mode. Args: filepath (str or Path): Path to read data. encoding (str): The encoding format used to open the ``filepath``. Default: 'utf-8'. Returns: str: Expected text reading from ``filepath``. 
""" return self.client.get_text(filepath, encoding) def put(self, obj: bytes, filepath: Union[str, Path]) -> None: """Write data to a given ``filepath`` with 'wb' mode. Note: ``put`` should create a directory if the directory of ``filepath`` does not exist. Args: obj (bytes): Data to be written. filepath (str or Path): Path to write data. """ self.client.put(obj, filepath) def put_text(self, obj: str, filepath: Union[str, Path]) -> None: """Write data to a given ``filepath`` with 'w' mode. Note: ``put_text`` should create a directory if the directory of ``filepath`` does not exist. Args: obj (str): Data to be written. filepath (str or Path): Path to write data. encoding (str, optional): The encoding format used to open the `filepath`. Default: 'utf-8'. """ self.client.put_text(obj, filepath) def remove(self, filepath: Union[str, Path]) -> None: """Remove a file. Args: filepath (str, Path): Path to be removed. """ self.client.remove(filepath) def exists(self, filepath: Union[str, Path]) -> bool: """Check whether a file path exists. Args: filepath (str or Path): Path to be checked whether exists. Returns: bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. """ return self.client.exists(filepath) def isdir(self, filepath: Union[str, Path]) -> bool: """Check whether a file path is a directory. Args: filepath (str or Path): Path to be checked whether it is a directory. Returns: bool: Return ``True`` if ``filepath`` points to a directory, ``False`` otherwise. """ return self.client.isdir(filepath) def isfile(self, filepath: Union[str, Path]) -> bool: """Check whether a file path is a file. Args: filepath (str or Path): Path to be checked whether it is a file. Returns: bool: Return ``True`` if ``filepath`` points to a file, ``False`` otherwise. """ return self.client.isfile(filepath) def join_path(self, filepath: Union[str, Path], *filepaths: Union[str, Path]) -> str: """Concatenate all file paths. Join one or more filepath components intelligently. 
@contextmanager
def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]:
    """Expose ``filepath`` as a local path for the duration of a ``with``.

    This is a context manager (decorated with
    :func:`contextlib.contextmanager`). It enters the backend client's own
    ``get_local_path`` context and yields the path that context provides;
    leaving the ``with`` statement also leaves the backend's context.

    .. warning::
        ``get_local_path`` is an experimental interface that may change in
        the future.

    Args:
        filepath (str or Path): Path to read data from. It is converted to
            ``str`` before being handed to the backend.

    Examples:
        >>> file_client = FileClient(prefix='s3')
        >>> with file_client.get_local_path('s3://bucket/abc.jpg') as path:
        ...     # do something here

    Yields:
        Iterable[str]: Only yield one path.
    """
    path_str = str(filepath)
    with self.client.get_local_path(path_str) as resolved:
        yield resolved
""" yield from self.client.list_dir_or_file(dir_path, list_dir, list_file, suffix, recursive) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/fileio/handlers/__init__.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from .base import BaseFileHandler from .json_handler import JsonHandler from .pickle_handler import PickleHandler from .yaml_handler import YamlHandler __all__ = ['BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler'] ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/fileio/handlers/base.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from abc import ABCMeta, abstractmethod class BaseFileHandler(metaclass=ABCMeta): # `str_like` is a flag to indicate whether the type of file object is # str-like object or bytes-like object. Pickle only processes bytes-like # objects but json only processes str-like object. If it is str-like # object, `StringIO` will be used to process the buffer. str_like = True @abstractmethod def load_from_fileobj(self, file, **kwargs): pass @abstractmethod def dump_to_fileobj(self, obj, file, **kwargs): pass @abstractmethod def dump_to_str(self, obj, **kwargs): pass def load_from_path(self, filepath, mode='r', **kwargs): with open(filepath, mode) as f: return self.load_from_fileobj(f, **kwargs) def dump_to_path(self, obj, filepath, mode='w', **kwargs): with open(filepath, mode) as f: self.dump_to_fileobj(obj, f, **kwargs) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/fileio/handlers/json_handler.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import json import numpy as np from .base import BaseFileHandler def set_default(obj): """Set default json values for non-serializable values. 
class JsonHandler(BaseFileHandler):
    """File handler that (de)serializes objects with the ``json`` module.

    When dumping, ``set_default`` is installed as the ``default`` hook unless
    the caller supplies one.
    """

    def load_from_fileobj(self, file, **kwargs):
        """Parse JSON from ``file``.

        Keyword arguments are forwarded to :func:`json.load`. Accepting them
        matches ``BaseFileHandler.load_from_fileobj`` and the other handlers,
        which are called as ``load_from_fileobj(f, **kwargs)``.
        """
        return json.load(file, **kwargs)

    def dump_to_fileobj(self, obj, file, **kwargs):
        """Write ``obj`` as JSON to ``file``; kwargs go to :func:`json.dump`."""
        kwargs.setdefault('default', set_default)
        json.dump(obj, file, **kwargs)

    def dump_to_str(self, obj, **kwargs):
        """Return ``obj`` as a JSON string; kwargs go to :func:`json.dumps`."""
        kwargs.setdefault('default', set_default)
        return json.dumps(obj, **kwargs)
class YamlHandler(BaseFileHandler):
    """File handler that (de)serializes objects with the ``yaml`` module.

    ``Loader`` / ``Dumper`` are the module-level names imported at the top of
    this file (the C implementations when available). They are used unless
    the caller passes its own ``Loader`` / ``Dumper`` keyword.
    """

    def load_from_fileobj(self, file, **kwargs):
        """Parse YAML from ``file``; kwargs are passed to ``yaml.load``."""
        # NOTE(review): the default here is yaml's full ``Loader`` /
        # ``CLoader``, not the safe loader. Presumably this should not be fed
        # untrusted input without ``Loader=yaml.SafeLoader`` — confirm with
        # the callers.
        options = {'Loader': Loader, **kwargs}
        return yaml.load(file, **options)

    def dump_to_fileobj(self, obj, file, **kwargs):
        """Write ``obj`` as YAML to ``file``; kwargs go to ``yaml.dump``."""
        options = {'Dumper': Dumper, **kwargs}
        yaml.dump(obj, file, **options)

    def dump_to_str(self, obj, **kwargs):
        """Return ``yaml.dump(obj)`` called without a stream."""
        options = {'Dumper': Dumper, **kwargs}
        return yaml.dump(obj, **options)
def dump(obj, file=None, file_format=None, file_client_args=None, **kwargs):
    """Dump data to json/yaml/pickle strings or files.

    This method provides a unified api for dumping data as strings or to
    files, and also supports custom arguments for each file format.

    Note:
        In v1.3.16 and later, ``dump`` supports dumping data as strings or to
        files which is saved to different backends.

    Args:
        obj (any): The python object to be dumped.
        file (str or :obj:`Path` or file-like object, optional): If not
            specified, then the object is dumped to a str, otherwise to a
            file specified by the filename or file-like object.
        file_format (str, optional): Same as :func:`load`. When omitted it is
            taken from the text after the last ``.`` of a string ``file``.
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.

    Examples:
        >>> dump('hello world', '/path/of/your/file')  # disk
        >>> dump('hello world', 's3://path/of/your/file')  # ceph or petrel

    Returns:
        The result of the handler's ``dump_to_str`` when ``file`` is None;
        otherwise None.

    Raises:
        ValueError: If both ``file`` and ``file_format`` are None.
        TypeError: If the format has no registered handler, or ``file`` is
            neither a path string nor an object with a ``write`` attribute.
    """
    if isinstance(file, Path):
        file = str(file)

    if file_format is None:
        if is_str(file):
            file_format = file.split('.')[-1]
        elif file is None:
            raise ValueError(
                'file_format must be specified since file is None')
    if file_format not in file_handlers:
        raise TypeError(f'Unsupported format: {file_format}')
    handler = file_handlers[file_format]

    # No destination: hand back the serialized form.
    if file is None:
        return handler.dump_to_str(obj, **kwargs)

    # Path-like destination: serialize into memory, then upload via client.
    if is_str(file):
        client = FileClient.infer_client(file_client_args, file)
        if handler.str_like:
            buffer_cls, upload = StringIO, client.put_text
        else:
            buffer_cls, upload = BytesIO, client.put
        with buffer_cls() as buffer:
            handler.dump_to_fileobj(obj, buffer, **kwargs)
            upload(buffer.getvalue(), file)
        return None

    # Already-open file object: write straight into it.
    if hasattr(file, 'write'):
        handler.dump_to_fileobj(obj, file, **kwargs)
        return None

    raise TypeError('"file" must be a filename str or a file-object')
def list_from_file(filename,
                   prefix='',
                   offset=0,
                   max_num=0,
                   encoding='utf-8',
                   file_client_args=None):
    """Read a text file and return its lines as a list of strings.

    Note:
        In v1.3.16 and later, ``list_from_file`` supports loading a text file
        which can be storaged in different backends and parsing the content
        as a list for strings.

    Args:
        filename (str): Filename.
        prefix (str): Text prepended to every returned item.
        offset (int): Number of leading lines to skip.
        max_num (int): Maximum number of lines to return; zero or a negative
            value means no limit.
        encoding (str): Encoding used to open the file. Default utf-8.
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.

    Examples:
        >>> list_from_file('/path/of/your/file')  # disk
        ['hello', 'world']
        >>> list_from_file('s3://path/of/your/file')  # ceph or petrel
        ['hello', 'world']

    Returns:
        list[str]: A list of strings, with trailing ``\\n`` / ``\\r``
        characters removed from each line.
    """
    collected = []
    client = FileClient.infer_client(file_client_args, filename)
    with StringIO(client.get_text(filename, encoding)) as stream:
        # Discard the first `offset` lines.
        for _ in range(offset):
            stream.readline()
        for raw_line in stream:
            if max_num > 0 and len(collected) >= max_num:
                break
            collected.append(prefix + raw_line.rstrip('\n\r'))
    return collected
encoding (str): Encoding used to open the file. Default utf-8. file_client_args (dict, optional): Arguments to instantiate a FileClient. See :class:`mmcv.fileio.FileClient` for details. Default: None. Examples: >>> dict_from_file('/path/of/your/file') # disk {'key1': 'value1', 'key2': 'value2'} >>> dict_from_file('s3://path/of/your/file') # ceph or petrel {'key1': 'value1', 'key2': 'value2'} Returns: dict: The parsed contents. """ mapping = {} file_client = FileClient.infer_client(file_client_args, filename) with StringIO(file_client.get_text(filename, encoding)) as f: for line in f: items = line.rstrip('\n').split() assert len(items) >= 2 key = key_type(items[0]) val = items[1:] if len(items) > 2 else items[1] mapping[key] = val return mapping ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/image/__init__.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from .colorspace import (bgr2gray, bgr2hls, bgr2hsv, bgr2rgb, bgr2ycbcr, gray2bgr, gray2rgb, hls2bgr, hsv2bgr, imconvert, rgb2bgr, rgb2gray, rgb2ycbcr, ycbcr2bgr, ycbcr2rgb) from .geometric import (cutout, imcrop, imflip, imflip_, impad, impad_to_multiple, imrescale, imresize, imresize_like, imresize_to_multiple, imrotate, imshear, imtranslate, rescale_size) from .io import imfrombytes, imread, imwrite, supported_backends, use_backend from .misc import tensor2imgs from .photometric import (adjust_brightness, adjust_color, adjust_contrast, adjust_lighting, adjust_sharpness, auto_contrast, clahe, imdenormalize, imequalize, iminvert, imnormalize, imnormalize_, lut_transform, posterize, solarize) __all__ = [ 'bgr2gray', 'bgr2hls', 'bgr2hsv', 'bgr2rgb', 'gray2bgr', 'gray2rgb', 'hls2bgr', 'hsv2bgr', 'imconvert', 'rgb2bgr', 'rgb2gray', 'imrescale', 'imresize', 'imresize_like', 'imresize_to_multiple', 'rescale_size', 'imcrop', 'imflip', 'imflip_', 'impad', 'impad_to_multiple', 'imrotate', 'imfrombytes', 'imread', 'imwrite', 
def imconvert(img, src, dst):
    """Convert an image from the src colorspace to dst colorspace.

    The conversion code is looked up on ``cv2`` by name, as
    ``COLOR_<SRC>2<DST>`` with both names upper-cased, and then applied with
    ``cv2.cvtColor``.

    Args:
        img (ndarray): The input image.
        src (str): The source colorspace, e.g., 'rgb', 'hsv'.
        dst (str): The destination colorspace, e.g., 'rgb', 'hsv'.

    Returns:
        ndarray: The converted image.
    """
    flag_name = f'COLOR_{src.upper()}2{dst.upper()}'
    conversion = getattr(cv2, flag_name)
    return cv2.cvtColor(img, conversion)
""" img = img[..., None] if img.ndim == 2 else img out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) return out_img def gray2rgb(img): """Convert a grayscale image to RGB image. Args: img (ndarray): The input image. Returns: ndarray: The converted RGB image. """ img = img[..., None] if img.ndim == 2 else img out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) return out_img def _convert_input_type_range(img): """Convert the type and range of the input image. It converts the input image to np.float32 type and range of [0, 1]. It is mainly used for pre-processing the input image in colorspace conversion functions such as rgb2ycbcr and ycbcr2rgb. Args: img (ndarray): The input image. It accepts: 1. np.uint8 type with range [0, 255]; 2. np.float32 type with range [0, 1]. Returns: (ndarray): The converted image with type of np.float32 and range of [0, 1]. """ img_type = img.dtype img = img.astype(np.float32) if img_type == np.float32: pass elif img_type == np.uint8: img /= 255. else: raise TypeError('The img type should be np.float32 or np.uint8, ' f'but got {img_type}') return img def _convert_output_type_range(img, dst_type): """Convert the type and range of the image according to dst_type. It converts the image to desired type and range. If `dst_type` is np.uint8, images will be converted to np.uint8 type with range [0, 255]. If `dst_type` is np.float32, it converts the image to np.float32 type with range [0, 1]. It is mainly used for post-processing images in colorspace conversion functions such as rgb2ycbcr and ycbcr2rgb. Args: img (ndarray): The image to be converted with np.float32 type and range [0, 255]. dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it converts the image to np.uint8 type with range [0, 255]. If dst_type is np.float32, it converts the image to np.float32 type with range [0, 1]. Returns: (ndarray): The converted image with desired type and range. 
""" if dst_type not in (np.uint8, np.float32): raise TypeError('The dst_type should be np.float32 or np.uint8, ' f'but got {dst_type}') if dst_type == np.uint8: img = img.round() else: img /= 255. return img.astype(dst_type) def rgb2ycbcr(img, y_only=False): """Convert a RGB image to YCbCr image. This function produces the same results as Matlab's `rgb2ycbcr` function. It implements the ITU-R BT.601 conversion for standard-definition television. See more details in https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`. In OpenCV, it implements a JPEG conversion. See more details in https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. Args: img (ndarray): The input image. It accepts: 1. np.uint8 type with range [0, 255]; 2. np.float32 type with range [0, 1]. y_only (bool): Whether to only return Y channel. Default: False. Returns: ndarray: The converted YCbCr image. The output image has the same type and range as input image. """ img_type = img.dtype img = _convert_input_type_range(img) if y_only: out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0 else: out_img = np.matmul( img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786], [24.966, 112.0, -18.214]]) + [16, 128, 128] out_img = _convert_output_type_range(out_img, img_type) return out_img def bgr2ycbcr(img, y_only=False): """Convert a BGR image to YCbCr image. The bgr version of rgb2ycbcr. It implements the ITU-R BT.601 conversion for standard-definition television. See more details in https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`. In OpenCV, it implements a JPEG conversion. See more details in https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. Args: img (ndarray): The input image. It accepts: 1. np.uint8 type with range [0, 255]; 2. np.float32 type with range [0, 1]. y_only (bool): Whether to only return Y channel. Default: False. 
def ycbcr2rgb(img):
    """Convert a YCbCr image to RGB image.

    This function produces the same results as Matlab's ycbcr2rgb function.
    It implements the ITU-R BT.601 conversion for standard-definition
    television. See more details in
    https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.

    It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`.
    In OpenCV, it implements a JPEG conversion. See more details in
    https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.

    Args:
        img (ndarray): The input image. It accepts:
            1. np.uint8 type with range [0, 255];
            2. np.float32 type with range [0, 1].

    Returns:
        ndarray: The converted RGB image. The output image has the same type
        and range as input image.
    """
    source_dtype = img.dtype
    # Work on a float copy scaled to the [0, 255] range.
    scaled = _convert_input_type_range(img) * 255
    weights = [[0.00456621, 0.00456621, 0.00456621],
               [0, -0.00153632, 0.00791071],
               [0.00625893, -0.00318811, 0]]
    offsets = [-222.921, 135.576, -276.836]
    rgb = np.matmul(scaled, weights) * 255.0 + offsets
    return _convert_output_type_range(rgb, source_dtype)
np.float32 type with range [0, 1]. Returns: ndarray: The converted BGR image. The output image has the same type and range as input image. """ img_type = img.dtype img = _convert_input_type_range(img) * 255 out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], [0.00791071, -0.00153632, 0], [0, -0.00318811, 0.00625893]]) * 255.0 + [ -276.836, 135.576, -222.921 ] out_img = _convert_output_type_range(out_img, img_type) return out_img def convert_color_factory(src, dst): code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') def convert_color(img): out_img = cv2.cvtColor(img, code) return out_img convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()} image. Args: img (ndarray or str): The input image. Returns: ndarray: The converted {dst.upper()} image. """ return convert_color bgr2rgb = convert_color_factory('bgr', 'rgb') rgb2bgr = convert_color_factory('rgb', 'bgr') bgr2hsv = convert_color_factory('bgr', 'hsv') hsv2bgr = convert_color_factory('hsv', 'bgr') bgr2hls = convert_color_factory('bgr', 'hls') hls2bgr = convert_color_factory('hls', 'bgr') ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/image/geometric.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import numbers import cv2 import numpy as np from ..utils import to_2tuple from .io import imread_backend try: from PIL import Image except ImportError: Image = None def _scale_size(size, scale): """Rescale a size by a ratio. Args: size (tuple[int]): (w, h). scale (float | tuple(float)): Scaling factor. Returns: tuple[int]: scaled size. 
""" if isinstance(scale, (float, int)): scale = (scale, scale) w, h = size return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5) cv2_interp_codes = { 'nearest': cv2.INTER_NEAREST, 'bilinear': cv2.INTER_LINEAR, 'bicubic': cv2.INTER_CUBIC, 'area': cv2.INTER_AREA, 'lanczos': cv2.INTER_LANCZOS4 } if Image is not None: pillow_interp_codes = { 'nearest': Image.NEAREST, 'bilinear': Image.BILINEAR, 'bicubic': Image.BICUBIC, 'box': Image.BOX, 'lanczos': Image.LANCZOS, 'hamming': Image.HAMMING } def imresize(img, size, return_scale=False, interpolation='bilinear', out=None, backend=None): """Resize image to a given size. Args: img (ndarray): The input image. size (tuple[int]): Target size (w, h). return_scale (bool): Whether to return `w_scale` and `h_scale`. interpolation (str): Interpolation method, accepted values are "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' backend, "nearest", "bilinear" for 'pillow' backend. out (ndarray): The output destination. backend (str | None): The image resize backend type. Options are `cv2`, `pillow`, `None`. If backend is None, the global imread_backend specified by ``mmcv.use_backend()`` will be used. Default: None. Returns: tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or `resized_img`. """ h, w = img.shape[:2] if backend is None: backend = imread_backend if backend not in ['cv2', 'pillow']: raise ValueError(f'backend: {backend} is not supported for resize.' 
f"Supported backends are 'cv2', 'pillow'") if backend == 'pillow': assert img.dtype == np.uint8, 'Pillow backend only support uint8 type' pil_image = Image.fromarray(img) pil_image = pil_image.resize(size, pillow_interp_codes[interpolation]) resized_img = np.array(pil_image) else: resized_img = cv2.resize( img, size, dst=out, interpolation=cv2_interp_codes[interpolation]) if not return_scale: return resized_img else: w_scale = size[0] / w h_scale = size[1] / h return resized_img, w_scale, h_scale def imresize_to_multiple(img, divisor, size=None, scale_factor=None, keep_ratio=False, return_scale=False, interpolation='bilinear', out=None, backend=None): """Resize image according to a given size or scale factor and then rounds up the the resized or rescaled image size to the nearest value that can be divided by the divisor. Args: img (ndarray): The input image. divisor (int | tuple): Resized image size will be a multiple of divisor. If divisor is a tuple, divisor should be (w_divisor, h_divisor). size (None | int | tuple[int]): Target size (w, h). Default: None. scale_factor (None | float | tuple[float]): Multiplier for spatial size. Should match input size if it is a tuple and the 2D style is (w_scale_factor, h_scale_factor). Default: None. keep_ratio (bool): Whether to keep the aspect ratio when resizing the image. Default: False. return_scale (bool): Whether to return `w_scale` and `h_scale`. interpolation (str): Interpolation method, accepted values are "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' backend, "nearest", "bilinear" for 'pillow' backend. out (ndarray): The output destination. backend (str | None): The image resize backend type. Options are `cv2`, `pillow`, `None`. If backend is None, the global imread_backend specified by ``mmcv.use_backend()`` will be used. Default: None. Returns: tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or `resized_img`. 
""" h, w = img.shape[:2] if size is not None and scale_factor is not None: raise ValueError('only one of size or scale_factor should be defined') elif size is None and scale_factor is None: raise ValueError('one of size or scale_factor should be defined') elif size is not None: size = to_2tuple(size) if keep_ratio: size = rescale_size((w, h), size, return_scale=False) else: size = _scale_size((w, h), scale_factor) divisor = to_2tuple(divisor) size = tuple([int(np.ceil(s / d)) * d for s, d in zip(size, divisor)]) resized_img, w_scale, h_scale = imresize( img, size, return_scale=True, interpolation=interpolation, out=out, backend=backend) if return_scale: return resized_img, w_scale, h_scale else: return resized_img def imresize_like(img, dst_img, return_scale=False, interpolation='bilinear', backend=None): """Resize image to the same size of a given image. Args: img (ndarray): The input image. dst_img (ndarray): The target image. return_scale (bool): Whether to return `w_scale` and `h_scale`. interpolation (str): Same as :func:`resize`. backend (str | None): Same as :func:`resize`. Returns: tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or `resized_img`. """ h, w = dst_img.shape[:2] return imresize(img, (w, h), return_scale, interpolation, backend=backend) def rescale_size(old_size, scale, return_scale=False): """Calculate the new size to be rescaled to. Args: old_size (tuple[int]): The old size (w, h) of image. scale (float | tuple[int]): The scaling factor or maximum size. If it is a float number, then the image will be rescaled by this factor, else if it is a tuple of 2 integers, then the image will be rescaled as large as possible within the scale. return_scale (bool): Whether to return the scaling factor besides the rescaled image size. Returns: tuple[int]: The new rescaled image size. 
""" w, h = old_size if isinstance(scale, (float, int)): if scale <= 0: raise ValueError(f'Invalid scale {scale}, must be positive.') scale_factor = scale elif isinstance(scale, tuple): max_long_edge = max(scale) max_short_edge = min(scale) scale_factor = min(max_long_edge / max(h, w), max_short_edge / min(h, w)) else: raise TypeError( f'Scale must be a number or tuple of int, but got {type(scale)}') new_size = _scale_size((w, h), scale_factor) if return_scale: return new_size, scale_factor else: return new_size def imrescale(img, scale, return_scale=False, interpolation='bilinear', backend=None): """Resize image while keeping the aspect ratio. Args: img (ndarray): The input image. scale (float | tuple[int]): The scaling factor or maximum size. If it is a float number, then the image will be rescaled by this factor, else if it is a tuple of 2 integers, then the image will be rescaled as large as possible within the scale. return_scale (bool): Whether to return the scaling factor besides the rescaled image. interpolation (str): Same as :func:`resize`. backend (str | None): Same as :func:`resize`. Returns: ndarray: The rescaled image. """ h, w = img.shape[:2] new_size, scale_factor = rescale_size((w, h), scale, return_scale=True) rescaled_img = imresize( img, new_size, interpolation=interpolation, backend=backend) if return_scale: return rescaled_img, scale_factor else: return rescaled_img def imflip(img, direction='horizontal'): """Flip an image horizontally or vertically. Args: img (ndarray): Image to be flipped. direction (str): The flip direction, either "horizontal" or "vertical" or "diagonal". Returns: ndarray: The flipped image. """ assert direction in ['horizontal', 'vertical', 'diagonal'] if direction == 'horizontal': return np.flip(img, axis=1) elif direction == 'vertical': return np.flip(img, axis=0) else: return np.flip(img, axis=(0, 1)) def imflip_(img, direction='horizontal'): """Inplace flip an image horizontally or vertically. 
def bbox_clip(bboxes, img_shape):
    """Clamp box coordinates so that they lie inside the image.

    Args:
        bboxes (ndarray): Boxes of shape (..., 4*k); the last axis holds
            alternating x / y coordinates.
        img_shape (tuple[int]): (height, width) of the image.

    Returns:
        ndarray: A new array with every x limited to ``[0, width - 1]``
        and every y limited to ``[0, height - 1]``.
    """
    num_coords = bboxes.shape[-1]
    assert num_coords % 4 == 0

    # Per-coordinate upper bound: even slots are x, odd slots are y.
    upper_bound = np.empty(num_coords, dtype=bboxes.dtype)
    upper_bound[1::2] = img_shape[0] - 1
    upper_bound[0::2] = img_shape[1] - 1

    # Apply the upper bound first, then floor at zero.
    capped = np.minimum(bboxes, upper_bound)
    return np.maximum(capped, 0)
def impad(img,
          *,
          shape=None,
          padding=None,
          pad_val=0,
          padding_mode='constant'):
    """Pad the given image to a certain shape or pad on all sides with
    specified padding mode and padding value.

    Args:
        img (ndarray): Image to be padded.
        shape (tuple[int]): Expected padding shape (h, w). Default: None.
            The image is padded on the right and bottom only. A target
            dimension smaller than the image leaves that dimension
            untouched (no cropping is performed).
        padding (int or tuple[int]): Padding on each border. If a single int
            is provided this is used to pad all borders. If tuple of length 2
            is provided this is the padding on left/right and top/bottom
            respectively. If a tuple of length 4 is provided this is the
            padding for the left, top, right and bottom borders respectively.
            Default: None. Note that `shape` and `padding` can not be both
            set.
        pad_val (Number | Sequence[Number]): Values to be filled in padding
            areas when padding_mode is 'constant'. Default: 0.
        padding_mode (str): Type of padding. Should be: constant, edge,
            reflect or symmetric. Default: constant.

            - constant: pads with a constant value, this value is specified
              with pad_val.
            - edge: pads with the last value at the edge of the image.
            - reflect: pads with reflection of image without repeating the
              last value on the edge. For example, padding [1, 2, 3, 4]
              with 2 elements on both sides in reflect mode will result
              in [3, 2, 1, 2, 3, 4, 3, 2].
            - symmetric: pads with reflection of image repeating the last
              value on the edge. For example, padding [1, 2, 3, 4] with
              2 elements on both sides in symmetric mode will result in
              [2, 1, 1, 2, 3, 4, 4, 3]

    Returns:
        ndarray: The padded image.

    Raises:
        AssertionError: If both or neither of `shape` and `padding` are
            given, if a tuple `pad_val` does not have one entry per channel,
            or if `padding_mode` is unknown.
        TypeError: If `pad_val` is neither a number nor a tuple.
        ValueError: If `padding` is not a number or a 2/4-element tuple.
    """
    assert (shape is not None) ^ (padding is not None)
    if shape is not None:
        # Clamp at zero: a target smaller than the image would otherwise
        # produce a negative border width, which cv2.copyMakeBorder rejects.
        pad_right = max(shape[1] - img.shape[1], 0)
        pad_bottom = max(shape[0] - img.shape[0], 0)
        padding = (0, 0, pad_right, pad_bottom)

    # check pad_val
    if isinstance(pad_val, tuple):
        # A 2-D (grayscale) image has one channel; its last axis is the
        # width, not the channel count.
        channels = 1 if img.ndim == 2 else img.shape[-1]
        assert len(pad_val) == channels
    elif not isinstance(pad_val, numbers.Number):
        raise TypeError('pad_val must be a int or a tuple. '
                        f'But received {type(pad_val)}')

    # check padding
    if isinstance(padding, tuple) and len(padding) in [2, 4]:
        if len(padding) == 2:
            # (left/right, top/bottom) -> (left, top, right, bottom)
            padding = (padding[0], padding[1], padding[0], padding[1])
    elif isinstance(padding, numbers.Number):
        padding = (padding, padding, padding, padding)
    else:
        raise ValueError('Padding must be a int or a 2, or 4 element tuple. '
                         f'But received {padding}')

    # check padding mode
    assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric']

    border_type = {
        'constant': cv2.BORDER_CONSTANT,
        'edge': cv2.BORDER_REPLICATE,
        'reflect': cv2.BORDER_REFLECT_101,
        'symmetric': cv2.BORDER_REFLECT
    }
    # cv2 expects the borders in (top, bottom, left, right) order.
    img = cv2.copyMakeBorder(
        img,
        padding[1],
        padding[3],
        padding[0],
        padding[2],
        border_type[padding_mode],
        value=pad_val)

    return img
pad_val (int | float | tuple[int | float]): Values to be filled in the cut area. Defaults to 0. Returns: ndarray: The cutout image. """ channels = 1 if img.ndim == 2 else img.shape[2] if isinstance(shape, int): cut_h, cut_w = shape, shape else: assert isinstance(shape, tuple) and len(shape) == 2, \ f'shape must be a int or a tuple with length 2, but got type ' \ f'{type(shape)} instead.' cut_h, cut_w = shape if isinstance(pad_val, (int, float)): pad_val = tuple([pad_val] * channels) elif isinstance(pad_val, tuple): assert len(pad_val) == channels, \ 'Expected the num of elements in tuple equals the channels' \ 'of input image. Found {} vs {}'.format( len(pad_val), channels) else: raise TypeError(f'Invalid type {type(pad_val)} for `pad_val`') img_h, img_w = img.shape[:2] y0 = np.random.uniform(img_h) x0 = np.random.uniform(img_w) y1 = int(max(0, y0 - cut_h / 2.)) x1 = int(max(0, x0 - cut_w / 2.)) y2 = min(img_h, y1 + cut_h) x2 = min(img_w, x1 + cut_w) if img.ndim == 2: patch_shape = (y2 - y1, x2 - x1) else: patch_shape = (y2 - y1, x2 - x1, channels) img_cutout = img.copy() patch = np.array( pad_val, dtype=img.dtype) * np.ones( patch_shape, dtype=img.dtype) img_cutout[y1:y2, x1:x2, ...] = patch return img_cutout def _get_shear_matrix(magnitude, direction='horizontal'): """Generate the shear matrix for transformation. Args: magnitude (int | float): The magnitude used for shear. direction (str): The flip direction, either "horizontal" or "vertical". Returns: ndarray: The shear matrix with dtype float32. """ if direction == 'horizontal': shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]]) elif direction == 'vertical': shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]]) return shear_matrix def imshear(img, magnitude, direction='horizontal', border_value=0, interpolation='bilinear'): """Shear an image. Args: img (ndarray): Image to be sheared with format (h, w) or (h, w, c). magnitude (int | float): The magnitude used for shear. 
direction (str): The flip direction, either "horizontal" or "vertical". border_value (int | tuple[int]): Value used in case of a constant border. interpolation (str): Same as :func:`resize`. Returns: ndarray: The sheared image. """ assert direction in ['horizontal', 'vertical'], f'Invalid direction: {direction}' height, width = img.shape[:2] if img.ndim == 2: channels = 1 elif img.ndim == 3: channels = img.shape[-1] if isinstance(border_value, int): border_value = tuple([border_value] * channels) elif isinstance(border_value, tuple): assert len(border_value) == channels, \ 'Expected the num of elements in tuple equals the channels' \ 'of input image. Found {} vs {}'.format( len(border_value), channels) else: raise ValueError( f'Invalid type {type(border_value)} for `border_value`') shear_matrix = _get_shear_matrix(magnitude, direction) sheared = cv2.warpAffine( img, shear_matrix, (width, height), # Note case when the number elements in `border_value` # greater than 3 (e.g. shearing masks whose channels large # than 3) will raise TypeError in `cv2.warpAffine`. # Here simply slice the first 3 values in `border_value`. borderValue=border_value[:3], flags=cv2_interp_codes[interpolation]) return sheared def _get_translate_matrix(offset, direction='horizontal'): """Generate the translate matrix. Args: offset (int | float): The offset used for translate. direction (str): The translate direction, either "horizontal" or "vertical". Returns: ndarray: The translate matrix with dtype float32. """ if direction == 'horizontal': translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]]) elif direction == 'vertical': translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]]) return translate_matrix def imtranslate(img, offset, direction='horizontal', border_value=0, interpolation='bilinear'): """Translate an image. Args: img (ndarray): Image to be translated with format (h, w) or (h, w, c). offset (int | float): The offset used for translate. 
direction (str): The translate direction, either "horizontal" or "vertical". border_value (int | tuple[int]): Value used in case of a constant border. interpolation (str): Same as :func:`resize`. Returns: ndarray: The translated image. """ assert direction in ['horizontal', 'vertical'], f'Invalid direction: {direction}' height, width = img.shape[:2] if img.ndim == 2: channels = 1 elif img.ndim == 3: channels = img.shape[-1] if isinstance(border_value, int): border_value = tuple([border_value] * channels) elif isinstance(border_value, tuple): assert len(border_value) == channels, \ 'Expected the num of elements in tuple equals the channels' \ 'of input image. Found {} vs {}'.format( len(border_value), channels) else: raise ValueError( f'Invalid type {type(border_value)} for `border_value`.') translate_matrix = _get_translate_matrix(offset, direction) translated = cv2.warpAffine( img, translate_matrix, (width, height), # Note case when the number elements in `border_value` # greater than 3 (e.g. translating masks whose channels # large than 3) will raise TypeError in `cv2.warpAffine`. # Here simply slice the first 3 values in `border_value`. borderValue=border_value[:3], flags=cv2_interp_codes[interpolation]) return translated ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/image/io.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
import io import os.path as osp import warnings from pathlib import Path import cv2 import numpy as np from cv2 import (IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_IGNORE_ORIENTATION, IMREAD_UNCHANGED) from mmcv.fileio import FileClient from mmcv.utils import is_filepath, is_str try: from turbojpeg import TJCS_RGB, TJPF_BGR, TJPF_GRAY, TurboJPEG except ImportError: TJCS_RGB = TJPF_GRAY = TJPF_BGR = TurboJPEG = None try: from PIL import Image, ImageOps except ImportError: Image = None try: import tifffile except ImportError: tifffile = None jpeg = None supported_backends = ['cv2', 'turbojpeg', 'pillow', 'tifffile'] imread_flags = { 'color': IMREAD_COLOR, 'grayscale': IMREAD_GRAYSCALE, 'unchanged': IMREAD_UNCHANGED, 'color_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_COLOR, 'grayscale_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_GRAYSCALE } imread_backend = 'cv2' def use_backend(backend): """Select a backend for image decoding. Args: backend (str): The image decoding backend type. Options are `cv2`, `pillow`, `turbojpeg` (see https://github.com/lilohuang/PyTurboJPEG) and `tifffile`. `turbojpeg` is faster but it only supports `.jpeg` file format. 
""" assert backend in supported_backends global imread_backend imread_backend = backend if imread_backend == 'turbojpeg': if TurboJPEG is None: raise ImportError('`PyTurboJPEG` is not installed') global jpeg if jpeg is None: jpeg = TurboJPEG() elif imread_backend == 'pillow': if Image is None: raise ImportError('`Pillow` is not installed') elif imread_backend == 'tifffile': if tifffile is None: raise ImportError('`tifffile` is not installed') def _jpegflag(flag='color', channel_order='bgr'): channel_order = channel_order.lower() if channel_order not in ['rgb', 'bgr']: raise ValueError('channel order must be either "rgb" or "bgr"') if flag == 'color': if channel_order == 'bgr': return TJPF_BGR elif channel_order == 'rgb': return TJCS_RGB elif flag == 'grayscale': return TJPF_GRAY else: raise ValueError('flag must be "color" or "grayscale"') def _pillow2array(img, flag='color', channel_order='bgr'): """Convert a pillow image to numpy array. Args: img (:obj:`PIL.Image.Image`): The image loaded using PIL flag (str): Flags specifying the color type of a loaded image, candidates are 'color', 'grayscale' and 'unchanged'. Default to 'color'. channel_order (str): The channel order of the output image array, candidates are 'bgr' and 'rgb'. Default to 'bgr'. Returns: np.ndarray: The converted numpy array """ channel_order = channel_order.lower() if channel_order not in ['rgb', 'bgr']: raise ValueError('channel order must be either "rgb" or "bgr"') if flag == 'unchanged': array = np.array(img) if array.ndim >= 3 and array.shape[2] >= 3: # color image array[:, :, :3] = array[:, :, (2, 1, 0)] # RGB to BGR else: # Handle exif orientation tag if flag in ['color', 'grayscale']: img = ImageOps.exif_transpose(img) # If the image mode is not 'RGB', convert it to 'RGB' first. 
if img.mode != 'RGB': if img.mode != 'LA': # Most formats except 'LA' can be directly converted to RGB img = img.convert('RGB') else: # When the mode is 'LA', the default conversion will fill in # the canvas with black, which sometimes shadows black objects # in the foreground. # # Therefore, a random color (124, 117, 104) is used for canvas img_rgba = img.convert('RGBA') img = Image.new('RGB', img_rgba.size, (124, 117, 104)) img.paste(img_rgba, mask=img_rgba.split()[3]) # 3 is alpha if flag in ['color', 'color_ignore_orientation']: array = np.array(img) if channel_order != 'rgb': array = array[:, :, ::-1] # RGB to BGR elif flag in ['grayscale', 'grayscale_ignore_orientation']: img = img.convert('L') array = np.array(img) else: raise ValueError( 'flag must be "color", "grayscale", "unchanged", ' f'"color_ignore_orientation" or "grayscale_ignore_orientation"' f' but got {flag}') return array def imread(img_or_path, flag='color', channel_order='bgr', backend=None, file_client_args=None): """Read an image. Note: In v1.4.1 and later, add `file_client_args` parameters. Args: img_or_path (ndarray or str or Path): Either a numpy array or str or pathlib.Path. If it is a numpy array (loaded image), then it will be returned as is. flag (str): Flags specifying the color type of a loaded image, candidates are `color`, `grayscale`, `unchanged`, `color_ignore_orientation` and `grayscale_ignore_orientation`. By default, `cv2` and `pillow` backend would rotate the image according to its EXIF info unless called with `unchanged` or `*_ignore_orientation` flags. `turbojpeg` and `tifffile` backend always ignore image's EXIF info regardless of the flag. The `turbojpeg` backend only supports `color` and `grayscale`. channel_order (str): Order of channel, candidates are `bgr` and `rgb`. backend (str | None): The image decoding backend type. Options are `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`. 
def imfrombytes(content, flag='color', channel_order='bgr', backend=None):
    """Read an image from bytes.

    Args:
        content (bytes): Image bytes got from files or other streams.
        flag (str): Same as :func:`imread`.
        channel_order (str): Order of channel, candidates are `bgr` and
            `rgb`. Not used by the `tifffile` backend.
        backend (str | None): The image decoding backend type. Options are
            `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`. If backend is
            None, the global imread_backend specified by
            ``mmcv.use_backend()`` will be used. Default: None.

    Returns:
        ndarray: Loaded image array. With the `cv2` backend the result of
        ``cv2.imdecode`` is returned as is when it is None.

    Raises:
        ValueError: If `backend` is not one of the supported backends.

    Examples:
        >>> img_path = '/path/to/img.jpg'
        >>> with open(img_path, 'rb') as f:
        >>>     img_buff = f.read()
        >>> img = mmcv.imfrombytes(img_buff)
        >>> img = mmcv.imfrombytes(img_buff, flag='color', channel_order='rgb')
        >>> img = mmcv.imfrombytes(img_buff, backend='pillow')
        >>> img = mmcv.imfrombytes(img_buff, backend='cv2')
    """

    if backend is None:
        backend = imread_backend
    if backend not in supported_backends:
        raise ValueError(
            f'backend: {backend} is not supported. Supported '
            "backends are 'cv2', 'turbojpeg', 'pillow', 'tifffile'")
    if backend == 'turbojpeg':
        img = jpeg.decode(content, _jpegflag(flag, channel_order))
        # Drop the trailing singleton channel axis of grayscale results.
        if img.shape[-1] == 1:
            img = img[:, :, 0]
        return img
    elif backend == 'pillow':
        with io.BytesIO(content) as buff:
            img = Image.open(buff)
            img = _pillow2array(img, flag, channel_order)
        return img
    elif backend == 'tifffile':
        with io.BytesIO(content) as buff:
            img = tifffile.imread(buff)
        return img
    else:
        img_np = np.frombuffer(content, np.uint8)
        flag = imread_flags[flag] if is_str(flag) else flag
        img = cv2.imdecode(img_np, flag)
        # Both colour flags decode to BGR, so both need the swap when RGB is
        # requested. IMREAD_UNCHANGED is deliberately excluded.
        color_flags = (IMREAD_COLOR, IMREAD_IGNORE_ORIENTATION | IMREAD_COLOR)
        wants_rgb = channel_order.lower() == 'rgb'
        # Skip the conversion when decoding yielded None.
        if img is not None and flag in color_flags and wants_rgb:
            cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)  # inplace
        return img
def tensor2imgs(tensor, mean=None, std=None, to_rgb=True):
    """Convert tensor to 3-channel images or 1-channel gray images.

    Args:
        tensor (torch.Tensor): Tensor that contains multiple images, shape (
            N, C, H, W). :math:`C` can be either 3 or 1.
        mean (tuple[float], optional): Mean of images. If None,
            (0, 0, 0) will be used for tensor with 3-channel,
            while (0, ) for tensor with 1-channel. Defaults to None.
        std (tuple[float], optional): Standard deviation of images. If None,
            (1, 1, 1) will be used for tensor with 3-channel,
            while (1, ) for tensor with 1-channel. Defaults to None.
        to_rgb (bool, optional): Whether the tensor was converted to RGB
            format in the first place. If so, convert it back to BGR.
            For the tensor with 1 channel, it must be False. Defaults to True.

    Returns:
        list[np.ndarray]: A list that contains multiple images, each a
        C-contiguous uint8 array.

    Raises:
        RuntimeError: If pytorch is not installed.
        AssertionError: If `tensor` is not a 4-D tensor with 1 or 3 channels,
            or `mean` / `std` / `to_rgb` do not match the channel count.
    """

    if torch is None:
        raise RuntimeError('pytorch is not installed')
    assert torch.is_tensor(tensor) and tensor.ndim == 4
    channels = tensor.size(1)
    assert channels in [1, 3]
    if mean is None:
        mean = (0, ) * channels
    if std is None:
        std = (1, ) * channels
    assert (channels == len(mean) == len(std) == 3) or \
        (channels == len(mean) == len(std) == 1 and not to_rgb)

    mean = np.array(mean, dtype=np.float32)
    std = np.array(std, dtype=np.float32)

    # detach() lets tensors that require grad be converted; the whole batch
    # is moved to host memory once instead of once per image.
    batch = tensor.detach().cpu().numpy()

    imgs = []
    for chw in batch:
        # (C, H, W) -> (H, W, C)
        img = chw.transpose(1, 2, 0)
        img = mmcv.imdenormalize(
            img, mean, std, to_bgr=to_rgb).astype(np.uint8)
        imgs.append(np.ascontiguousarray(img))
    return imgs
""" # cv2 inplace normalization does not accept uint8 assert img.dtype != np.uint8 mean = np.float64(mean.reshape(1, -1)) stdinv = 1 / np.float64(std.reshape(1, -1)) if to_rgb: cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace cv2.subtract(img, mean, img) # inplace cv2.multiply(img, stdinv, img) # inplace return img def imdenormalize(img, mean, std, to_bgr=True): assert img.dtype != np.uint8 mean = mean.reshape(1, -1).astype(np.float64) std = std.reshape(1, -1).astype(np.float64) img = cv2.multiply(img, std) # make a copy cv2.add(img, mean, img) # inplace if to_bgr: cv2.cvtColor(img, cv2.COLOR_RGB2BGR, img) # inplace return img def iminvert(img): """Invert (negate) an image. Args: img (ndarray): Image to be inverted. Returns: ndarray: The inverted image. """ return np.full_like(img, 255) - img def solarize(img, thr=128): """Solarize an image (invert all pixel values above a threshold) Args: img (ndarray): Image to be solarized. thr (int): Threshold for solarizing (0 - 255). Returns: ndarray: The solarized image. """ img = np.where(img < thr, img, 255 - img) return img def posterize(img, bits): """Posterize an image (reduce the number of bits for each color channel) Args: img (ndarray): Image to be posterized. bits (int): Number of bits (1 to 8) to use for posterizing. Returns: ndarray: The posterized image. """ shift = 8 - bits img = np.left_shift(np.right_shift(img, shift), shift) return img def adjust_color(img, alpha=1, beta=None, gamma=0): r"""It blends the source image and its gray image: .. math:: output = img * alpha + gray\_img * beta + gamma Args: img (ndarray): The input source image. alpha (int | float): Weight for the source image. Default 1. beta (int | float): Weight for the converted gray image. If None, it's assigned the value (1 - `alpha`). gamma (int | float): Scalar added to each sum. Same as :func:`cv2.addWeighted`. Default 0. Returns: ndarray: Colored image which has the same size and dtype as input. 
""" gray_img = bgr2gray(img) gray_img = np.tile(gray_img[..., None], [1, 1, 3]) if beta is None: beta = 1 - alpha colored_img = cv2.addWeighted(img, alpha, gray_img, beta, gamma) if not colored_img.dtype == np.uint8: # Note when the dtype of `img` is not the default `np.uint8` # (e.g. np.float32), the value in `colored_img` got from cv2 # is not guaranteed to be in range [0, 255], so here clip # is needed. colored_img = np.clip(colored_img, 0, 255) return colored_img def imequalize(img): """Equalize the image histogram. This function applies a non-linear mapping to the input image, in order to create a uniform distribution of grayscale values in the output image. Args: img (ndarray): Image to be equalized. Returns: ndarray: The equalized image. """ def _scale_channel(im, c): """Scale the data in the corresponding channel.""" im = im[:, :, c] # Compute the histogram of the image channel. histo = np.histogram(im, 256, (0, 255))[0] # For computing the step, filter out the nonzeros. nonzero_histo = histo[histo > 0] step = (np.sum(nonzero_histo) - nonzero_histo[-1]) // 255 if not step: lut = np.array(range(256)) else: # Compute the cumulative sum, shifted by step // 2 # and then normalized by step. lut = (np.cumsum(histo) + (step // 2)) // step # Shift lut, prepending with 0. lut = np.concatenate([[0], lut[:-1]], 0) # handle potential integer overflow lut[lut > 255] = 255 # If step is zero, return the original image. # Otherwise, index from lut. return np.where(np.equal(step, 0), im, lut[im]) # Scales each channel independently and then stacks # the result. s1 = _scale_channel(img, 0) s2 = _scale_channel(img, 1) s3 = _scale_channel(img, 2) equalized_img = np.stack([s1, s2, s3], axis=-1) return equalized_img.astype(img.dtype) def adjust_brightness(img, factor=1.): """Adjust image brightness. This function controls the brightness of an image. An enhancement factor of 0.0 gives a black image. A factor of 1.0 gives the original image. 
def auto_contrast(img, cutoff=0):
    """Auto adjust image contrast.

    This function maximize (normalize) image contrast by first removing cutoff
    percent of the lightest and darkest pixels from the histogram and remapping
    the image so that the darkest pixel becomes black (0), and the lightest
    becomes white (255).

    Args:
        img (ndarray): Image to be contrasted, either (h, w) or (h, w, c)
            with integer pixel values in [0, 255]. Every channel is
            processed independently.
        cutoff (int | float | tuple): The cutoff percent of the lightest and
            darkest pixels to be removed. If given as tuple, it shall be
            (low, high). Otherwise, the single value will be used for both.
            Defaults to 0.

    Returns:
        ndarray: The contrasted image, same shape and dtype as `img`.

    Raises:
        AssertionError: If `cutoff` is not an int, float or tuple.
    """

    def _auto_contrast_channel(im, cutoff):
        # Compute the histogram of the image channel.
        histo = np.histogram(im, 256, (0, 255))[0]
        # Remove cut-off percent pixels from histo
        histo_sum = np.cumsum(histo)
        cut_low = histo_sum[-1] * cutoff[0] // 100
        cut_high = histo_sum[-1] - histo_sum[-1] * cutoff[1] // 100
        histo_sum = np.clip(histo_sum, cut_low, cut_high) - cut_low
        histo = np.concatenate([[histo_sum[0]], np.diff(histo_sum)], 0)

        # Compute mapping
        occupied = np.nonzero(histo)[0]
        # The cutoff can empty every bin (e.g. cutoff=50); there is then no
        # darkest / lightest level to stretch between.
        if occupied.size == 0:
            return im
        low, high = occupied[0], occupied[-1]
        # If all the values have been cut off, return the origin img
        if low >= high:
            return im
        scale = 255.0 / (high - low)
        offset = -low * scale
        lut = np.clip(np.arange(256) * scale + offset, 0, 255)
        return lut[im]

    if isinstance(cutoff, (int, float)):
        cutoff = (cutoff, cutoff)
    else:
        assert isinstance(cutoff, tuple), 'cutoff must be of type int, ' \
            f'float or tuple, but got {type(cutoff)} instead.'

    # Single-channel image without a channel axis.
    if img.ndim == 2:
        return _auto_contrast_channel(img, cutoff).astype(img.dtype)

    # Auto adjusts contrast for each channel independently and then stacks
    # the result.
    channels = [
        _auto_contrast_channel(img[:, :, c], cutoff)
        for c in range(img.shape[2])
    ]
    contrasted_img = np.stack(channels, axis=-1)
    return contrasted_img.astype(img.dtype)
A factor of 1.0 gives the original image. And a factor of 2.0 gives a sharpened image. It blends the source image and the degenerated mean image: .. math:: output = img * factor + degenerated * (1 - factor) Args: img (ndarray): Image to be sharpened. BGR order. factor (float): Same as :func:`mmcv.adjust_brightness`. kernel (np.ndarray, optional): Filter kernel to be applied on the img to obtain the degenerated img. Defaults to None. Note: No value sanity check is enforced on the kernel set by users. So with an inappropriate kernel, the ``adjust_sharpness`` may fail to perform the function its name indicates but end up performing whatever transform determined by the kernel. Returns: ndarray: The sharpened image. """ if kernel is None: # adopted from PIL.ImageFilter.SMOOTH kernel = np.array([[1., 1., 1.], [1., 5., 1.], [1., 1., 1.]]) / 13 assert isinstance(kernel, np.ndarray), \ f'kernel must be of type np.ndarray, but got {type(kernel)} instead.' assert kernel.ndim == 2, \ f'kernel must have a dimension of 2, but got {kernel.ndim} instead.' degenerated = cv2.filter2D(img, -1, kernel) sharpened_img = cv2.addWeighted( img.astype(np.float32), factor, degenerated.astype(np.float32), 1 - factor, 0) sharpened_img = np.clip(sharpened_img, 0, 255) return sharpened_img.astype(img.dtype) def adjust_lighting(img, eigval, eigvec, alphastd=0.1, to_rgb=True): """AlexNet-style PCA jitter. This data augmentation is proposed in `ImageNet Classification with Deep Convolutional Neural Networks `_. Args: img (ndarray): Image to be adjusted lighting. BGR order. eigval (ndarray): the eigenvalue of the convariance matrix of pixel values, respectively. eigvec (ndarray): the eigenvector of the convariance matrix of pixel values, respectively. alphastd (float): The standard deviation for distribution of alpha. Defaults to 0.1 to_rgb (bool): Whether to convert img to rgb. Returns: ndarray: The adjusted image. 
""" assert isinstance(eigval, np.ndarray) and isinstance(eigvec, np.ndarray), \ f'eigval and eigvec should both be of type np.ndarray, got ' \ f'{type(eigval)} and {type(eigvec)} instead.' assert eigval.ndim == 1 and eigvec.ndim == 2 assert eigvec.shape == (3, eigval.shape[0]) n_eigval = eigval.shape[0] assert isinstance(alphastd, float), 'alphastd should be of type float, ' \ f'got {type(alphastd)} instead.' img = img.copy().astype(np.float32) if to_rgb: cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace alpha = np.random.normal(0, alphastd, n_eigval) alter = eigvec \ * np.broadcast_to(alpha.reshape(1, n_eigval), (3, n_eigval)) \ * np.broadcast_to(eigval.reshape(1, n_eigval), (3, n_eigval)) alter = np.broadcast_to(alter.sum(axis=1).reshape(1, 1, 3), img.shape) img_adjusted = img + alter return img_adjusted def lut_transform(img, lut_table): """Transform array by look-up table. The function lut_transform fills the output array with values from the look-up table. Indices of the entries are taken from the input array. Args: img (ndarray): Image to be transformed. lut_table (ndarray): look-up table of 256 elements; in case of multi-channel input array, the table should either have a single channel (in this case the same table is used for all channels) or the same number of channels as in the input array. Returns: ndarray: The transformed image. """ assert isinstance(img, np.ndarray) assert 0 <= np.min(img) and np.max(img) <= 255 assert isinstance(lut_table, np.ndarray) assert lut_table.shape == (256, ) return cv2.LUT(np.array(img, dtype=np.uint8), lut_table) def clahe(img, clip_limit=40.0, tile_grid_size=(8, 8)): """Use CLAHE method to process the image. See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J]. Graphics Gems, 1994:474-485.` for more information. Args: img (ndarray): Image to be processed. clip_limit (float): Threshold for contrast limiting. Default: 40.0. tile_grid_size (tuple[int]): Size of grid for histogram equalization. 
Input image will be divided into equally sized rectangular tiles. It defines the number of tiles in row and column. Default: (8, 8). Returns: ndarray: The processed image. """ assert isinstance(img, np.ndarray) assert img.ndim == 2 assert isinstance(clip_limit, (float, int)) assert is_tuple_of(tile_grid_size, int) assert len(tile_grid_size) == 2 clahe = cv2.createCLAHE(clip_limit, tile_grid_size) return clahe.apply(np.array(img, dtype=np.uint8)) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/onnx/__init__.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from .info import is_custom_op_loaded from .symbolic import register_extra_symbolics __all__ = ['register_extra_symbolics', 'is_custom_op_loaded'] ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/onnx/info.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import os import torch def is_custom_op_loaded(): flag = False try: from ..tensorrt import is_tensorrt_plugin_loaded flag = is_tensorrt_plugin_loaded() except (ImportError, ModuleNotFoundError): pass if not flag: try: from ..ops import get_onnxruntime_op_path ort_lib_path = get_onnxruntime_op_path() flag = os.path.exists(ort_lib_path) except (ImportError, ModuleNotFoundError): pass return flag or torch.__version__ == 'parrots' ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/onnx/onnx_utils/__init__.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/onnx/onnx_utils/symbolic_helper.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
"""Modified from https://github.com/pytorch/pytorch.""" import warnings from functools import wraps from sys import maxsize import torch import torch.onnx # This import monkey-patches graph manipulation methods on Graph, used for the # ONNX symbolics import torch.onnx.utils from torch._C import ListType # --------------------------------------------------------------------------------- # Helper functions # --------------------------------------------------------------------------------- # Save some builtins as locals, because we'll shadown them below _sum = sum def _parse_arg(value, desc): if desc == 'none': return value if desc == 'v' or not _is_value(value): return value if value.node().mustBeNone(): return None if value.node().kind() == 'onnx::Constant': tval = value.node()['value'] if desc == 'i': return int(tval) elif desc == 'f': return float(tval) elif desc == 'b': return bool(tval) elif desc == 's': return str(tval) elif desc == 't': return tval elif desc == 'is': return [int(v) for v in tval] elif desc == 'fs': return [float(v) for v in tval] else: raise RuntimeError( "ONNX symbolic doesn't know to interpret Constant node") elif value.node().kind() == 'prim::ListConstruct': if desc == 'is': for v in value.node().inputs(): if v.node().kind() != 'onnx::Constant': raise RuntimeError( "Failed to export an ONNX attribute '" + v.node().kind() + "', since it's not constant, please try to make " 'things (e.g., kernel size) static if possible') return [int(v.node()['value']) for v in value.node().inputs()] else: raise RuntimeError( "ONNX symbolic doesn't know to interpret ListConstruct node") raise RuntimeError('Unexpected node type: {}'.format(value.node().kind())) def _maybe_get_const(value, desc): if _is_value(value) and value.node().kind() == 'onnx::Constant': return _parse_arg(value, desc) return value def _maybe_get_scalar(value): value_t = _maybe_get_const(value, 't') if isinstance(value_t, torch.Tensor) and value_t.shape == (): return value_t return value 
def _get_const(value, desc, arg_name): if _is_value(value) and value.node().kind() not in ('onnx::Constant', 'prim::Constant'): raise RuntimeError('ONNX symbolic expected a constant' ' value of the {} argument, got `{}`'.format( arg_name, value)) return _parse_arg(value, desc) def _unpack_list(list_value): list_node = list_value.node() assert list_node.kind() == 'prim::ListConstruct' return list(list_node.inputs()) # Check if list_value is output from prim::ListConstruct # This is usually called before _unpack_list to ensure the list can be # unpacked. def _is_packed_list(list_value): return _is_value( list_value) and list_value.node().kind() == 'prim::ListConstruct' def parse_args(*arg_descriptors): def decorator(fn): fn._arg_descriptors = arg_descriptors def wrapper(g, *args): # some args may be optional, so the length may be smaller assert len(arg_descriptors) >= len(args) args = [ _parse_arg(arg, arg_desc) for arg, arg_desc in zip(args, arg_descriptors) ] return fn(g, *args) # In Python 2 functools.wraps chokes on partially applied functions, so # we need this as a workaround try: wrapper = wraps(fn)(wrapper) except Exception: pass return wrapper return decorator def _scalar(x): """Convert a scalar tensor into a Python value.""" assert x.numel() == 1 return x.item() def _if_scalar_type_as(g, self, tensor): """Convert self into the same type of tensor, as necessary.""" if isinstance(self, torch._C.Value): return self scalar_type = tensor.type().scalarType() if scalar_type: ty = scalar_type.lower() return getattr(self, ty)() return self def _is_none(x): return x.node().mustBeNone() def _is_value(x): return isinstance(x, torch._C.Value) def _is_tensor_list(x): return x.type().isSubtypeOf(ListType.ofTensors()) def _unimplemented(op, msg): warnings.warn('ONNX export failed on ' + op + ' because ' + msg + ' not supported') def _try_get_scalar_type(*args): for arg in args: try: return arg.type().scalarType() except RuntimeError: pass return None def _topk_helper(g, 
input, k, dim, largest=True, sorted=False, out=None): if out is not None: _unimplemented('TopK', 'Out parameter is not supported') if not _is_value(k): k = g.op('Constant', value_t=torch.tensor([k], dtype=torch.int64)) else: k = g.op('Reshape', k, g.op('Constant', value_t=torch.tensor([1]))) return g.op( 'TopK', input, k, axis_i=dim, largest_i=largest, sorted_i=sorted, outputs=2) def _slice_helper(g, input, axes, starts, ends, steps=None, dynamic_slice=False): # TODO(ruobing): add support for opset<10 from torch.onnx.symbolic_opset10 import _slice return _slice(g, input, axes, starts, ends, steps, dynamic_slice) def _unsqueeze_helper(g, input, dim): from torch.onnx.symbolic_opset9 import unsqueeze return unsqueeze(g, input, dim) def _interpolate_size_to_scales(g, input, output_size, dim): output_size = _maybe_get_const(output_size, 'is') if _is_value(output_size): offset = 2 offsets = g.op( 'Constant', value_t=torch.ones(offset, dtype=torch.float32)) dividend = g.op( 'Cast', output_size, to_i=cast_pytorch_to_onnx['Float']) divisor = _slice_helper( g, g.op('Shape', input), axes=[0], ends=[maxsize], starts=[offset]) divisor = g.op('Cast', divisor, to_i=cast_pytorch_to_onnx['Float']) scale_dims = g.op('Div', dividend, divisor) scales = g.op('Concat', offsets, scale_dims, axis_i=0) else: scales_constant = [ 1. 
if i < 2 else float(output_size[-(dim - i)]) / float(input.type().sizes()[-(dim - i)]) for i in range(0, dim) ] scales = g.op( 'Constant', value_t=torch.tensor(scales_constant, dtype=torch.float32)) return scales def _interpolate_get_scales_if_available(g, scales): if len(scales) == 0: return None # scales[0] is NoneType in Pytorch == 1.5.1 # scales[0] is TensorType with sizes = [] in Pytorch == 1.6.0 # scales[0] is ListType in Pytorch == 1.7.0 # scales[0] is TensorType with sizes = [2] in Pytorch == 1.8.0 scale_desc = 'fs' if scales[0].type().kind() == 'ListType' or ( scales[0].type().kind() == 'TensorType' and (sum(scales[0].type().sizes()) > 1)) else 'f' available_scales = _maybe_get_const( scales[0], scale_desc) != -1 and not _is_none(scales[0]) if not available_scales: return None offsets = g.op('Constant', value_t=torch.ones(2, dtype=torch.float32)) if scale_desc == 'fs': scales_list = g.op( 'Constant', value_t=torch.tensor(_maybe_get_const(scales[0], scale_desc))) # modify to support PyTorch==1.7.0 # https://github.com/pytorch/pytorch/blob/75ee5756715e7161314ce037474843b68f69fc04/torch/onnx/symbolic_helper.py#L375 # noqa: E501 scales = g.op('Concat', offsets, scales_list, axis_i=0) else: # for PyTorch < 1.7.0 scales_list = [] for scale in scales: unsqueezed_scale = _unsqueeze_helper(g, scale, 0) # ONNX only supports float for the scales. double -> float. 
unsqueezed_scale = g.op( 'Cast', unsqueezed_scale, to_i=cast_pytorch_to_onnx['Float']) scales_list.append(unsqueezed_scale) scales = g.op('Concat', offsets, *scales_list, axis_i=0) return scales def _get_interpolate_attributes(g, mode, args): if mode == 'nearest': align_corners = None scales = args[0:] else: align_corners = args[0] scales = args[1:] scales = _interpolate_get_scales_if_available(g, scales) return scales, align_corners def _interpolate_get_scales(g, scale_factor, dim): offsets = g.op('Constant', value_t=torch.ones(2, dtype=torch.float32)) if isinstance(scale_factor.type(), torch._C.ListType): return g.op('Concat', offsets, scale_factor, axis_i=0) else: scale_factor = _unsqueeze_helper(g, scale_factor, 0) scale_factor = g.op( 'Cast', scale_factor, to_i=cast_pytorch_to_onnx['Float']) scales = [scale_factor for i in range(dim - 2)] scale_factor = g.op('Concat', offsets, *scales, axis_i=0) return scale_factor def _size_helper(g, self, dim): full_shape = g.op('Shape', self) from torch.onnx.symbolic_opset9 import select return select(g, full_shape, g.op('Constant', value_t=torch.tensor([0])), dim) def _avgpool_helper(tuple_fn, padding, kernel_size, stride, divisor_override, name): if divisor_override and divisor_override.node().kind() != 'prim::Constant': return _unimplemented(name, 'divisor_override') if not stride: stride = kernel_size padding = tuple(tuple_fn(padding)) return padding # Metaprogram symbolics for each ATen native specialized cast operator. # For e.g. 
# we specify a function named `_cast_uint8_t` that instantiates an
# ONNX cast node with `to` attribute 'UINT8'
#
# TODO: remove these once we support Type's in the JIT IR and we can once again
# use the unified toType operator
#
# Maps a PyTorch scalar-type name to the ONNX tensor data type passed as the
# ``to_i`` attribute of ``Cast`` nodes. The helpers in this module index it
# with literal names such as 'Float'; ``new_full`` in ``symbolic.py`` indexes
# it with the string returned by ``type().scalarType()``.
cast_pytorch_to_onnx = {
    'Byte': torch.onnx.TensorProtoDataType.UINT8,
    'Char': torch.onnx.TensorProtoDataType.INT8,
    'Double': torch.onnx.TensorProtoDataType.DOUBLE,
    'Float': torch.onnx.TensorProtoDataType.FLOAT,
    'Half': torch.onnx.TensorProtoDataType.FLOAT16,
    'Int': torch.onnx.TensorProtoDataType.INT32,
    'Long': torch.onnx.TensorProtoDataType.INT64,
    'Short': torch.onnx.TensorProtoDataType.INT16,
    'Bool': torch.onnx.TensorProtoDataType.BOOL,
    'ComplexFloat': torch.onnx.TensorProtoDataType.COMPLEX64,
    'ComplexDouble': torch.onnx.TensorProtoDataType.COMPLEX128,
    'Undefined': torch.onnx.TensorProtoDataType.UNDEFINED,
}

# Global set to store the list of quantized operators in the network.
# This is currently only used in the conversion of quantized ops from PT
# -> C2 via ONNX.
# NOTE(review): nothing in this module or in symbolic.py adds to or reads
# this set -- confirm whether it is still needed.
_quantized_ops = set()

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/onnx/symbolic.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
"""Modified from https://github.com/pytorch/pytorch.""" import os import numpy as np import torch from torch.nn.modules.utils import _pair, _single, _triple from torch.onnx.symbolic_helper import parse_args from torch.onnx.symbolic_registry import register_op from .onnx_utils import symbolic_helper as sym_help def _interpolate(name, dim, interpolate_mode): def symbolic_fn(g, input, output_size, *args): scales, align_corners = sym_help._get_interpolate_attributes( g, interpolate_mode, args) align_corners = sym_help._maybe_get_scalar(align_corners) transformation_mode = 'asymmetric' \ if interpolate_mode == 'nearest' \ else 'align_corners' if align_corners else 'pytorch_half_pixel' empty_tensor = g.op( 'Constant', value_t=torch.tensor([], dtype=torch.float32)) if scales is None: if 'ONNX_BACKEND' in os.environ and os.environ[ 'ONNX_BACKEND'] == 'TensorRT': input_size = input.type().sizes() # slice the first two dim input_size = input_size[:2] # convert output_size to int type output_size = sym_help._maybe_get_const(output_size, 'is') input_size.extend(output_size) output_size = g.op( 'Constant', value_t=torch.tensor(input_size, dtype=torch.int64)) else: input_size = g.op('Shape', input) input_size_beg = sym_help._slice_helper( g, input_size, axes=[0], ends=[2], starts=[0]) output_size = g.op( 'Cast', output_size, to_i=sym_help.cast_pytorch_to_onnx['Long']) output_size = g.op( 'Concat', input_size_beg, output_size, axis_i=0) scales = g.op( 'Constant', value_t=torch.tensor([], dtype=torch.float32)) return g.op( 'Resize', input, empty_tensor, # roi only takes effect with # coordinate_transformation_mode="tf_crop_and_resize" scales, # scales is not needed since we are sending out_size output_size, coordinate_transformation_mode_s=transformation_mode, cubic_coeff_a_f=-0.75, # only valid when mode="cubic" mode_s=interpolate_mode, # nearest, linear, or cubic nearest_mode_s='floor') # only valid when mode="nearest" else: return g.op( 'Resize', input, empty_tensor, # roi only 
takes effect with # coordinate_transformation_mode="tf_crop_and_resize" scales, # scales is not needed since we are sending out_size coordinate_transformation_mode_s=transformation_mode, cubic_coeff_a_f=-0.75, # only valid when mode="cubic" mode_s=interpolate_mode, # nearest, linear, or cubic nearest_mode_s='floor') # only valid when mode="nearest" return symbolic_fn upsample_nearest1d = _interpolate('upsample_nearest1d', 3, 'nearest') upsample_nearest2d = _interpolate('upsample_nearest2d', 4, 'nearest') upsample_nearest3d = _interpolate('upsample_nearest3d', 5, 'nearest') upsample_linear1d = _interpolate('upsample_linear1d', 3, 'linear') upsample_bilinear2d = _interpolate('upsample_bilinear2d', 4, 'linear') upsample_trilinear3d = _interpolate('upsample_trilinear3d', 5, 'linear') upsample_bicubic2d = _interpolate('upsample_bicubic2d', 4, 'cubic') @parse_args('v', 'v', 'i', 'i', 'i', 'none') def topk(g, self, k, dim, largest, sorted, out=None): return sym_help._topk_helper( g, self, k, dim, largest=largest, sorted=sorted, out=out) def masked_select(g, self, mask): from torch.onnx.symbolic_opset9 import expand_as, nonzero index = nonzero(g, expand_as(g, mask, self)) return g.op('GatherND', self, index) def _prepare_onnx_paddings(g, dim, pad): pad_len = torch.onnx.symbolic_opset9.size( g, pad, g.op('Constant', value_t=torch.tensor([0]))) # Set extension = [0] * (dim * 2 - len(pad)) extension = g.op( 'Sub', g.op('Mul', g.op('Constant', value_t=torch.tensor(dim, dtype=torch.int64)), g.op('Constant', value_t=torch.tensor(2, dtype=torch.int64))), pad_len) pad = g.op('Cast', pad, to_i=sym_help.cast_pytorch_to_onnx['Long']) paddings = g.op( 'Concat', pad, g.op( 'ConstantOfShape', extension, value_t=torch.tensor([0], dtype=torch.int64)), axis_i=0) paddings = g.op('Reshape', paddings, g.op('Constant', value_t=torch.tensor([-1, 2]))) paddings = g.op( 'Transpose', torch.onnx.symbolic_opset10.flip(g, paddings, [0]), perm_i=[1, 0]) paddings = g.op('Reshape', paddings, 
g.op('Constant', value_t=torch.tensor([-1]))) padding_c = g.op( 'Cast', paddings, to_i=sym_help.cast_pytorch_to_onnx['Long']) return padding_c def constant_pad_nd(g, input, padding, value=None): mode = 'constant' value = sym_help._maybe_get_scalar(value) value = sym_help._if_scalar_type_as(g, value, input) pad = _prepare_onnx_paddings(g, input.type().dim(), padding) return g.op('Pad', input, pad, value, mode_s=mode) def reflection_pad(g, input, padding): mode = 'reflect' paddings = _prepare_onnx_paddings(g, input.type().dim(), padding) return g.op('Pad', input, paddings, mode_s=mode) reflection_pad1d = reflection_pad reflection_pad2d = reflection_pad reflection_pad3d = reflection_pad def _avg_pool(name, tuple_fn): @parse_args('v', 'is', 'is', 'is', 'i', 'i', 'none') def symbolic_fn(g, input, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override=None): padding = sym_help._avgpool_helper(tuple_fn, padding, kernel_size, stride, divisor_override, name) if not stride: stride = kernel_size if count_include_pad: input = g.op( 'Pad', input, g.op( 'Constant', value_t=torch.tensor(((0, ) * 2 + padding) * 2)), mode_s='constant') padding = (0, ) * len(padding) output = g.op( 'AveragePool', input, kernel_shape_i=tuple_fn(kernel_size), strides_i=tuple_fn(stride), pads_i=padding * 2, ceil_mode_i=ceil_mode) return output return symbolic_fn avg_pool1d = _avg_pool('avg_pool1d', _single) avg_pool2d = _avg_pool('avg_pool2d', _pair) avg_pool3d = _avg_pool('avg_pool3d', _triple) def _get_im2col_indices_along_dim(g, input_d, kernel_size_d, dilation_d, padding_d, stride_d): # Input is always 4-D (N, C, H, W) # Calculate indices of sliding blocks along spatial dimension # Slide kernel over input each dim d: # each dimension d ranges from 0 to # input[d]+2xpadding[d]-dilation[d]x(kernel_size[d]-1) # with steps = stride blocks_d = g.op('Add', input_d, g.op('Constant', value_t=torch.tensor(padding_d * 2))) blocks_d = g.op( 'Sub', blocks_d, g.op( 'Constant', 
value_t=torch.tensor(dilation_d * (kernel_size_d - 1)))) # Stride kernel over input and find starting indices along dim d blocks_d_indices = g.op('Range', g.op('Constant', value_t=torch.tensor(0)), blocks_d, g.op('Constant', value_t=torch.tensor(stride_d))) # Apply dilation on kernel and find its indices along dim d kernel_grid = np.arange(0, kernel_size_d * dilation_d, dilation_d) kernel_grid = g.op('Constant', value_t=torch.tensor([kernel_grid])) # Broadcast and add kernel staring positions (indices) with # kernel_grid along dim d, to get block indices along dim d blocks_d_indices = g.op( 'Unsqueeze', blocks_d_indices, axes_i=[0]) # Reshape to [1, -1] kernel_mask = g.op('Reshape', kernel_grid, g.op('Constant', value_t=torch.tensor([-1, 1]))) block_mask = g.op('Add', blocks_d_indices, kernel_mask) return block_mask def _get_im2col_padded_input(g, input, padding_h, padding_w): # Input is always 4-D tensor (N, C, H, W) # Padding tensor has the following format: (padding_h, padding_w) # Reshape the padding to follow ONNX format: # (dim1_begin, dim2_begin,...,dim1_end, dim2_end,...) 
pad = g.op( 'Constant', value_t=torch.LongTensor([0, 0, padding_h, padding_w] * 2)) return g.op('Pad', input, pad) def _get_im2col_output_shape(g, input, kernel_h, kernel_w): batch_dim = size(g, input, g.op('Constant', value_t=torch.tensor(0))) channel_dim = size(g, input, g.op('Constant', value_t=torch.tensor(1))) channel_unfolded = g.op( 'Mul', channel_dim, g.op('Constant', value_t=torch.tensor(kernel_h * kernel_w))) return g.op( 'Concat', g.op('Unsqueeze', batch_dim, axes_i=[0]), g.op('Unsqueeze', channel_unfolded, axes_i=[0]), g.op('Constant', value_t=torch.tensor([-1])), axis_i=0) def size(g, self, dim=None): if dim is None: return g.op('Shape', self) return sym_help._size_helper(g, self, dim) @parse_args('v', 'is', 'is', 'is', 'is') def im2col(g, input, kernel_size, dilation, padding, stride): # Input is always 4-D tensor (N, C, H, W) # All other args are int[2] input_h = size(g, input, g.op('Constant', value_t=torch.tensor(2))) input_w = size(g, input, g.op('Constant', value_t=torch.tensor(3))) stride_h, stride_w = stride[0], stride[1] padding_h, padding_w = padding[0], padding[1] dilation_h, dilation_w = dilation[0], dilation[1] kernel_h, kernel_w = kernel_size[0], kernel_size[1] blocks_row_indices = _get_im2col_indices_along_dim(g, input_h, kernel_h, dilation_h, padding_h, stride_h) blocks_col_indices = _get_im2col_indices_along_dim(g, input_w, kernel_w, dilation_w, padding_w, stride_w) output_shape = _get_im2col_output_shape(g, input, kernel_h, kernel_w) padded_input = _get_im2col_padded_input(g, input, padding_h, padding_w) output = g.op('Gather', padded_input, blocks_row_indices, axis_i=2) output = g.op('Gather', output, blocks_col_indices, axis_i=4) output = g.op('Transpose', output, perm_i=[0, 1, 2, 4, 3, 5]) return g.op('Reshape', output, output_shape) @parse_args('v', 'i') def one_hot(g, self, num_classes): values = g.op('Constant', value_t=torch.LongTensor([0, 1])) depth = g.op('Constant', value_t=torch.LongTensor([num_classes])) return 
g.op('OneHot', self, depth, values, axis_i=-1) @parse_args('v', 'i', 'none') def softmax(g, input, dim, dtype=None): input_dim = input.type().dim() if input_dim: # TODO: remove this as onnx opset 11 spec allows negative axes if dim < 0: dim = input_dim + dim if input_dim == dim + 1: softmax = g.op('Softmax', input, axis_i=dim) if dtype and dtype.node().kind() != 'prim::Constant': parsed_dtype = sym_help._get_const(dtype, 'i', 'dtype') softmax = g.op( 'Cast', softmax, to_i=sym_help.scalar_type_to_onnx[parsed_dtype]) return softmax max_value = g.op('ReduceMax', input, axes_i=[dim], keepdims_i=1) input = g.op('Sub', input, max_value) exp = g.op('Exp', input) sum = g.op('ReduceSum', exp, axes_i=[dim]) softmax = g.op('Div', exp, sum) if dtype and dtype.node().kind() != 'prim::Constant': parsed_dtype = sym_help._get_const(dtype, 'i', 'dtype') softmax = g.op( 'Cast', softmax, to_i=sym_help.scalar_type_to_onnx[parsed_dtype]) return softmax def _adaptive_pool(name, type, tuple_fn, fn=None): @parse_args('v', 'is') def symbolic_fn(g, input, output_size): if output_size == [1] * len(output_size) and type == 'AveragePool': return g.op('GlobalAveragePool', input) if not input.isCompleteTensor(): if output_size == [1] * len(output_size): return g.op('GlobalMaxPool', input), None raise NotImplementedError( '[Adaptive pool]:input size not accessible') dim = input.type().sizes()[2:] if output_size == [1] * len(output_size) and type == 'MaxPool': return g.op('GlobalMaxPool', input), None # compute stride = floor(input_size / output_size) s = [int(dim[i] / output_size[i]) for i in range(0, len(dim))] # compute kernel_size = input_size - (output_size - 1) * stride k = [dim[i] - (output_size[i] - 1) * s[i] for i in range(0, len(dim))] # call max_poolxd_with_indices to get indices in the output if type == 'MaxPool': return fn(g, input, k, k, (0, ) * len(dim), (1, ) * len(dim), False) output = g.op( type, input, kernel_shape_i=tuple_fn(k), strides_i=tuple_fn(s), ceil_mode_i=False) return 
output return symbolic_fn adaptive_avg_pool1d = _adaptive_pool('adaptive_avg_pool1d', 'AveragePool', _single) adaptive_avg_pool2d = _adaptive_pool('adaptive_avg_pool2d', 'AveragePool', _pair) adaptive_avg_pool3d = _adaptive_pool('adaptive_avg_pool3d', 'AveragePool', _triple) def new_full(g, self, size, fill_value, dtype, layout, device, pin_memory=False): from torch.onnx.symbolic_opset9 import full if dtype is None and self.isCompleteTensor(): dtype = self.type().scalarType() dtype = sym_help.scalar_type_to_onnx.index( sym_help.cast_pytorch_to_onnx[dtype]) return full(g, size, fill_value, dtype, layout, device, pin_memory) @parse_args('v', 'v', 'i', 'i', 'i') def grid_sampler(g, input, grid, interpolation_mode, padding_mode, align_corners=False): return g.op( 'mmcv::grid_sampler', input, grid, interpolation_mode_i=interpolation_mode, padding_mode_i=padding_mode, align_corners_i=align_corners) @parse_args('v', 'i') def cummax(g, input, dim): return g.op('mmcv::cummax', input, dim_i=dim, outputs=2) @parse_args('v', 'i') def cummin(g, input, dim): return g.op('mmcv::cummin', input, dim_i=dim, outputs=2) @parse_args('v', 'v', 'is') def roll(g, input, shifts, dims): from torch.onnx.symbolic_opset9 import squeeze from packaging import version input_shape = g.op('Shape', input) need_flatten = len(dims) == 0 # If dims is not specified, the tensor will be flattened before # rolling and then restored to the original shape. 
if need_flatten: resize_shape = input_shape input = g.op('Reshape', input, g.op('Constant', value_t=torch.LongTensor([1, -1]))) input_shape = g.op('Shape', input) dims = [1] for index, dim in enumerate(dims): end_size = sym_help._slice_helper( g, input_shape, axes=[0], ends=[dim + 1], starts=[dim]) shift_size = sym_help._slice_helper( g, shifts, axes=[0], ends=[index + 1], starts=[index]) slice_size = g.op('Sub', end_size, shift_size) # Can not use Mod because tensorrt does not support div_size = g.op('Div', slice_size, end_size) slice_size = g.op('Sub', slice_size, g.op('Mul', end_size, div_size)) if version.parse(torch.__version__) >= version.parse('1.7.0'): # add dim=0 for pytorch 1.9.0 end_size = squeeze(g, end_size, 0) slice_size = squeeze(g, slice_size, 0) else: end_size = g.op('Squeeze', end_size) slice_size = g.op('Squeeze', slice_size) dim = torch.LongTensor([dim]) input_slice0 = sym_help._slice_helper( g, input, axes=dim, starts=torch.LongTensor([0]), ends=slice_size, dynamic_slice=True) input_slice1 = sym_help._slice_helper( g, input, axes=dim, ends=end_size, starts=slice_size, dynamic_slice=True) input = g.op('Concat', input_slice1, input_slice0, axis_i=dim) if need_flatten: input = g.op('Reshape', input, resize_shape) return input def register_extra_symbolics(opset=11): register_op('one_hot', one_hot, '', opset) register_op('im2col', im2col, '', opset) register_op('topk', topk, '', opset) register_op('softmax', softmax, '', opset) register_op('constant_pad_nd', constant_pad_nd, '', opset) register_op('reflection_pad1d', reflection_pad1d, '', opset) register_op('reflection_pad2d', reflection_pad2d, '', opset) register_op('reflection_pad3d', reflection_pad3d, '', opset) register_op('avg_pool1d', avg_pool1d, '', opset) register_op('avg_pool2d', avg_pool2d, '', opset) register_op('avg_pool3d', avg_pool3d, '', opset) register_op('adaptive_avg_pool1d', adaptive_avg_pool1d, '', opset) register_op('adaptive_avg_pool2d', adaptive_avg_pool2d, '', opset) 
register_op('adaptive_avg_pool3d', adaptive_avg_pool3d, '', opset) register_op('masked_select', masked_select, '', opset) register_op('upsample_nearest1d', upsample_nearest1d, '', opset) register_op('upsample_nearest2d', upsample_nearest2d, '', opset) register_op('upsample_nearest3d', upsample_nearest3d, '', opset) register_op('upsample_linear1d', upsample_linear1d, '', opset) register_op('upsample_bilinear2d', upsample_bilinear2d, '', opset) register_op('upsample_trilinear3d', upsample_trilinear3d, '', opset) register_op('upsample_bicubic2d', upsample_bicubic2d, '', opset) register_op('new_full', new_full, '', opset) register_op('grid_sampler', grid_sampler, '', opset) register_op('cummax', cummax, '', opset) register_op('cummin', cummin, '', opset) register_op('roll', roll, '', opset) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/__init__.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
from .active_rotated_filter import active_rotated_filter from .assign_score_withk import assign_score_withk from .ball_query import ball_query from .bbox import bbox_overlaps from .border_align import BorderAlign, border_align from .box_iou_rotated import box_iou_rotated from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive from .cc_attention import CrissCrossAttention from .contour_expand import contour_expand from .convex_iou import convex_giou, convex_iou from .corner_pool import CornerPool from .correlation import Correlation from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d from .deform_roi_pool import (DeformRoIPool, DeformRoIPoolPack, ModulatedDeformRoIPoolPack, deform_roi_pool) from .deprecated_wrappers import Conv2d_deprecated as Conv2d from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d from .deprecated_wrappers import Linear_deprecated as Linear from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d from .focal_loss import (SigmoidFocalLoss, SoftmaxFocalLoss, sigmoid_focal_loss, softmax_focal_loss) from .furthest_point_sample import (furthest_point_sample, furthest_point_sample_with_dist) from .fused_bias_leakyrelu import FusedBiasLeakyReLU, fused_bias_leakyrelu from .gather_points import gather_points from .group_points import GroupAll, QueryAndGroup, grouping_operation from .info import (get_compiler_version, get_compiling_cuda_version, get_onnxruntime_op_path) from .iou3d import boxes_iou_bev, nms_bev, nms_normal_bev from .knn import knn from .masked_conv import MaskedConv2d, masked_conv2d from .min_area_polygons import min_area_polygons from .modulated_deform_conv import (ModulatedDeformConv2d, ModulatedDeformConv2dPack, modulated_deform_conv2d) from .multi_scale_deform_attn import MultiScaleDeformableAttention from .nms import batched_nms, nms, nms_match, nms_rotated, soft_nms from .pixel_group import pixel_group from .point_sample import (SimpleRoIAlign, point_sample, 
rel_roi_point_to_rel_img_point) from .points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu, points_in_boxes_part) from .points_in_polygons import points_in_polygons from .points_sampler import PointsSampler from .psa_mask import PSAMask from .riroi_align_rotated import RiRoIAlignRotated, riroi_align_rotated from .roi_align import RoIAlign, roi_align from .roi_align_rotated import RoIAlignRotated, roi_align_rotated from .roi_pool import RoIPool, roi_pool from .roiaware_pool3d import RoIAwarePool3d from .roipoint_pool3d import RoIPointPool3d from .rotated_feature_align import rotated_feature_align from .saconv import SAConv2d from .scatter_points import DynamicScatter, dynamic_scatter from .sync_bn import SyncBatchNorm from .three_interpolate import three_interpolate from .three_nn import three_nn from .tin_shift import TINShift, tin_shift from .upfirdn2d import upfirdn2d from .voxelize import Voxelization, voxelization __all__ = [ 'bbox_overlaps', 'CARAFE', 'CARAFENaive', 'CARAFEPack', 'carafe', 'carafe_naive', 'CornerPool', 'DeformConv2d', 'DeformConv2dPack', 'deform_conv2d', 'DeformRoIPool', 'DeformRoIPoolPack', 'ModulatedDeformRoIPoolPack', 'deform_roi_pool', 'SigmoidFocalLoss', 'SoftmaxFocalLoss', 'sigmoid_focal_loss', 'softmax_focal_loss', 'get_compiler_version', 'get_compiling_cuda_version', 'get_onnxruntime_op_path', 'MaskedConv2d', 'masked_conv2d', 'ModulatedDeformConv2d', 'ModulatedDeformConv2dPack', 'modulated_deform_conv2d', 'batched_nms', 'nms', 'soft_nms', 'nms_match', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 'SyncBatchNorm', 'Conv2d', 'ConvTranspose2d', 'Linear', 'MaxPool2d', 'CrissCrossAttention', 'PSAMask', 'point_sample', 'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign', 'SAConv2d', 'TINShift', 'tin_shift', 'assign_score_withk', 'box_iou_rotated', 'RoIPointPool3d', 'nms_rotated', 'knn', 'ball_query', 'upfirdn2d', 'FusedBiasLeakyReLU', 'fused_bias_leakyrelu', 'rotated_feature_align', 'RiRoIAlignRotated', 'riroi_align_rotated', 
class ActiveRotatedFilterFunction(Function):
    """Autograd function that encodes orientation information and produces
    orientation-sensitive features.

    The details are described in the paper "Align Deep Features for Oriented
    Object Detection".
    """

    @staticmethod
    def forward(ctx, input, indices):
        """
        Args:
            input (torch.Tensor): Input features with shape
                [num_output_planes, num_input_planes, num_orientations, H, W].
            indices (torch.Tensor): Indices with shape
                [num_orientations, H, W, num_rotations].

        Returns:
            torch.Tensor: Refined features with shape [num_output_planes *
            num_rotations, num_input_planes * num_orientations, H, W].
        """
        ctx.save_for_backward(input, indices)
        # Only the two plane counts are taken from ``input``; the
        # orientation count and spatial size come from ``indices``.
        out_planes, in_planes, _, _, _ = input.size()
        orientations, height, width, rotations = indices.size()
        refined = input.new_zeros(
            (out_planes * rotations, in_planes * orientations, height, width))
        # The extension writes its result into ``refined`` in place.
        ext_module.active_rotated_filter_forward(input, indices, refined)
        return refined

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_out):
        """
        Args:
            grad_out (torch.Tensor): The gradient of output features
                with shape [num_output_planes * num_rotations,
                num_input_planes * num_orientations, H, W].

        Returns:
            torch.Tensor: The gradient of input features with shape
            [num_output_planes, num_input_planes, num_orientations, H, W].
        """
        saved_input, saved_indices = ctx.saved_tensors
        grad_input = torch.zeros_like(saved_input)
        ext_module.active_rotated_filter_backward(grad_out, saved_indices,
                                                  grad_input)
        # ``indices`` receives no gradient.
        return grad_input, None
""" @staticmethod def forward(ctx, scores, point_features, center_features, knn_idx, aggregate='sum'): """ Args: scores (torch.Tensor): (B, npoint, K, M), predicted scores to aggregate weight matrices in the weight bank. ``npoint`` is the number of sampled centers. ``K`` is the number of queried neighbors. ``M`` is the number of weight matrices in the weight bank. point_features (torch.Tensor): (B, N, M, out_dim) Pre-computed point features to be aggregated. center_features (torch.Tensor): (B, N, M, out_dim) Pre-computed center features to be aggregated. knn_idx (torch.Tensor): (B, npoint, K), index of sampled kNN. We assume the first idx in each row is the idx of the center. aggregate (str, optional): Aggregation method. Can be 'sum', 'avg' or 'max'. Defaults: 'sum'. Returns: torch.Tensor: (B, out_dim, npoint, K), the aggregated features. """ agg = {'sum': 0, 'avg': 1, 'max': 2} B, N, M, out_dim = point_features.size() _, npoint, K, _ = scores.size() output = point_features.new_zeros((B, out_dim, npoint, K)) ext_module.assign_score_withk_forward( point_features.contiguous(), center_features.contiguous(), scores.contiguous(), knn_idx.contiguous(), output, B=B, N0=N, N1=npoint, M=M, K=K, O=out_dim, aggregate=agg[aggregate]) ctx.save_for_backward(output, point_features, center_features, scores, knn_idx) ctx.agg = agg[aggregate] return output @staticmethod def backward(ctx, grad_out): """ Args: grad_out (torch.Tensor): (B, out_dim, npoint, K) Returns: tuple[torch.Tensor]: A tuple contains five elements. The first one is the gradient of ``scores`` whose shape is (B, npoint, K, M). The second is the gradient of ``point_features`` whose shape is (B, N, M, out_dim). The third is the gradient of ``center_features`` with the shape of (B, N, M, out_dim). The last two are ``None``. 
class BallQuery(Function):
    """Find nearby points in spherical space."""

    @staticmethod
    def forward(ctx, min_radius: float, max_radius: float, sample_num: int,
                xyz: torch.Tensor, center_xyz: torch.Tensor) -> torch.Tensor:
        """
        Args:
            min_radius (float): minimum radius of the balls.
            max_radius (float): maximum radius of the balls.
            sample_num (int): maximum number of features in the balls.
            xyz (torch.Tensor): (B, N, 3) xyz coordinates of the features.
                Must be contiguous.
            center_xyz (torch.Tensor): (B, npoint, 3) centers of the ball
                query. Must be contiguous.

        Returns:
            torch.Tensor: (B, npoint, nsample) int tensor with the indices of
            the features that form the query balls.
        """
        assert center_xyz.is_contiguous()
        assert xyz.is_contiguous()
        assert min_radius < max_radius

        B, N, _ = xyz.size()
        npoint = center_xyz.size(1)
        # Index buffer filled in place by the extension kernel.
        idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int)

        ext_module.ball_query_forward(
            center_xyz,
            xyz,
            idx,
            b=B,
            n=N,
            m=npoint,
            min_radius=min_radius,
            max_radius=max_radius,
            nsample=sample_num)
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(idx)
        return idx

    @staticmethod
    def backward(ctx, a=None):
        """Return a ``None`` gradient for every input of :meth:`forward`.

        ``forward`` takes five inputs (``min_radius``, ``max_radius``,
        ``sample_num``, ``xyz``, ``center_xyz``), so five values are
        returned; autograd expects one gradient slot per forward input.
        """
        return None, None, None, None, None
Example: >>> bboxes1 = torch.FloatTensor([ >>> [0, 0, 10, 10], >>> [10, 10, 20, 20], >>> [32, 32, 38, 42], >>> ]) >>> bboxes2 = torch.FloatTensor([ >>> [0, 0, 10, 20], >>> [0, 10, 10, 19], >>> [10, 10, 20, 20], >>> ]) >>> bbox_overlaps(bboxes1, bboxes2) tensor([[0.5000, 0.0000, 0.0000], [0.0000, 0.0000, 1.0000], [0.0000, 0.0000, 0.0000]]) Example: >>> empty = torch.FloatTensor([]) >>> nonempty = torch.FloatTensor([ >>> [0, 0, 10, 9], >>> ]) >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1) >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0) >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0) """ mode_dict = {'iou': 0, 'iof': 1} assert mode in mode_dict.keys() mode_flag = mode_dict[mode] # Either the boxes are empty or the length of boxes' last dimension is 4 assert (bboxes1.size(-1) == 4 or bboxes1.size(0) == 0) assert (bboxes2.size(-1) == 4 or bboxes2.size(0) == 0) assert offset == 1 or offset == 0 rows = bboxes1.size(0) cols = bboxes2.size(0) if aligned: assert rows == cols if rows * cols == 0: return bboxes1.new(rows, 1) if aligned else bboxes1.new(rows, cols) if aligned: ious = bboxes1.new_zeros(rows) else: ious = bboxes1.new_zeros((rows, cols)) ext_module.bbox_overlaps( bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset) return ious ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/border_align.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
class BorderAlignFunction(Function):
    """Autograd function wrapping the ``border_align_forward`` and
    ``border_align_backward`` extension kernels."""

    @staticmethod
    def symbolic(g, input, boxes, pool_size):
        """Emit the ``mmcv::MMCVBorderAlign`` node for ONNX export."""
        return g.op(
            'mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size)

    @staticmethod
    def forward(ctx, input, boxes, pool_size):
        """
        Args:
            ctx: Autograd context; stores ``pool_size``, the input shape and
                the tensors needed by :meth:`backward`.
            input (torch.Tensor): Features whose channel count (dim 1) must
                be divisible by 4.
            boxes (torch.Tensor): Boxes with shape [B, H*W, 4], last
                dimension being (x1, y1, x2, y2).
            pool_size (int): Forwarded to the extension kernel as
                ``pool_size``.

        Returns:
            torch.Tensor: Output with shape [B, C//4, H*W, 4].
        """
        ctx.pool_size = pool_size
        ctx.input_shape = input.size()

        assert boxes.ndim == 3, 'boxes must be with shape [B, H*W, 4]'
        assert boxes.size(2) == 4, \
            'the last dimension of boxes must be (x1, y1, x2, y2)'
        assert input.size(1) % 4 == 0, \
            'the channel for input feature must be divisible by factor 4'

        # [B, C//4, H*W, 4]
        output_shape = (input.size(0), input.size(1) // 4, boxes.size(1), 4)
        output = input.new_zeros(output_shape)
        # `argmax_idx` only used for backward. Allocate it as int directly
        # rather than creating a tensor in the feature dtype and converting.
        argmax_idx = input.new_zeros(output_shape, dtype=torch.int)

        ext_module.border_align_forward(
            input, boxes, output, argmax_idx, pool_size=ctx.pool_size)

        ctx.save_for_backward(boxes, argmax_idx)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Compute the gradient w.r.t. ``input``; ``boxes`` and ``pool_size``
        receive ``None``."""
        boxes, argmax_idx = ctx.saved_tensors
        grad_input = grad_output.new_zeros(ctx.input_shape)
        # complex head architecture may cause grad_output uncontiguous
        grad_output = grad_output.contiguous()
        ext_module.border_align_backward(
            grad_output,
            boxes,
            argmax_idx,
            grad_input,
            pool_size=ctx.pool_size)
        return grad_input, None, None
def box_iou_rotated(bboxes1,
                    bboxes2,
                    mode='iou',
                    aligned=False,
                    clockwise=True):
    r"""Return intersection-over-union (Jaccard index) of boxes.

    Both sets of boxes are expected to be in
    (x_center, y_center, width, height, angle) format.

    If ``aligned`` is ``False``, then calculate the ious between each bbox
    of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
    bboxes1 and bboxes2.

    .. note::
        The operator assumes:

        1) The positive direction along x axis is left -> right.

        2) The positive direction along y axis is top -> down.

        3) The w border is in parallel with x axis when angle = 0.

        However, there are 2 opposite definitions of the positive angular
        direction, clockwise (CW) and counter-clockwise (CCW). MMCV supports
        both definitions and uses CW by default.

        Please set ``clockwise=False`` if you are using the CCW definition.

        The coordinate system when ``clockwise`` is ``True`` (default)

            .. code-block:: none

                0-------------------> x (0 rad)
                |  A-------------B
                |  |             |
                |  |     box     h
                |  |   angle=0   |
                |  D------w------C
                v
                y (pi/2 rad)

            In such coordination system the rotation matrix is

            .. math::
                \begin{pmatrix}
                \cos\alpha & -\sin\alpha \\
                \sin\alpha & \cos\alpha
                \end{pmatrix}

            The coordinates of the corner point A can be calculated as:

            .. math::
                P_A=
                \begin{pmatrix} x_A \\ y_A\end{pmatrix}
                =
                \begin{pmatrix} x_{center} \\ y_{center}\end{pmatrix} +
                \begin{pmatrix}\cos\alpha & -\sin\alpha \\
                \sin\alpha & \cos\alpha\end{pmatrix}
                \begin{pmatrix} -0.5w \\ -0.5h\end{pmatrix} \\
                =
                \begin{pmatrix} x_{center}-0.5w\cos\alpha+0.5h\sin\alpha
                \\
                y_{center}-0.5w\sin\alpha-0.5h\cos\alpha\end{pmatrix}

        The coordinate system when ``clockwise`` is ``False``

            .. code-block:: none

                0-------------------> x (0 rad)
                |  A-------------B
                |  |             |
                |  |     box     h
                |  |   angle=0   |
                |  D------w------C
                v
                y (-pi/2 rad)

            In such coordination system the rotation matrix is

            .. math::
                \begin{pmatrix}
                \cos\alpha & \sin\alpha \\
                -\sin\alpha & \cos\alpha
                \end{pmatrix}

            The coordinates of the corner point A can be calculated as:

            .. math::
                P_A=
                \begin{pmatrix} x_A \\ y_A\end{pmatrix}
                =
                \begin{pmatrix} x_{center} \\ y_{center}\end{pmatrix} +
                \begin{pmatrix}\cos\alpha & \sin\alpha \\
                -\sin\alpha & \cos\alpha\end{pmatrix}
                \begin{pmatrix} -0.5w \\ -0.5h\end{pmatrix} \\
                =
                \begin{pmatrix} x_{center}-0.5w\cos\alpha-0.5h\sin\alpha
                \\
                y_{center}+0.5w\sin\alpha-0.5h\cos\alpha\end{pmatrix}

    Args:
        bboxes1 (torch.Tensor): rotated bboxes 1. It has shape (N, 5),
            indicating (x, y, w, h, theta) for each row. Note that theta is
            in radian.
        bboxes2 (torch.Tensor): rotated bboxes 2. It has shape (M, 5),
            indicating (x, y, w, h, theta) for each row. Note that theta is
            in radian.
        mode (str): "iou" (intersection over union) or iof (intersection over
            foreground).
        aligned (bool): If ``True``, compute the overlap of each aligned
            pair instead of all pairs. Defaults to False.
        clockwise (bool): flag indicating whether the positive angular
            orientation is clockwise. default True.
            `New in version 1.4.3.`

    Returns:
        torch.Tensor: Return the ious betweens boxes. If ``aligned`` is
        ``False``, the shape of ious is (N, M) else (N,).
    """
    assert mode in ['iou', 'iof']
    mode_flag = {'iou': 0, 'iof': 1}[mode]

    num_boxes1 = bboxes1.size(0)
    num_boxes2 = bboxes2.size(0)
    # Flat buffer that the extension fills in place: one entry per aligned
    # pair, or one per (box1, box2) combination.
    flat_len = num_boxes1 if aligned else num_boxes1 * num_boxes2
    overlaps = bboxes1.new_zeros(flat_len)

    if not clockwise:
        # Negate only the last column (the angle) of both box sets.
        angle_sign = bboxes1.new_ones(bboxes1.shape[-1])
        angle_sign[-1] = -1
        bboxes1 = bboxes1 * angle_sign
        bboxes2 = bboxes2 * angle_sign

    ext_module.box_iou_rotated(
        bboxes1.contiguous(),
        bboxes2.contiguous(),
        overlaps,
        mode_flag=mode_flag,
        aligned=aligned)

    return overlaps if aligned else overlaps.view(num_boxes1, num_boxes2)
import torch import torch.nn as nn import torch.nn.functional as F from torch.autograd import Function from torch.nn.modules.module import Module from ..cnn import UPSAMPLE_LAYERS, normal_init, xavier_init from ..utils import ext_loader ext_module = ext_loader.load_ext('_ext', [ 'carafe_naive_forward', 'carafe_naive_backward', 'carafe_forward', 'carafe_backward' ]) class CARAFENaiveFunction(Function): @staticmethod def symbolic(g, features, masks, kernel_size, group_size, scale_factor): return g.op( 'mmcv::MMCVCARAFENaive', features, masks, kernel_size_i=kernel_size, group_size_i=group_size, scale_factor_f=scale_factor) @staticmethod def forward(ctx, features, masks, kernel_size, group_size, scale_factor): assert scale_factor >= 1 assert masks.size(1) == kernel_size * kernel_size * group_size assert masks.size(-1) == features.size(-1) * scale_factor assert masks.size(-2) == features.size(-2) * scale_factor assert features.size(1) % group_size == 0 assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 ctx.kernel_size = kernel_size ctx.group_size = group_size ctx.scale_factor = scale_factor ctx.feature_size = features.size() ctx.mask_size = masks.size() n, c, h, w = features.size() output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) ext_module.carafe_naive_forward( features, masks, output, kernel_size=kernel_size, group_size=group_size, scale_factor=scale_factor) if features.requires_grad or masks.requires_grad: ctx.save_for_backward(features, masks) return output @staticmethod def backward(ctx, grad_output): assert grad_output.is_cuda features, masks = ctx.saved_tensors kernel_size = ctx.kernel_size group_size = ctx.group_size scale_factor = ctx.scale_factor grad_input = torch.zeros_like(features) grad_masks = torch.zeros_like(masks) ext_module.carafe_naive_backward( grad_output.contiguous(), features, masks, grad_input, grad_masks, kernel_size=kernel_size, group_size=group_size, scale_factor=scale_factor) return grad_input, grad_masks, None, 
class CARAFENaive(Module):
    """Module wrapper that stores the CARAFE hyper-parameters and forwards
    to ``carafe_naive``.

    Args:
        kernel_size (int): Passed to ``carafe_naive`` as ``kernel_size``.
        group_size (int): Passed to ``carafe_naive`` as ``group_size``.
        scale_factor (int): Passed to ``carafe_naive`` as ``scale_factor``.
    """

    def __init__(self, kernel_size, group_size, scale_factor):
        super().__init__()

        # All three hyper-parameters must be plain integers.
        for hyper_param in (kernel_size, group_size, scale_factor):
            assert isinstance(hyper_param, int)

        self.kernel_size = kernel_size
        self.group_size = group_size
        self.scale_factor = scale_factor

    def forward(self, features, masks):
        """Apply ``carafe_naive`` to ``features`` and ``masks`` using the
        stored hyper-parameters."""
        hyper_params = (self.kernel_size, self.group_size, self.scale_factor)
        return carafe_naive(features, masks, *hyper_params)
class CARAFE(Module):
    """CARAFE: Content-Aware ReAssembly of FEatures.

    Please refer to the paper "CARAFE: Content-Aware ReAssembly of FEatures"
    for more details.

    Args:
        kernel_size (int): reassemble kernel size
        group_size (int): reassemble group size
        scale_factor (int): upsample ratio

    Returns:
        upsampled feature map
    """

    def __init__(self, kernel_size, group_size, scale_factor):
        super().__init__()

        hyper_params = (kernel_size, group_size, scale_factor)
        # Reject anything that is not a plain integer.
        assert all(isinstance(value, int) for value in hyper_params)

        self.kernel_size, self.group_size, self.scale_factor = hyper_params

    def forward(self, features, masks):
        """Run the ``carafe`` op on ``features`` and ``masks`` with the
        stored hyper-parameters."""
        return carafe(
            features,
            masks,
            self.kernel_size,
            self.group_size,
            self.scale_factor,
        )
Args: channels (int): input feature channels scale_factor (int): upsample ratio up_kernel (int): kernel size of CARAFE op up_group (int): group size of CARAFE op encoder_kernel (int): kernel size of content encoder encoder_dilation (int): dilation of content encoder compressed_channels (int): output channels of channels compressor Returns: upsampled feature map """ def __init__(self, channels, scale_factor, up_kernel=5, up_group=1, encoder_kernel=3, encoder_dilation=1, compressed_channels=64): super(CARAFEPack, self).__init__() self.channels = channels self.scale_factor = scale_factor self.up_kernel = up_kernel self.up_group = up_group self.encoder_kernel = encoder_kernel self.encoder_dilation = encoder_dilation self.compressed_channels = compressed_channels self.channel_compressor = nn.Conv2d(channels, self.compressed_channels, 1) self.content_encoder = nn.Conv2d( self.compressed_channels, self.up_kernel * self.up_kernel * self.up_group * self.scale_factor * self.scale_factor, self.encoder_kernel, padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2), dilation=self.encoder_dilation, groups=1) self.init_weights() def init_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d): xavier_init(m, distribution='uniform') normal_init(self.content_encoder, std=0.001) def kernel_normalizer(self, mask): mask = F.pixel_shuffle(mask, self.scale_factor) n, mask_c, h, w = mask.size() # use float division explicitly, # to void inconsistency while exporting to onnx mask_channel = int(mask_c / float(self.up_kernel**2)) mask = mask.view(n, mask_channel, -1, h, w) mask = F.softmax(mask, dim=2, dtype=mask.dtype) mask = mask.view(n, mask_c, h, w).contiguous() return mask def feature_reassemble(self, x, mask): x = carafe(x, mask, self.up_kernel, self.up_group, self.scale_factor) return x def forward(self, x): compressed_x = self.channel_compressor(x) mask = self.content_encoder(compressed_x) mask = self.kernel_normalizer(mask) x = self.feature_reassemble(x, 
def NEG_INF_DIAG(n, device):
    """Build an [n, n] matrix whose diagonal is "-inf" and whose other
    entries are zero.

    It is used to avoid counting the overlapped element of the Criss-Cross
    twice.

    Args:
        n (int): Side length of the square matrix.
        device: Device on which the matrix is created.

    Returns:
        torch.Tensor: The [n, n] matrix described above.
    """
    diagonal = torch.full((n, ), float('-inf'), device=device)
    return torch.diag(diagonal, 0)
Returns: torch.Tensor: Output of the layer, with the shape of (batch_size, in_channels, height, width) """ B, C, H, W = x.size() query = self.query_conv(x) key = self.key_conv(x) value = self.value_conv(x) energy_H = torch.einsum('bchw,bciw->bwhi', query, key) + NEG_INF_DIAG( H, query.device) energy_H = energy_H.transpose(1, 2) energy_W = torch.einsum('bchw,bchj->bhwj', query, key) attn = F.softmax( torch.cat([energy_H, energy_W], dim=-1), dim=-1) # [B,H,W,(H+W)] out = torch.einsum('bciw,bhwi->bchw', value, attn[..., :H]) out += torch.einsum('bchj,bhwj->bchw', value, attn[..., H:]) out = self.gamma(out) + x out = out.contiguous() return out def __repr__(self): s = self.__class__.__name__ s += f'(in_channels={self.in_channels})' return s ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/contour_expand.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import numpy as np import torch from ..utils import ext_loader ext_module = ext_loader.load_ext('_ext', ['contour_expand']) def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, kernel_num): """Expand kernel contours so that foreground pixels are assigned into instances. Args: kernel_mask (np.array or torch.Tensor): The instance kernel mask with size hxw. internal_kernel_label (np.array or torch.Tensor): The instance internal kernel label with size hxw. min_kernel_area (int): The minimum kernel area. kernel_num (int): The instance kernel number. Returns: list: The instance index map with size hxw. 
def convex_giou(pointsets, polygons):
    """Return generalized intersection-over-union (Jaccard index) between
    point sets and polygons.

    Args:
        pointsets (torch.Tensor): It has shape (N, 18),
            indicating (x1, y1, x2, y2, ..., x9, y9) for each row.
        polygons (torch.Tensor): It has shape (N, 8),
            indicating (x1, y1, x2, y2, x3, y3, x4, y4) for each row.

    Returns:
        tuple[torch.Tensor, torch.Tensor]: The first element is the gious
        between point sets and polygons with the shape (N,). The second
        element is the gradient of point sets with the shape (N, 18).
    """
    num_sets = pointsets.size(0)
    # One row per point set: the leading columns are read back as the
    # gradient and the last column as the GIoU value.
    buffer = pointsets.new_zeros((num_sets, 19))
    ext_module.convex_giou(pointsets, polygons, buffer)
    gious = buffer[:, -1]
    points_grad = buffer[:, 0:-1]
    return gious, points_grad
def _torch_version_tuple(version=None):
    """Parse a version string such as ``'1.10.0+cu113'`` into an int tuple.

    Comparing version strings directly is lexicographic, so ``'1.10.0'``
    sorts before ``'1.5.0'``. Comparing integer tuples avoids that.

    Args:
        version (str, optional): Version to parse. Defaults to
            ``torch.__version__``.

    Returns:
        tuple[int, int, int]: ``(major, minor, patch)``. Fields that are
        missing or do not start with digits are treated as 0.
    """
    if version is None:
        version = torch.__version__
    numbers = []
    # Drop any local build suffix ("+cu113") and keep the first 3 fields.
    for field in str(version).split('+')[0].split('.')[:3]:
        digits = ''
        for char in field:
            if not char.isdigit():
                break
            digits += char
        numbers.append(int(digits) if digits else 0)
    while len(numbers) < 3:
        numbers.append(0)
    return tuple(numbers)


class TopPoolFunction(Function):
    """Autograd wrapper around the extension's top-pool forward/backward."""

    @staticmethod
    def symbolic(g, input):
        output = g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['top']))
        return output

    @staticmethod
    def forward(ctx, input):
        output = ext_module.top_pool_forward(input)
        # The extension's backward needs the original input.
        ctx.save_for_backward(input)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        output = ext_module.top_pool_backward(input, grad_output)
        return output


class BottomPoolFunction(Function):
    """Autograd wrapper around the extension's bottom-pool forward/backward."""

    @staticmethod
    def symbolic(g, input):
        output = g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['bottom']))
        return output

    @staticmethod
    def forward(ctx, input):
        output = ext_module.bottom_pool_forward(input)
        ctx.save_for_backward(input)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        output = ext_module.bottom_pool_backward(input, grad_output)
        return output


class LeftPoolFunction(Function):
    """Autograd wrapper around the extension's left-pool forward/backward."""

    @staticmethod
    def symbolic(g, input):
        output = g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['left']))
        return output

    @staticmethod
    def forward(ctx, input):
        output = ext_module.left_pool_forward(input)
        ctx.save_for_backward(input)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        output = ext_module.left_pool_backward(input, grad_output)
        return output


class RightPoolFunction(Function):
    """Autograd wrapper around the extension's right-pool forward/backward."""

    @staticmethod
    def symbolic(g, input):
        output = g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['right']))
        return output

    @staticmethod
    def forward(ctx, input):
        output = ext_module.right_pool_forward(input)
        ctx.save_for_backward(input)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        output = ext_module.right_pool_backward(input, grad_output)
        return output


class CornerPool(nn.Module):
    """Corner Pooling.

    Corner Pooling is a new type of pooling layer that helps a
    convolutional network better localize corners of bounding boxes.

    Please refer to `CornerNet: Detecting Objects as Paired Keypoints
    <https://arxiv.org/abs/1808.01244>`_ for more details.

    Code is modified from https://github.com/princeton-vl/CornerNet-Lite.

    Args:
        mode (str): Pooling orientation for the pooling layer

            - 'bottom': Bottom Pooling
            - 'left': Left Pooling
            - 'right': Right Pooling
            - 'top': Top Pooling

    Returns:
        Feature map after pooling.
    """

    # Extension-backed implementation for each mode (fallback path).
    pool_functions = {
        'bottom': BottomPoolFunction,
        'left': LeftPoolFunction,
        'right': RightPoolFunction,
        'top': TopPoolFunction,
    }

    # (dim, flip) per mode for the ``torch.cummax`` path: the dimension to
    # scan and whether the tensor is reversed along it before and after.
    cummax_dim_flip = {
        'bottom': (2, False),
        'left': (3, True),
        'right': (3, False),
        'top': (2, True),
    }

    def __init__(self, mode):
        super(CornerPool, self).__init__()
        assert mode in self.pool_functions
        self.mode = mode
        self.corner_pool = self.pool_functions[mode]

    def forward(self, x):
        """Pool ``x`` in the configured direction.

        Uses ``torch.cummax`` when running on PyTorch >= 1.5.0 and falls
        back to the compiled extension otherwise (including parrots).

        Args:
            x (torch.Tensor): Input feature map; dims 2 and 3 are the ones
                scanned (see ``cummax_dim_flip``).

        Returns:
            torch.Tensor: Pooled feature map.
        """
        # Compare numeric tuples rather than raw strings so that versions
        # with two-digit components (e.g. '1.10.0') are ordered correctly.
        use_cummax = (
            torch.__version__ != 'parrots'
            and _torch_version_tuple() >= (1, 5, 0))
        if not use_cummax:
            return self.corner_pool.apply(x)

        if torch.onnx.is_in_onnx_export():
            assert _torch_version_tuple() >= (1, 7, 0), \
                'When `cummax` serves as an intermediate component whose '\
                'outputs is used as inputs for another modules, it\'s '\
                'expected that pytorch version must be >= 1.7.0, '\
                'otherwise Error appears like: `RuntimeError: tuple '\
                'appears in op that does not forward tuples, unsupported '\
                'kind: prim::PythonOp`.'

        dim, flip = self.cummax_dim_flip[self.mode]
        if flip:
            x = x.flip(dim)
        pool_tensor, _ = torch.cummax(x, dim=dim)
        if flip:
            pool_tensor = pool_tensor.flip(dim)
        return pool_tensor
class CorrelationFunction(Function):
    """Autograd function wrapping the extension's correlation kernels."""

    @staticmethod
    def forward(ctx,
                input1,
                input2,
                kernel_size=1,
                max_displacement=1,
                stride=1,
                padding=1,
                dilation=1,
                dilation_patch=1):
        ctx.save_for_backward(input1, input2)

        # Normalise every spatial hyper-parameter to an (H, W) pair and keep
        # it on ctx so backward() and _output_size() can read it.
        ctx.kernel_size = _pair(kernel_size)
        ctx.patch_size = max_displacement * 2 + 1
        ctx.stride = _pair(stride)
        ctx.padding = _pair(padding)
        ctx.dilation = _pair(dilation)
        ctx.dilation_patch = _pair(dilation_patch)

        kH, kW = ctx.kernel_size
        patch_size = ctx.patch_size
        dH, dW = ctx.stride
        padH, padW = ctx.padding
        dilationH, dilationW = ctx.dilation
        dilation_patchH, dilation_patchW = ctx.dilation_patch

        output = input1.new_zeros(
            CorrelationFunction._output_size(ctx, input1))

        # The extension writes its result into ``output``.
        ext_module.correlation_forward(
            input1,
            input2,
            output,
            kH=kH,
            kW=kW,
            patchH=patch_size,
            patchW=patch_size,
            padH=padH,
            padW=padW,
            dilationH=dilationH,
            dilationW=dilationW,
            dilation_patchH=dilation_patchH,
            dilation_patchW=dilation_patchW,
            dH=dH,
            dW=dW)

        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        input1, input2 = ctx.saved_tensors

        kH, kW = ctx.kernel_size
        patch_size = ctx.patch_size
        padH, padW = ctx.padding
        dilationH, dilationW = ctx.dilation
        dilation_patchH, dilation_patchW = ctx.dilation_patch
        dH, dW = ctx.stride

        grad_input1 = torch.zeros_like(input1)
        grad_input2 = torch.zeros_like(input2)

        # The extension fills both gradient buffers.
        ext_module.correlation_backward(
            grad_output,
            input1,
            input2,
            grad_input1,
            grad_input2,
            kH=kH,
            kW=kW,
            patchH=patch_size,
            patchW=patch_size,
            padH=padH,
            padW=padW,
            dilationH=dilationH,
            dilationW=dilationW,
            dilation_patchH=dilation_patchH,
            dilation_patchW=dilation_patchW,
            dH=dH,
            dW=dW)

        # Only the two tensor inputs get gradients; the six hyper-parameters
        # of forward() do not.
        return (grad_input1, grad_input2) + (None, ) * 6

    @staticmethod
    def _output_size(ctx, input1):
        """Compute the 5-D output shape from ``input1`` and ctx settings."""
        batch_size = input1.size(0)
        in_h, in_w = input1.size(2), input1.size(3)
        k_h, k_w = ctx.kernel_size
        patch_size = ctx.patch_size
        stride_h, stride_w = ctx.stride
        pad_h, pad_w = ctx.padding
        dil_h, dil_w = ctx.dilation

        def _extent(size, pad, kernel, dil, stride):
            # Size of one output dimension.
            dilated_kernel = (kernel - 1) * dil + 1
            return int((size + 2 * pad - dilated_kernel) / stride + 1)

        out_h = _extent(in_h, pad_h, k_h, dil_h, stride_h)
        out_w = _extent(in_w, pad_w, k_w, dil_w, stride_w)
        return (batch_size, patch_size, patch_size, out_h, out_w)


class Correlation(nn.Module):
    r"""Correlation operator

    This correlation operator works for optical flow correlation computation.

    There are two batched tensors with shape :math:`(N, C, H, W)`,
    and the correlation output's shape is :math:`(N, max\_displacement \times
    2 + 1, max\_displacement \times 2 + 1, H_{out}, W_{out})`

    where

    .. math::
        H_{out} = \left\lfloor\frac{H_{in}  + 2 \times padding -
            dilation \times (kernel\_size - 1) - 1}
            {stride} + 1\right\rfloor

    .. math::
        W_{out} = \left\lfloor\frac{W_{in}  + 2 \times padding - dilation
            \times (kernel\_size - 1) - 1}
            {stride} + 1\right\rfloor

    the correlation item :math:`(N_i, dy, dx)` is formed by taking the sliding
    window convolution between input1 and shifted input2,

    .. math::
        Corr(N_i, dx, dy) =
        \sum_{c=0}^{C-1}
        input1(N_i, c) \star
        \mathcal{S}(input2(N_i, c), dy, dx)

    where :math:`\star` is the valid 2d sliding window convolution operator,
    and :math:`\mathcal{S}` means shifting the input features (auto-complete
    zero marginal), and :math:`dx, dy` are shifting distance, :math:`dx, dy \in
    [-max\_displacement \times dilation\_patch, max\_displacement \times
    dilation\_patch]`.

    Args:
        kernel_size (int): The size of sliding window i.e. local neighborhood
            representing the center points and involved in correlation
            computation. Defaults to 1.
        max_displacement (int): The radius for computing correlation volume,
            but the actual working space can be dilated by dilation_patch.
            Defaults to 1.
        stride (int): The stride of the sliding blocks in the input spatial
            dimensions. Defaults to 1.
        padding (int): Zero padding added to all four sides of the input1.
            Defaults to 0.
        dilation (int): The spacing of local neighborhood that will involved
            in correlation. Defaults to 1.
        dilation_patch (int): The spacing between position need to compute
            correlation. Defaults to 1.
    """

    def __init__(self,
                 kernel_size: int = 1,
                 max_displacement: int = 1,
                 stride: int = 1,
                 padding: int = 0,
                 dilation: int = 1,
                 dilation_patch: int = 1) -> None:
        super().__init__()
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.dilation_patch = dilation_patch

    def forward(self, input1: Tensor, input2: Tensor) -> Tensor:
        return CorrelationFunction.apply(input1, input2, self.kernel_size,
                                         self.max_displacement, self.stride,
                                         self.padding, self.dilation,
                                         self.dilation_patch)

    def __repr__(self) -> str:
        fields = ', '.join([
            f'kernel_size={self.kernel_size}',
            f'max_displacement={self.max_displacement}',
            f'stride={self.stride}',
            f'padding={self.padding}',
            f'dilation={self.dilation}',
            f'dilation_patch={self.dilation_patch}',
        ])
        return f'{self.__class__.__name__}({fields})'
│   ├── onnx_ops.h
│   └── cpu
│       ├── onnxruntime_register.cpp
│       ├── ...
│       └── onnx_ops_impl.cpp
├── parrots
│   ├── ...
│   ├── ops.cpp
│   ├── ops_parrots.cpp
│   └── ops_pytorch.h
├── pytorch
│   ├── info.cpp
│   ├── pybind.cpp
│   ├── ...
│   ├── ops.cpp
│   ├── cuda
│   │   ├── ...
│   │   └── ops_cuda.cu
│   └── cpu
│       ├── ...
│       └── ops.cpp
└── tensorrt
    ├── trt_cuda_helper.cuh
    ├── trt_plugin_helper.hpp
    ├── trt_plugin.hpp
    ├── trt_serialize.hpp
    ├── ...
    ├── trt_ops.hpp
    └── plugins
        ├── trt_cuda_helper.cu
        ├── trt_plugin.cpp
        ├── ...
        ├── trt_ops.cpp
        └── trt_ops_kernel.cu
```

## Components

- `common`: This directory contains all tools and shared code.
  - `cuda`: The CUDA kernels which can be shared by all backends. **HIP** kernels are also here since they have similar syntax.
- `onnxruntime`: **ONNX Runtime** support for custom ops.
  - `cpu`: CPU implementation of supported ops.
- `parrots`: **Parrots** is a deep learning framework for model training and inference. Parrots custom ops are placed in this directory.
- `pytorch`: **PyTorch** custom ops are supported by binding C++ to Python with **pybind11**. The ops implementation and binding code are placed in this directory.
  - `cuda`: This directory contains CUDA kernel launchers, which feed memory pointers of tensors to the CUDA kernels in `common/cuda`. The launchers provide the C++ interface of the CUDA implementation of the corresponding custom ops.
  - `cpu`: This directory contains CPU implementations of the corresponding custom ops.
- `tensorrt`: **TensorRT** support for custom ops.
  - `plugins`: This directory contains the implementation of the supported custom ops. Some ops might also use shared CUDA kernels in `common/cuda`.

## How to add new PyTorch ops?

1. (Optional) Add a shared kernel in `common` to support special hardware platforms.

   ```c++
   // src/common/cuda/new_ops_cuda_kernel.cuh

   template <typename T>
   __global__ void new_ops_forward_cuda_kernel(const T* input, T* output, ...)
   {
       // forward here
   }
   ```

   Add a CUDA kernel launcher in `pytorch/cuda`.

   ```c++
   // src/pytorch/cuda
   #include <new_ops_cuda_kernel.cuh>

   void NewOpsForwardCUDAKernelLauncher(Tensor input, Tensor output, ...){
       // initialize
       at::cuda::CUDAGuard device_guard(input.device());
       cudaStream_t stream = at::cuda::getCurrentCUDAStream();
       ...
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(
           input.scalar_type(), "new_ops_forward_cuda_kernel", ([&] {
               new_ops_forward_cuda_kernel<scalar_t>
                   <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
                       input.data_ptr<scalar_t>(), output.data_ptr<scalar_t>(),...);
           }));
       AT_CUDA_CHECK(cudaGetLastError());
   }
   ```

2. Register the implementation for different devices.

   ```c++
   // src/pytorch/cuda/cudabind.cpp
   ...

   Tensor new_ops_forward_cuda(Tensor input, Tensor output, ...){
       // implement cuda forward here
       // use `NewOpsForwardCUDAKernelLauncher` here
   }
   // declare interface here.
   Tensor new_ops_forward_impl(Tensor input, Tensor output, ...);
   // register the implementation for given device (CUDA here).
   REGISTER_DEVICE_IMPL(new_ops_forward_impl, CUDA, new_ops_forward_cuda);
   ```

3. Add the ops implementation in the `pytorch` directory. Select different implementations according to device type.

   ```c++
   // src/pytorch/new_ops.cpp
   Tensor new_ops_forward_impl(Tensor input, Tensor output, ...){
       // dispatch the implementation according to the device type of input.
       DISPATCH_DEVICE_IMPL(new_ops_forward_impl, input, output, ...);
   }
   ...

   Tensor new_ops_forward(Tensor input, Tensor output, ...){
       return new_ops_forward_impl(input, output, ...);
   }
   ```

4. Bind the implementation in `pytorch/pybind.cpp`.

   ```c++
   // src/pytorch/pybind.cpp

   ...

   Tensor new_ops_forward(Tensor input, Tensor output, ...);

   ...

   // bind with pybind11
   m.def("new_ops_forward", &new_ops_forward, "new_ops_forward",
           py::arg("input"), py::arg("output"), ...);

   ...
   ```

5. Build MMCV again. Enjoy the new ops in Python.

   ```python
   from ..utils import ext_loader
   ext_module = ext_loader.load_ext('_ext', ['new_ops_forward'])

   ...

   ext_module.new_ops_forward(input, output, ...)
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h
#pragma once
#include <cassert>
#include <cmath>

#ifdef __CUDACC__
// Designates functions callable from the host (CPU) and the device (GPU)
#define HOST_DEVICE __host__ __device__
#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__
#else
#include <algorithm>
#define HOST_DEVICE
#define HOST_DEVICE_INLINE HOST_DEVICE inline
#endif

namespace {

// A rotated box given by its centre, width, height and angle `a`.
template <typename T>
struct RotatedBox {
  T x_ctr, y_ctr, w, h, a;
};

// Minimal 2-D point/vector with the arithmetic needed below.
template <typename T>
struct Point {
  T x, y;
  HOST_DEVICE_INLINE Point(const T& px = 0, const T& py = 0) : x(px), y(py) {}
  HOST_DEVICE_INLINE Point operator+(const Point& p) const {
    return Point(x + p.x, y + p.y);
  }
  HOST_DEVICE_INLINE Point& operator+=(const Point& p) {
    x += p.x;
    y += p.y;
    return *this;
  }
  HOST_DEVICE_INLINE Point operator-(const Point& p) const {
    return Point(x - p.x, y - p.y);
  }
  HOST_DEVICE_INLINE Point operator*(const T coeff) const {
    return Point(x * coeff, y * coeff);
  }
};

// Dot product of two 2-D vectors.
template <typename T>
HOST_DEVICE_INLINE T dot_2d(const Point<T>& A, const Point<T>& B) {
  return A.x * B.x + A.y * B.y;
}

// Z component of the cross product of two 2-D vectors.
template <typename T>
HOST_DEVICE_INLINE T cross_2d(const Point<T>& A, const Point<T>& B) {
  return A.x * B.y - B.x * A.y;
}

// Writes the four corners of `box` into `pts`.
// `box.a` is passed to cos/sin unchanged; the degree-to-radian conversion of
// the upstream code is intentionally disabled (see "MODIFIED" below).
template <typename T>
HOST_DEVICE_INLINE void get_rotated_vertices(const RotatedBox<T>& box,
                                             Point<T> (&pts)[4]) {
  // M_PI / 180. == 0.01745329251
  // double theta = box.a * 0.01745329251;
  // MODIFIED
  double theta = box.a;
  T cosTheta2 = (T)cos(theta) * 0.5f;
  T sinTheta2 = (T)sin(theta) * 0.5f;

  // y: top --> down; x: left --> right
  pts[0].x = box.x_ctr - sinTheta2 * box.h - cosTheta2 * box.w;
  pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w;
  pts[1].x = box.x_ctr + sinTheta2 * box.h - cosTheta2 * box.w;
  pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w;
  // The remaining two corners are the point reflections through the centre.
  pts[2].x = 2 * box.x_ctr - pts[0].x;
  pts[2].y = 2 * box.y_ctr - pts[0].y;
  pts[3].x = 2 * box.x_ctr - pts[1].x;
  pts[3].y = 2 * box.y_ctr - pts[1].y;
}

// Collects candidate vertices of the intersection polygon of two rectangles:
// edge/edge crossings plus corners of either rectangle lying inside the
// other. Duplicates are possible. Returns the number of points written.
template <typename T>
HOST_DEVICE_INLINE int get_intersection_points(const Point<T> (&pts1)[4],
                                               const Point<T> (&pts2)[4],
                                               Point<T> (&intersections)[24]) {
  // Line vector
  // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1]
  Point<T> vec1[4], vec2[4];
  for (int i = 0; i < 4; i++) {
    vec1[i] = pts1[(i + 1) % 4] - pts1[i];
    vec2[i] = pts2[(i + 1) % 4] - pts2[i];
  }

  // Line test - test all line combos for intersection
  int num = 0;  // number of intersections
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 4; j++) {
      // Solve for 2x2 Ax=b
      T det = cross_2d<T>(vec2[j], vec1[i]);

      // This takes care of parallel lines
      if (fabs(det) <= 1e-14) {
        continue;
      }

      auto vec12 = pts2[j] - pts1[i];

      T t1 = cross_2d<T>(vec2[j], vec12) / det;
      T t2 = cross_2d<T>(vec1[i], vec12) / det;

      if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) {
        intersections[num++] = pts1[i] + vec1[i] * t1;
      }
    }
  }

  // Check for vertices of rect1 inside rect2
  {
    const auto& AB = vec2[0];
    const auto& DA = vec2[3];
    auto ABdotAB = dot_2d<T>(AB, AB);
    auto ADdotAD = dot_2d<T>(DA, DA);
    for (int i = 0; i < 4; i++) {
      // assume ABCD is the rectangle, and P is the point to be judged
      // P is inside ABCD iff. P's projection on AB lies within AB
      // and P's projection on AD lies within AD

      auto AP = pts1[i] - pts2[0];

      auto APdotAB = dot_2d<T>(AP, AB);
      auto APdotAD = -dot_2d<T>(AP, DA);

      if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) &&
          (APdotAD <= ADdotAD)) {
        intersections[num++] = pts1[i];
      }
    }
  }

  // Reverse the check - check for vertices of rect2 inside rect1
  {
    const auto& AB = vec1[0];
    const auto& DA = vec1[3];
    auto ABdotAB = dot_2d<T>(AB, AB);
    auto ADdotAD = dot_2d<T>(DA, DA);
    for (int i = 0; i < 4; i++) {
      auto AP = pts2[i] - pts1[0];

      auto APdotAB = dot_2d<T>(AP, AB);
      auto APdotAD = -dot_2d<T>(AP, DA);

      if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) &&
          (APdotAD <= ADdotAD)) {
        intersections[num++] = pts2[i];
      }
    }
  }

  return num;
}

// Graham scan over the first `num_in` points of `p`; the hull vertices are
// written to `q` and their count is returned. With `shift_to_zero` the
// result stays translated so that the starting point is the origin.
template <typename T>
HOST_DEVICE_INLINE int convex_hull_graham(const Point<T> (&p)[24],
                                          const int& num_in, Point<T> (&q)[24],
                                          bool shift_to_zero = false) {
  assert(num_in >= 2);

  // Step 1:
  // Find point with minimum y
  // if more than 1 points have the same minimum y,
  // pick the one with the minimum x.
  int t = 0;
  for (int i = 1; i < num_in; i++) {
    if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) {
      t = i;
    }
  }
  auto& start = p[t];  // starting point

  // Step 2:
  // Subtract starting point from every points (for sorting in the next step)
  for (int i = 0; i < num_in; i++) {
    q[i] = p[i] - start;
  }

  // Swap the starting point to position 0
  auto tmp = q[0];
  q[0] = q[t];
  q[t] = tmp;

  // Step 3:
  // Sort point 1 ~ num_in according to their relative cross-product values
  // (essentially sorting according to angles)
  // If the angles are the same, sort according to their distance to origin
  T dist[24];
  for (int i = 0; i < num_in; i++) {
    dist[i] = dot_2d<T>(q[i], q[i]);
  }

#ifdef __CUDACC__
  // CUDA version
  // In the future, we can potentially use thrust
  // for sorting here to improve speed (though not guaranteed)
  for (int i = 1; i < num_in - 1; i++) {
    for (int j = i + 1; j < num_in; j++) {
      T crossProduct = cross_2d<T>(q[i], q[j]);
      if ((crossProduct < -1e-6) ||
          (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) {
        auto q_tmp = q[i];
        q[i] = q[j];
        q[j] = q_tmp;
        auto dist_tmp = dist[i];
        dist[i] = dist[j];
        dist[j] = dist_tmp;
      }
    }
  }
#else
  // CPU version
  std::sort(q + 1, q + num_in,
            [](const Point<T>& A, const Point<T>& B) -> bool {
              T temp = cross_2d<T>(A, B);
              if (fabs(temp) < 1e-6) {
                return dot_2d<T>(A, A) < dot_2d<T>(B, B);
              } else {
                return temp > 0;
              }
            });
  // compute distance to origin after sort, since the points are now different.
  for (int i = 0; i < num_in; i++) {
    dist[i] = dot_2d<T>(q[i], q[i]);
  }
#endif

  // Step 4:
  // Make sure there are at least 2 points (that don't overlap with each other)
  // in the stack
  int k;  // index of the non-overlapped second point
  for (k = 1; k < num_in; k++) {
    if (dist[k] > 1e-8) {
      break;
    }
  }
  if (k == num_in) {
    // We reach the end, which means the convex hull is just one point
    q[0] = p[t];
    return 1;
  }
  q[1] = q[k];
  int m = 2;  // 2 points in the stack
  // Step 5:
  // Finally we can start the scanning process.
  // When a non-convex relationship between the 3 points is found
  // (either concave shape or duplicated points),
  // we pop the previous point from the stack
  // until the 3-point relationship is convex again, or
  // until the stack only contains two points
  for (int i = k + 1; i < num_in; i++) {
    while (m > 1 && cross_2d<T>(q[i] - q[m - 2], q[m - 1] - q[m - 2]) >= 0) {
      m--;
    }
    q[m++] = q[i];
  }

  // Step 6 (Optional):
  // In general sense we need the original coordinates, so we
  // need to shift the points back (reverting Step 2)
  // But if we're only interested in getting the area/perimeter of the shape
  // We can simply return.
  if (!shift_to_zero) {
    for (int i = 0; i < m; i++) {
      q[i] += start;
    }
  }

  return m;
}

// Area of the polygon given by the first `m` points of `q`, computed as a
// fan of triangles around q[0]. Returns 0 for fewer than three points.
template <typename T>
HOST_DEVICE_INLINE T polygon_area(const Point<T> (&q)[24], const int& m) {
  if (m <= 2) {
    return 0;
  }

  T area = 0;
  for (int i = 1; i < m - 1; i++) {
    area += fabs(cross_2d<T>(q[i] - q[0], q[i + 1] - q[0]));
  }

  return area / 2.0;
}

// Area of the intersection of two rotated boxes.
template <typename T>
HOST_DEVICE_INLINE T rotated_boxes_intersection(const RotatedBox<T>& box1,
                                                const RotatedBox<T>& box2) {
  // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned
  // from rotated_rect_intersection_pts
  Point<T> intersectPts[24], orderedPts[24];

  Point<T> pts1[4];
  Point<T> pts2[4];
  get_rotated_vertices<T>(box1, pts1);
  get_rotated_vertices<T>(box2, pts2);

  int num = get_intersection_points<T>(pts1, pts2, intersectPts);

  if (num <= 2) {
    return 0.0;
  }

  // Convex Hull to order the intersection points in clockwise order and find
  // the contour area.
  int num_convex = convex_hull_graham<T>(intersectPts, num, orderedPts, true);
  return polygon_area<T>(orderedPts, num_convex);
}

}  // namespace

// Overlap ratio of two rotated boxes, each given as five values
// (x_ctr, y_ctr, w, h, a).
//   mode_flag == 0: intersection / (area1 + area2 - intersection)
//   mode_flag == 1: intersection / area1
//   otherwise:      the raw intersection area (denominator stays 1)
// Returns 0 when either box has an area below 1e-14.
template <typename T>
HOST_DEVICE_INLINE T single_box_iou_rotated(T const* const box1_raw,
                                            T const* const box2_raw,
                                            const int mode_flag) {
  // shift center to the middle point to achieve higher precision in result
  RotatedBox<T> box1, box2;
  auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0;
  auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0;
  box1.x_ctr = box1_raw[0] - center_shift_x;
  box1.y_ctr = box1_raw[1] - center_shift_y;
  box1.w = box1_raw[2];
  box1.h = box1_raw[3];
  box1.a = box1_raw[4];
  box2.x_ctr = box2_raw[0] - center_shift_x;
  box2.y_ctr = box2_raw[1] - center_shift_y;
  box2.w = box2_raw[2];
  box2.h = box2_raw[3];
  box2.a = box2_raw[4];

  const T area1 = box1.w * box1.h;
  const T area2 = box2.w * box2.h;
  if (area1 < 1e-14 || area2 < 1e-14) {
    return 0.f;
  }

  const T intersection = rotated_boxes_intersection<T>(box1, box2);
  T baseS = 1.0;
  if (mode_flag == 0) {
    baseS = (area1 + area2 - intersection);
  } else if (mode_flag == 1) {
    baseS = area1;
  }
  const T iou = intersection / baseS;
  return iou;
}
// Modified from // https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/cuda/ActiveRotatingFilter_cuda.cu #ifndef ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH #define ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif template __global__ void active_rotated_filter_forward_cuda_kernel( const int nthreads, const scalar_t* weight_data, const int* indices_data, const int num_input_planes, const int num_output_planes, const int num_orientations, const int num_rotations, const int nEntry, scalar_t* output_data) { CUDA_1D_KERNEL_LOOP(index, nthreads) { int l = index % nEntry; int j = (index / nEntry) % num_input_planes; int i = index / nEntry / num_input_planes; int k; scalar_t val = *(weight_data + index); for (k = 0; k < num_rotations; k++) { int idx = (int)(*(indices_data + l * num_rotations + k)) - 1; scalar_t* target = output_data + i * (num_rotations * num_input_planes * nEntry) + k * (num_input_planes * nEntry) + j * (nEntry) + idx; *target = val; } } } template __global__ void active_rotated_filter_backward_cuda_kernel( const int nthreads, const scalar_t* gradWeight_data, const int* indices_data, const int num_input_planes, const int num_output_planes, const int num_orientations, const int num_rotations, const int nEntry, scalar_t* weight_data) { CUDA_1D_KERNEL_LOOP(index, nthreads) { int l = index % nEntry; int j = (index / nEntry) % num_input_planes; int i = index / nEntry / num_input_planes; int k; scalar_t* val = weight_data + index; *val = 0; scalar_t tmp = 0; for (k = 0; k < num_rotations; k++) { int idx = (int)(*(indices_data + l * num_rotations + k)) - 1; scalar_t target = *(gradWeight_data + i * (num_rotations * num_input_planes * nEntry) + k * (num_input_planes * nEntry) + j * (nEntry) + idx); tmp = tmp + target; } *val = tmp; } } #endif // ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH ================================================ FILE: 
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH #define ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif // input: points(B,N0,M,O), centers(B,N0,M,O), scores(B,N1,K,M), knn_idx(B,N1,K) // output: fout(B,O,N) // algo: fout(b,i,k,j) = s(b,i,k,m)*p(b,c(i),k,m,j) = s(b,i,k,m)*p(b,i(k),m,j) // i(k) = idx(b,i,k) // sum: fout(b,i,j) = fout(b,i,j) + s(b,i,k,m)*p(b,i,k,m,j) // avg: fout(b,i,j) = sum(fout(b,i,k,j)) / k // max: fout(b,i,j) = max(fout(b,i,k,j), sum(s(b,i,k,m)*p(b,i,k,m,j))) template __global__ void assign_score_withk_forward_cuda_kernel( const int B, const int N0, const int N1, const int M, const int K, const int O, const int aggregate, const T* points, const T* centers, const T* scores, const int64_t* knn_idx, T* output) { // ----- parallel loop for B, N1, K and O --------- CUDA_1D_KERNEL_LOOP(i, B * O * N1 * K) { // ------- loop for M ---------- const int b = (int)(i / (O * N1 * K)); const int o = (int)(i % (O * N1 * K) / (N1 * K)); const int n = (int)(i % (N1 * K) / K); const int k = (int)(i % K); const int cn = (int)knn_idx[b * K * N1 + n * K + 0]; // The first neighbor is the center point const int kn = (int)knn_idx[b * K * N1 + n * K + k]; if (kn >= N0 || kn < 0) { // if index overflows, it is out of the neighborhood range return; } assert(b < B); assert(kn < N0); assert(cn < N0); assert(o < O); assert(n < N1); const int out_idx = b * N1 * O * K + o * N1 * K + n * K + k; T val = output[out_idx]; for (int m = 0; m < M; m++) { val += points[b * N0 * M * O + kn * M * O + m * O + o] * scores[b * N1 * K * M + n * K * M + k * M + m] - centers[b * N0 * M * O + cn * M * O + m * O + o] * scores[b * N1 * K * M + n * K * M + k * M + m]; } output[out_idx] = val; } } template __global__ void 
assign_score_withk_points_backward_cuda_kernel( const int B, const int N0, const int N, const int M, const int K, const int O, const int aggregate, const T* grad_out, const T* scores, const int64_t* knn_idx, T* grad_points, T* grad_centers) { // ----- parallel loop for B, M, O --------- CUDA_1D_KERNEL_LOOP(i, B * M * O) { int b = (int)(i / (M * O)); int m = (int)(i % (M * O) / O); int o = (int)(i % O); // ----- loop for N,K --------- for (int n = 0; n < N; n++) { for (int k = 0; k < K; k++) { int kn = knn_idx[b * N * K + n * K + k]; int cn = knn_idx[b * N * K + n * K + 0]; if (kn >= N0 || kn < 0) { // if index overflows, it is out of the // neighborhood range continue; } atomicAdd(grad_points + b * N0 * M * O + kn * M * O + m * O + o, scores[b * N * K * M + n * K * M + k * M + m] * grad_out[b * O * N * K + o * N * K + n * K + k]); atomicAdd(grad_centers + b * N0 * M * O + cn * M * O + m * O + o, -scores[b * N * K * M + n * K * M + k * M + m] * grad_out[b * O * N * K + o * N * K + n * K + k]); } } } } template __global__ void assign_score_withk_scores_backward_cuda_kernel( const int B, const int N0, const int N, const int M, const int K, const int O, const int aggregate, const T* grad_out, const T* points, const T* centers, const int64_t* knn_idx, T* grad_scores) { // ----- parallel loop for B, N, K, M --------- CUDA_1D_KERNEL_LOOP(i, B * N * K * M) { const int b = (int)(i / (N * M * K)); const int n = (int)(i % (N * M * K) / M / K); const int k = (int)(i % (M * K) / M); const int m = (int)(i % M); const int cn = knn_idx[b * N * K + n * K + 0]; const int kn = knn_idx[b * N * K + n * K + k]; if (kn >= N0 || kn < 0) { // if index overflows, it is out of the neighborhood range return; } // -------------- loop for O ------------------------ const int out_idx = b * N * K * M + n * K * M + k * M + m; T val = grad_scores[out_idx]; for (int o = 0; o < O; o++) { val += (points[b * N0 * M * O + kn * M * O + m * O + o] - centers[b * N0 * M * O + cn * M * O + m * O + o]) * 
grad_out[b * O * N * K + o * N * K + n * K + k]; } grad_scores[out_idx] = val; } } #endif // ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. All rights reserved // Modified from // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query_gpu.cu #ifndef BALL_QUERY_CUDA_KERNEL_CUH #define BALL_QUERY_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif template __global__ void ball_query_forward_cuda_kernel(int b, int n, int m, float min_radius, float max_radius, int nsample, const T* new_xyz, const T* xyz, int* idx) { // new_xyz: (B, M, 3) // xyz: (B, N, 3) // output: // idx: (B, M, nsample) int bs_idx = blockIdx.y; CUDA_1D_KERNEL_LOOP(pt_idx, m) { if (bs_idx >= b) return; new_xyz += bs_idx * m * 3 + pt_idx * 3; xyz += bs_idx * n * 3; idx += bs_idx * m * nsample + pt_idx * nsample; float max_radius2 = max_radius * max_radius; float min_radius2 = min_radius * min_radius; T new_x = new_xyz[0]; T new_y = new_xyz[1]; T new_z = new_xyz[2]; int cnt = 0; for (int k = 0; k < n; ++k) { T x = xyz[k * 3 + 0]; T y = xyz[k * 3 + 1]; T z = xyz[k * 3 + 2]; T d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); if (d2 == 0 || (d2 >= min_radius2 && d2 < max_radius2)) { if (cnt == 0) { for (int l = 0; l < nsample; ++l) { idx[l] = k; } } idx[cnt] = k; ++cnt; if (cnt >= nsample) break; } } } } #endif // BALL_QUERY_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef BBOX_OVERLAPS_CUDA_KERNEL_CUH #define BBOX_OVERLAPS_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif template __global__ void bbox_overlaps_cuda_kernel(const T* bbox1, const T* bbox2, T* ious, const int num_bbox1, const int num_bbox2, const int mode, const bool aligned, const int offset) { if (aligned) { CUDA_1D_KERNEL_LOOP(index, num_bbox1) { int b1 = index; int b2 = index; int base1 = b1 * 4; T b1_x1 = bbox1[base1]; T b1_y1 = bbox1[base1 + 1]; T b1_x2 = bbox1[base1 + 2]; T b1_y2 = bbox1[base1 + 3]; T b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset); int base2 = b2 * 4; T b2_x1 = bbox2[base2]; T b2_y1 = bbox2[base2 + 1]; T b2_x2 = bbox2[base2 + 2]; T b2_y2 = bbox2[base2 + 3]; T b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset); T left = fmaxf(b1_x1, b2_x1), right = fminf(b1_x2, b2_x2); T top = fmaxf(b1_y1, b2_y1), bottom = fminf(b1_y2, b2_y2); T width = fmaxf(right - left + offset, 0.f); T height = fmaxf(bottom - top + offset, 0.f); T interS = width * height; T baseS = 1.0; if (mode == 0) { baseS = fmaxf(b1_area + b2_area - interS, T(offset)); } else if (mode == 1) { baseS = fmaxf(b1_area, T(offset)); } ious[index] = interS / baseS; } } else { CUDA_1D_KERNEL_LOOP(index, num_bbox1 * num_bbox2) { int b1 = index / num_bbox2; int b2 = index % num_bbox2; int base1 = b1 * 4; T b1_x1 = bbox1[base1]; T b1_y1 = bbox1[base1 + 1]; T b1_x2 = bbox1[base1 + 2]; T b1_y2 = bbox1[base1 + 3]; T b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset); int base2 = b2 * 4; T b2_x1 = bbox2[base2]; T b2_y1 = bbox2[base2 + 1]; T b2_x2 = bbox2[base2 + 2]; T b2_y2 = bbox2[base2 + 3]; T b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset); T left = fmaxf(b1_x1, b2_x1), right = fminf(b1_x2, b2_x2); T top = fmaxf(b1_y1, b2_y1), bottom = fminf(b1_y2, b2_y2); T width = fmaxf(right - left + offset, 0.f); T height = fmaxf(bottom - top + offset, 0.f); T 
interS = width * height; T baseS = 1.0; if (mode == 0) { baseS = fmaxf(b1_area + b2_area - interS, T(offset)); } else if (mode == 1) { baseS = fmaxf(b1_area, T(offset)); } ious[index] = interS / baseS; } } } #endif // BBOX_OVERLAPS_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/border_align_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. All rights reserved // modified from // https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/csrc/border_align/border_align_kernel.cu. // the main difference: (1) use `argmax_idx` for fast computing of gradient // during the backward. (2) `wh` is directly computed by `boxes`, rather than // passing it as argument to forward or backward functions. #ifndef BORDER_ALIGN_CUDA_KERNEL_CUH #define BORDER_ALIGN_CUDA_KERNEL_CUH #include #ifdef MMCV_WITH_TRT #include "common_cuda_helper.hpp" #else // MMCV_WITH_TRT #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else // MMCV_USE_PARROTS #include "pytorch_cuda_helper.hpp" #endif // MMCV_USE_PARROTS #endif // MMCV_WITH_TRT enum BorderMode { Top = 0, Left = 1, Bottom = 2, Right = 3 }; /*** Forward ***/ template __global__ void border_align_forward_cuda_kernel( const int nthreads, const T* input, const T* boxes, T* output, int* argmax_idx, const int channels, const int box_size, const int height, const int width, const int pool_size) { CUDA_1D_KERNEL_LOOP(index, nthreads) { // (batch_idx, c_idx, box_idx) is an element paralleled for computing // output, and `extreme_idx` is in range [0,3] int batch_idx, c_idx, box_idx, extreme_idx, maxidx, *offset_argmax_idx; const T *offset_box, *offset_input, *offset_box_x; T *offset_output, box_width, box_height, stride, x_stride, y_stride, x, y, val, maxval; extreme_idx = threadIdx.y; // shape (N, C, box_size, 4) for output batch_idx = index / channels / box_size; // shape (N, box_size, 4) for boxes 
box_idx = index % box_size + batch_idx * box_size; c_idx = (index / box_size) % channels; offset_box = boxes + box_idx * 4; box_width = *(offset_box + 2) - *offset_box; box_height = *(offset_box + 3) - *(offset_box + 1); offset_output = output + index * 4 + extreme_idx; offset_argmax_idx = argmax_idx + index * 4 + extreme_idx; // shape (N, 4C, h, w) for input. // [0,C) for top feature, [C,2C) for left feature, // [2C,3C) for bottom feature, [3C,4C) for right feature offset_input = input + (batch_idx * channels * 4 + extreme_idx * channels + c_idx) * height * width; // extreme_idx in [0,1] -> offset_box_x indexed at x1 // extreme_idx in [2,3] -> offset_box_x indexed at x2 offset_box_x = offset_box + extreme_idx / 2 * 2; // (x1,y1) or (x2,y2) for (x,y) x = *offset_box_x; y = *(offset_box_x + 1); switch (extreme_idx) { // top case BorderMode::Top: stride = box_width / pool_size; x_stride = stride; y_stride = 0; break; // left case BorderMode::Left: stride = box_height / pool_size; x_stride = 0; y_stride = stride; break; // bottom case BorderMode::Bottom: stride = box_width / pool_size; x_stride = -stride; y_stride = 0; break; // right case BorderMode::Right: stride = box_height / pool_size; x_stride = 0; y_stride = -stride; break; } // initialize maxval and maxidx with the start position (e.g. 
(x1,y1) or // (x2,y2)) maxval = bilinear_interpolate(offset_input, height, width, y, x, index); maxidx = 0; // do max_pool along the border for (int i = 1; i <= pool_size; i++) { x += x_stride; y += y_stride; val = bilinear_interpolate(offset_input, height, width, y, x, index); if (val > maxval) { maxval = val; maxidx = i; } } // update output and argmax_idx *offset_output = maxval; *offset_argmax_idx = maxidx; } } /*** Backward ***/ template __global__ void border_align_backward_cuda_kernel( const int nthreads, const T* grad_output, const T* boxes, const int* argmax_idx, T* grad_input, const int channels, const int box_size, const int height, const int width, const int pool_size) { CUDA_1D_KERNEL_LOOP(index, nthreads) { // (batch_idx, c_idx, box_idx) is an element paralleled for computing // output, and `extreme_idx` is in range [0,3] int batch_idx, c_idx, box_idx, extreme_idx; const int* offset_argmax_idx; const T *offset_grad_output, *offset_box, *offset_box_x; T *offset_grad_input, box_width, box_height, stride, x_stride, y_stride, x, y; extreme_idx = threadIdx.y; batch_idx = index / channels / box_size; box_idx = index % box_size + batch_idx * box_size; c_idx = (index / box_size) % channels; offset_box = boxes + box_idx * 4; box_width = *(offset_box + 2) - *offset_box; box_height = *(offset_box + 3) - *(offset_box + 1); offset_grad_output = grad_output + index * 4 + extreme_idx; offset_argmax_idx = argmax_idx + index * 4 + extreme_idx; // [0,C) for top feature grad, [C,2C) for left feature grad, // [2C,3C) for bottom feature grad, [3C,4C) for right feature grad offset_grad_input = grad_input + (batch_idx * channels * 4 + extreme_idx * channels + c_idx) * height * width; // extreme_idx in [0,1] -> offset_box_x indexed at x1 // extreme_idx in [2,3] -> offset_box_x indexed at x2 offset_box_x = offset_box + extreme_idx / 2 * 2; switch (extreme_idx) { // top case BorderMode::Top: stride = box_width / pool_size; x_stride = stride; y_stride = 0; break; // left case 
BorderMode::Left: stride = box_height / pool_size; x_stride = 0; y_stride = stride; break; // bottom case BorderMode::Bottom: stride = box_width / pool_size; x_stride = -stride; y_stride = 0; break; // right case BorderMode::Right: stride = box_height / pool_size; x_stride = 0; y_stride = -stride; break; } // get position (x,y) which has maximum value during forward x = *offset_box_x; y = *(offset_box_x + 1); x += x_stride * (T)(*offset_argmax_idx); y += y_stride * (T)(*offset_argmax_idx); T w1, w2, w3, w4; int x_low, x_high, y_low, y_high; bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high, index); // update grad_output atomicAdd(offset_grad_input + y_low * width + x_low, *offset_grad_output * w1); atomicAdd(offset_grad_input + y_low * width + x_high, *offset_grad_output * w2); atomicAdd(offset_grad_input + y_high * width + x_low, *offset_grad_output * w3); atomicAdd(offset_grad_input + y_high * width + x_high, *offset_grad_output * w4); } } #endif // BORDER_ALIGN_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/box_iou_rotated_cuda.cuh ================================================ // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved // modified from // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu #ifndef BOX_IOU_ROTATED_CUDA_CUH #define BOX_IOU_ROTATED_CUDA_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif #include "box_iou_rotated_utils.hpp" // 2D block with 32 * 16 = 512 threads per block const int BLOCK_DIM_X = 32; const int BLOCK_DIM_Y = 16; inline int divideUP(const int x, const int y) { return (((x) + (y)-1) / (y)); } template __global__ void box_iou_rotated_cuda_kernel( const int n_boxes1, const int n_boxes2, const T* dev_boxes1, const T* dev_boxes2, T* dev_ious, const int mode_flag, const bool aligned) { if (aligned) { CUDA_1D_KERNEL_LOOP(index, n_boxes1) { int b1 = index; int b2 = index; int base1 = b1 * 5; float block_boxes1[5]; float block_boxes2[5]; block_boxes1[0] = dev_boxes1[base1 + 0]; block_boxes1[1] = dev_boxes1[base1 + 1]; block_boxes1[2] = dev_boxes1[base1 + 2]; block_boxes1[3] = dev_boxes1[base1 + 3]; block_boxes1[4] = dev_boxes1[base1 + 4]; int base2 = b2 * 5; block_boxes2[0] = dev_boxes2[base2 + 0]; block_boxes2[1] = dev_boxes2[base2 + 1]; block_boxes2[2] = dev_boxes2[base2 + 2]; block_boxes2[3] = dev_boxes2[base2 + 3]; block_boxes2[4] = dev_boxes2[base2 + 4]; dev_ious[index] = single_box_iou_rotated(block_boxes1, block_boxes2, mode_flag); } } else { CUDA_1D_KERNEL_LOOP(index, n_boxes1 * n_boxes2) { int b1 = index / n_boxes2; int b2 = index % n_boxes2; int base1 = b1 * 5; float block_boxes1[5]; float block_boxes2[5]; block_boxes1[0] = dev_boxes1[base1 + 0]; block_boxes1[1] = dev_boxes1[base1 + 1]; block_boxes1[2] = dev_boxes1[base1 + 2]; block_boxes1[3] = dev_boxes1[base1 + 3]; block_boxes1[4] = dev_boxes1[base1 + 4]; int base2 = b2 * 5; block_boxes2[0] = dev_boxes2[base2 + 0]; block_boxes2[1] = dev_boxes2[base2 + 1]; block_boxes2[2] = dev_boxes2[base2 + 2]; block_boxes2[3] = dev_boxes2[base2 + 3]; block_boxes2[4] = 
dev_boxes2[base2 + 4]; dev_ious[index] = single_box_iou_rotated(block_boxes1, block_boxes2, mode_flag); } } } #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef CARAFE_CUDA_KERNEL_CUH #define CARAFE_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif #ifdef HIP_DIFF #define WARP_SIZE 64 #else #define WARP_SIZE 32 #endif #define THREADS_PER_PIXEL 32 #define MAX_SHARED_MEMORY 49152 #define MAX_SHARED_SCALAR_T 6144 // 49152 / 8 = 6144 #define MAXIMIZE_KERNEL_SIZE true #define kTileDim 32 #define kBlockRows 8 #define FULL_MASK 0xffffffff inline int divideUP(const int x, const int y) { return (((x) + (y)-1) / (y)); } __device__ inline int Loc2Index(const int n, const int c, const int h, const int w, const int channel_num, const int height, const int width) { int index = w + (h + (c + n * channel_num) * height) * width; return index; } #ifndef HIP_DIFF /* TODO: move this to a common place */ template __device__ inline scalar_t min(scalar_t a, scalar_t b) { return a < b ? a : b; } template __device__ inline scalar_t max(scalar_t a, scalar_t b) { return a > b ? a : b; } #endif template __device__ __forceinline__ scalar_t warpReduceSum(scalar_t val) { for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2) #ifdef HIP_DIFF val += __shfl_down(val, offset); #else val += __shfl_down_sync(FULL_MASK, val, offset); #endif return val; } template <> __device__ __forceinline__ phalf warpReduceSum(phalf val) { for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2) #ifdef HIP_DIFF __PHALF(val) += __shfl_down(FULL_MASK, val, offset); #else __PHALF(val) += __shfl_down_sync(FULL_MASK, static_cast<__half>(__PHALF(val)), offset); #endif return val; } // Splits the original matrix into submatrices with size 32 * 32. 
// Each block transposes one submatrix by loading it into shared memory. // Reference https://devblogs.nvidia.com/efficient-matrix-transpose-cuda-cc/ template __global__ void BatchTranspose2DCUDAKernel(const int N, const int H, const int W, const int dh, const int dw, const scalar_t *__restrict__ X, scalar_t *__restrict__ Y) { __shared__ scalar_t tile[kTileDim][kTileDim + 1]; const int n = blockIdx.x / (dh * dw); const int k = blockIdx.x % (dh * dw); const int r = k / dw; const int c = k % dw; const int offset = n * H * W; int x = c * kTileDim + threadIdx.x; int y = r * kTileDim + threadIdx.y; if (x < W) { for (int i = 0; threadIdx.y + i < kTileDim && y + i < H; i += kBlockRows) { tile[threadIdx.y + i][threadIdx.x] = X[offset + (y + i) * W + x]; } } __syncthreads(); x = r * kTileDim + threadIdx.x; y = c * kTileDim + threadIdx.y; if (x < H) { for (int i = 0; threadIdx.y + i < kTileDim && y + i < W; i += kBlockRows) { Y[offset + (y + i) * H + x] = tile[threadIdx.x][threadIdx.y + i]; } } } template __global__ void CARAFEForward( const int num_kernels, const scalar_t *__restrict__ bottom_data, const scalar_t *__restrict__ bottom_masks, const int kernel_size, const int group_size, const int scale_factor, const int channels, const int down_height, const int down_width, const int height, const int width, const int mask_channels, scalar_t *__restrict__ top_data) { #if MAXIMIZE_KERNEL_SIZE __shared__ float shared_mask[MAX_SHARED_SCALAR_T * 2]; #else __shared__ scalar_t shared_mask[MAX_SHARED_SCALAR_T]; #endif int index = threadIdx.x + blockIdx.x * blockDim.x; if (index > num_kernels - 1) { return; } const int pixel_id = threadIdx.x / THREADS_PER_PIXEL; const int split_id = threadIdx.x % THREADS_PER_PIXEL; index = index / THREADS_PER_PIXEL; const int pw = index % width; const int ph = (index / width) % height; const int n = index / width / height; const int down_pw = pw / scale_factor; const int down_ph = ph / scale_factor; const int start_w = down_pw - (kernel_size - 1) / 
2; const int end_w = down_pw + (kernel_size - 1) / 2 + 1; const int start_h = down_ph - (kernel_size - 1) / 2; const int end_h = down_ph + (kernel_size - 1) / 2 + 1; for (int c = split_id; c < mask_channels; c += THREADS_PER_PIXEL) { int mask_index = Loc2Index(n, ph, pw, c, height, width, mask_channels); shared_mask[c * WARP_SIZE + pixel_id] = bottom_masks[mask_index]; } __syncthreads(); const int channels_per_group = ceilf(channels / (float)group_size); #pragma unroll for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) { int mask_group = c / channels_per_group; scalar_t output_val = 0; #pragma unroll for (int iy = start_h; iy < end_h; iy++) { #pragma unroll for (int ix = start_w; ix < end_w; ix++) { if (iy < 0 || iy > down_height - 1 || ix < 0 || ix > down_width - 1) { continue; } int mask_iy = iy - down_ph + (kernel_size - 1) / 2; int mask_ix = ix - down_pw + (kernel_size - 1) / 2; int mask_c = (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix; int feat_index = Loc2Index(n, iy, ix, c, down_height, down_width, channels); output_val += bottom_data[feat_index] * shared_mask[mask_c * WARP_SIZE + pixel_id]; } } int top_index = Loc2Index(n, ph, pw, c, height, width, channels); top_data[top_index] = output_val; } } template __global__ void CARAFEBackward_Feature( const int num_kernels, const scalar_t *__restrict__ top_diff, const scalar_t *__restrict__ bottom_masks, const int kernel_size, const int group_size, const int scale_factor, const int channels, const int down_height, const int down_width, const int height, const int width, const int mask_channels, scalar_t *__restrict__ bottom_diff) { #if MAXIMIZE_KERNEL_SIZE __shared__ float shared_mask[MAX_SHARED_SCALAR_T * 2]; #else __shared__ scalar_t shared_mask[MAX_SHARED_SCALAR_T]; #endif int index = threadIdx.x + blockIdx.x * blockDim.x; if (index > num_kernels - 1) { return; } const int pixel_id = threadIdx.x / THREADS_PER_PIXEL; const int split_id = threadIdx.x % THREADS_PER_PIXEL; // (n, c, ph, 
pw) is an element in the bottom_data index = index / THREADS_PER_PIXEL; const int pw = index % width; const int ph = (index / width) % height; const int n = index / width / height; const int start_w = pw - (kernel_size - 1) * scale_factor / 2; const int end_w = pw + (kernel_size - 1) * scale_factor / 2 + 1; const int start_h = ph - (kernel_size - 1) * scale_factor / 2; const int end_h = ph + (kernel_size - 1) * scale_factor / 2 + 1; for (int c = split_id; c < mask_channels; c += THREADS_PER_PIXEL) { const int mask_w = (c % kernel_size) * scale_factor; const int mask_h = (c / kernel_size % kernel_size) * scale_factor; const int mask_x = start_w + mask_w; const int mask_y = start_h + mask_h; if (mask_y < 0 || mask_y > height - 1 || mask_x < 0 || mask_x > width - 1) { shared_mask[c * WARP_SIZE + pixel_id] = 0; continue; } const int mask_group = c / (kernel_size * kernel_size); const int mask_c = (2 * mask_group + 1) * kernel_size * kernel_size - c - 1; int mask_index = Loc2Index(n, mask_c, mask_y, mask_x, mask_channels, height, width); shared_mask[c * WARP_SIZE + pixel_id] = bottom_masks[mask_index]; } __syncthreads(); const int channels_per_group = ceilf(channels / (float)group_size); #pragma unroll for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) { int mask_group = c / channels_per_group; int top_index = Loc2Index(n, ph, pw, c, height, width, channels); scalar_t output_val = 0; #pragma unroll for (int iy = start_h; iy < end_h; iy += scale_factor) { #pragma unroll for (int ix = start_w; ix < end_w; ix += scale_factor) { if (iy < 0 || iy > height - 1 || ix < 0 || ix > width - 1) { continue; } int mask_iy = (iy - ph + (kernel_size - 1) * scale_factor / 2) / scale_factor; int mask_ix = (ix - pw + (kernel_size - 1) * scale_factor / 2) / scale_factor; int mask_c = (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix; int feat_index = Loc2Index(n, iy, ix, c, height, width, channels); output_val += shared_mask[mask_c * WARP_SIZE + pixel_id] * 
top_diff[feat_index]; } } bottom_diff[top_index] = output_val; } } template __global__ void FeatureSum(const int num_kernels, const scalar_t *__restrict__ input_data, const int scale_factor, const int channels, const int height, const int width, scalar_t *__restrict__ output_data) { int index = threadIdx.x + blockIdx.x * blockDim.x; if (index > num_kernels - 1) { return; } const int split_id = threadIdx.x % THREADS_PER_PIXEL; index = index / THREADS_PER_PIXEL; const int pw = index % width; const int ph = (index / width) % height; const int n = index / width / height; for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) { scalar_t output_val = 0; for (int iy = ph * scale_factor; iy < (ph + 1) * scale_factor; iy++) { for (int ix = pw * scale_factor; ix < (pw + 1) * scale_factor; ix++) { int input_id = Loc2Index(n, iy, ix, c, height * scale_factor, width * scale_factor, channels); output_val += input_data[input_id]; } } const int output_id = Loc2Index(n, ph, pw, c, height, width, channels); output_data[output_id] = output_val; } } template __global__ void CARAFEBackward_Mask(const int num_kernels, const scalar_t *__restrict__ top_diff, const scalar_t *__restrict__ bottom_data, const int kernel_size, const int group_size, const int scale_factor, const int channels, const int down_height, const int down_width, const int height, const int width, const int mask_channels, scalar_t *__restrict__ mask_diff) { int index = threadIdx.x + blockIdx.x * blockDim.x; if (index > num_kernels - 1) { return; } const int lane_id = index % WARP_SIZE; index = index / WARP_SIZE; const int mask_c = index % mask_channels; // (n, c, ph, pw) is an element in the bottom_data index = index / mask_channels; const int pw = index % width; const int ph = (index / width) % height; const int n = index / width / height; const int down_pw = pw / scale_factor; const int down_ph = ph / scale_factor; const int mask_group = mask_c / (kernel_size * kernel_size); const int mask_loc = mask_c % 
(kernel_size * kernel_size); const int offset_x = mask_loc % kernel_size - (kernel_size - 1) / 2; const int offset_y = mask_loc / kernel_size % kernel_size - (kernel_size - 1) / 2; const int down_x = down_pw + offset_x; const int down_y = down_ph + offset_y; scalar_t output_val = 0; if (down_y >= 0 && down_y <= down_height - 1 && down_x >= 0 && down_x <= down_width - 1) { const int channels_per_mask = ceilf(channels / (float)group_size); const int start = channels_per_mask * mask_group; const int end = min(channels_per_mask * (mask_group + 1), channels); for (int c = start + lane_id; c < end; c += WARP_SIZE) { int bottom_id = Loc2Index(n, down_y, down_x, c, down_height, down_width, channels); int top_id = Loc2Index(n, ph, pw, c, height, width, channels); output_val += top_diff[top_id] * bottom_data[bottom_id]; } } #ifdef HIP_DIFF __syncthreads(); #else __syncwarp(); #endif output_val = warpReduceSum(output_val); if (lane_id == 0) { const int mask_id = Loc2Index(n, ph, pw, mask_c, height, width, mask_channels); mask_diff[mask_id] = output_val; } } #endif // CARAFE_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/carafe_naive_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef CARAFE_NAIVE_CUDA_KERNEL_CUH #define CARAFE_NAIVE_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif __device__ inline int Loc2Index(const int n, const int c, const int h, const int w, const int channel_num, const int height, const int width) { int index = w + (h + (c + n * channel_num) * height) * width; return index; } template __global__ void carafe_naive_forward_cuda_kernel( const int nthreads, const scalar_t *bottom_data, const scalar_t *bottom_masks, scalar_t *top_data, const int kernel_size, const int group_size, const int scale_factor, const int channels, const int height, const int width) { CUDA_1D_KERNEL_LOOP(index, nthreads) { // (n, c, ph, pw) is an element in the bottom_data int pw = index % width; int ph = (index / width) % height; int c = (index / width / height) % channels; int n = index / width / height / channels; int mask_channels = kernel_size * kernel_size * group_size; int mask_group = c / (channels / group_size); int down_pw = pw / scale_factor; int down_ph = ph / scale_factor; int down_width = width / scale_factor; int down_height = height / scale_factor; int start_w = down_pw - (kernel_size - 1) / 2; int end_w = down_pw + (kernel_size - 1) / 2 + 1; int start_h = down_ph - (kernel_size - 1) / 2; int end_h = down_ph + (kernel_size - 1) / 2 + 1; scalar_t output_val = 0; for (int iy = start_h; iy < end_h; iy++) { for (int ix = start_w; ix < end_w; ix++) { if (iy < 0 || iy > down_height - 1 || ix < 0 || ix > down_width - 1) { continue; } int mask_iy = iy - down_ph + (kernel_size - 1) / 2; int mask_ix = ix - down_pw + (kernel_size - 1) / 2; int mask_c = (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix; int feat_index = Loc2Index(n, c, iy, ix, channels, down_height, down_width); int mask_index = Loc2Index(n, mask_c, ph, pw, mask_channels, height, width); output_val += bottom_data[feat_index] * bottom_masks[mask_index]; } } 
top_data[index] = output_val; } } template __global__ void carafe_naive_backward_cuda_kernel( const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_data, const scalar_t *bottom_masks, scalar_t *bottom_diff, scalar_t *mask_diff, const int kernel_size, const int group_size, const int scale_factor, const int channels, const int height, const int width) { CUDA_1D_KERNEL_LOOP(index, nthreads) { // (n, c, ph, pw) is an element in the bottom_data int pw = index % width; int ph = (index / width) % height; int c = (index / width / height) % channels; int n = index / width / height / channels; int mask_channels = kernel_size * kernel_size * group_size; int mask_group = c / (channels / group_size); int down_pw = pw / scale_factor; int down_ph = ph / scale_factor; int down_width = width / scale_factor; int down_height = height / scale_factor; int start_w = down_pw - (kernel_size - 1) / 2; int end_w = down_pw + (kernel_size - 1) / 2 + 1; int start_h = down_ph - (kernel_size - 1) / 2; int end_h = down_ph + (kernel_size - 1) / 2 + 1; for (int iy = start_h; iy < end_h; iy++) { for (int ix = start_w; ix < end_w; ix++) { if (iy < 0 || iy > down_height - 1 || ix < 0 || ix > down_width - 1) { continue; } int mask_iy = iy - down_ph + (kernel_size - 1) / 2; int mask_ix = ix - down_pw + (kernel_size - 1) / 2; int mask_c = (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix; int feat_index = Loc2Index(n, c, iy, ix, channels, down_height, down_width); int mask_index = Loc2Index(n, mask_c, ph, pw, mask_channels, height, width); atomicAdd(bottom_diff + feat_index, bottom_masks[mask_index] * top_diff[index]); atomicAdd(mask_diff + mask_index, bottom_data[feat_index] * top_diff[index]); } } } } #endif // CARAFE_NAIVE_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/common_cuda_helper.hpp ================================================ #ifndef COMMON_CUDA_HELPER #define COMMON_CUDA_HELPER 
#include #define CUDA_1D_KERNEL_LOOP(i, n) \ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ i += blockDim.x * gridDim.x) #define CUDA_2D_KERNEL_LOOP(i, n, j, m) \ for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ i += blockDim.x * gridDim.x) \ for (size_t j = blockIdx.y * blockDim.y + threadIdx.y; j < (m); \ j += blockDim.y * gridDim.y) #define CUDA_2D_KERNEL_BLOCK_LOOP(i, n, j, m) \ for (size_t i = blockIdx.x; i < (n); i += gridDim.x) \ for (size_t j = blockIdx.y; j < (m); j += gridDim.y) #define THREADS_PER_BLOCK 512 inline int GET_BLOCKS(const int N, const int num_threads = THREADS_PER_BLOCK) { int optimal_block_num = (N + num_threads - 1) / num_threads; int max_block_num = 4096; return min(optimal_block_num, max_block_num); } template __device__ T bilinear_interpolate(const T* input, const int height, const int width, T y, T x, const int index /* index for debug only*/) { // deal with cases that inverse elements are out of feature map boundary if (y < -1.0 || y > height || x < -1.0 || x > width) return 0; if (y <= 0) y = 0; if (x <= 0) x = 0; int y_low = (int)y; int x_low = (int)x; int y_high; int x_high; if (y_low >= height - 1) { y_high = y_low = height - 1; y = (T)y_low; } else { y_high = y_low + 1; } if (x_low >= width - 1) { x_high = x_low = width - 1; x = (T)x_low; } else { x_high = x_low + 1; } T ly = y - y_low; T lx = x - x_low; T hy = 1. - ly, hx = 1. 
- lx; // do bilinear interpolation T v1 = input[y_low * width + x_low]; T v2 = input[y_low * width + x_high]; T v3 = input[y_high * width + x_low]; T v4 = input[y_high * width + x_high]; T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); return val; } template __device__ void bilinear_interpolate_gradient( const int height, const int width, T y, T x, T& w1, T& w2, T& w3, T& w4, int& x_low, int& x_high, int& y_low, int& y_high, const int index /* index for debug only*/) { // deal with cases that inverse elements are out of feature map boundary if (y < -1.0 || y > height || x < -1.0 || x > width) { // empty w1 = w2 = w3 = w4 = 0.; x_low = x_high = y_low = y_high = -1; return; } if (y <= 0) y = 0; if (x <= 0) x = 0; y_low = (int)y; x_low = (int)x; if (y_low >= height - 1) { y_high = y_low = height - 1; y = (T)y_low; } else { y_high = y_low + 1; } if (x_low >= width - 1) { x_high = x_low = width - 1; x = (T)x_low; } else { x_high = x_low + 1; } T ly = y - y_low; T lx = x - x_low; T hy = 1. - ly, hx = 1. - lx; // reference in forward // T v1 = input[y_low * width + x_low]; // T v2 = input[y_low * width + x_high]; // T v3 = input[y_high * width + x_low]; // T v4 = input[y_high * width + x_high]; // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; return; } #endif // COMMON_CUDA_HELPER ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef CONVEX_IOU_CUDA_KERNEL_CUH #define CONVEX_IOU_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif #define MAXN 100 #define NMAX 512 __device__ const double EPS = 1E-8; __device__ inline int sig(double d) { return (d > EPS) - (d < -EPS); } struct Point { double x, y; __device__ Point() {} __device__ Point(double x, double y) : x(x), y(y) {} }; __device__ inline bool point_same(Point& a, Point& b) { return sig(a.x - b.x) == 0 && sig(a.y - b.y) == 0; } __device__ inline void swap1(Point* a, Point* b) { Point temp; temp.x = a->x; temp.y = a->y; a->x = b->x; a->y = b->y; b->x = temp.x; b->y = temp.y; } __device__ inline void reverse1(Point* a, const int n) { for (int i = 0; i < (n - 1) / 2.0; i++) { Point* j = &(a[i]); Point* k = &(a[n - 1 - i]); swap1(j, k); } } __device__ inline double cross(Point o, Point a, Point b) { return (a.x - o.x) * (b.y - o.y) - (b.x - o.x) * (a.y - o.y); } __device__ inline double dis(Point a, Point b) { return (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y); } __device__ inline double area(Point* ps, int n) { ps[n] = ps[0]; double res = 0; for (int i = 0; i < n; i++) { res += ps[i].x * ps[i + 1].y - ps[i].y * ps[i + 1].x; } return res / 2.0; } __device__ inline double polygon_area_grad(Point* ps, int n, int* polygon_to_pred_index, int n_pred, double* grad_C) { ps[n] = ps[0]; double partion_grad[4 * 30 + 2]; double res = 0; for (int i = 0; i < n; i++) { res += ps[i].x * ps[i + 1].y - ps[i].y * ps[i + 1].x; partion_grad[i * 4 + 2] = ps[i + 1].y; partion_grad[i * 4 + 3] = -ps[i + 1].x; if (i != n - 1) { partion_grad[i * 4 + 4] = -ps[i].y; partion_grad[i * 4 + 5] = ps[i].x; } else { partion_grad[0] = -ps[i].y; partion_grad[1] = ps[i].x; } } for (int i = 0; i < n; i++) { for (int j = 0; j < n_pred; j++) { if (i == polygon_to_pred_index[j]) { grad_C[2 * polygon_to_pred_index[j + n_pred]] = (partion_grad[i * 4] + partion_grad[i * 4 + 2]) / 2; 
break; } } for (int j = 0; j < n_pred; j++) { if (i == polygon_to_pred_index[j]) { grad_C[2 * polygon_to_pred_index[j + n_pred] + 1] = (partion_grad[i * 4 + 1] + partion_grad[i * 4 + 1 + 2]) / 2; break; } } } return res / 2.0; } __device__ inline int lineCross(Point a, Point b, Point c, Point d, Point& p, double* cut_grad, int m, int n, int i) { double s1, s2; double s2_s1_2; double ds1_dxc, ds1_dyc, ds2_dxd, ds2_dyd; double dxp_dxc, dxp_dyc, dxp_dxd, dxp_dyd, dyp_dxc, dyp_dyc, dyp_dxd, dyp_dyd; s1 = cross(a, b, c); s2 = cross(a, b, d); ds1_dxc = -(b.y - a.y); ds1_dyc = b.x - a.x; ds2_dxd = ds1_dxc; ds2_dyd = ds1_dyc; s2_s1_2 = (s2 - s1) * (s2 - s1); if (sig(s1) == 0 && sig(s2) == 0) return 2; if (sig(s2 - s1) == 0) return 0; dxp_dxc = ((s2 - d.x * ds1_dxc) * (s2 - s1) - (c.x * s2 - d.x * s1) * (-ds1_dxc)) / (s2_s1_2); dxp_dyc = ((0 - d.x * ds1_dyc) * (s2 - s1) - (c.x * s2 - d.x * s1) * (-ds1_dyc)) / (s2_s1_2); dxp_dxd = ((c.x * ds2_dxd - s1) * (s2 - s1) - (c.x * s2 - d.x * s1) * (ds2_dxd)) / (s2_s1_2); dxp_dyd = ((c.x * ds2_dyd - 0) * (s2 - s1) - (c.x * s2 - d.x * s1) * (ds2_dyd)) / (s2_s1_2); dyp_dxc = ((0 - d.y * ds1_dxc) * (s2 - s1) - (c.y * s2 - d.y * s1) * (-ds1_dxc)) / (s2_s1_2); dyp_dyc = ((s2 - d.y * ds1_dyc) * (s2 - s1) - (c.y * s2 - d.y * s1) * (-ds1_dyc)) / (s2_s1_2); dyp_dxd = ((c.y * ds2_dxd - 0) * (s2 - s1) - (c.y * s2 - d.y * s1) * (ds2_dxd)) / (s2_s1_2); dyp_dyd = ((c.y * ds2_dyd - s1) * (s2 - s1) - (c.y * s2 - d.y * s1) * (ds2_dyd)) / (s2_s1_2); p.x = (c.x * s2 - d.x * s1) / (s2 - s1); p.y = (c.y * s2 - d.y * s1) / (s2 - s1); if (i == n - 1) { cut_grad[4 * n * m + 4 * i] = dxp_dxc; // + dyp_dxc; cut_grad[4 * n * m + 4 * i + 1] = dyp_dxc; cut_grad[4 * n * m + 4 * i + 2] = dxp_dyc; // + dyp_dyc; cut_grad[4 * n * m + 4 * i + 3] = dyp_dyc; cut_grad[4 * n * m + 0] = dxp_dxd; // + dyp_dxd; cut_grad[4 * n * m + 1] = dyp_dxd; cut_grad[4 * n * m + 2] = dxp_dyd; // + dyp_dyd; cut_grad[4 * n * m + 3] = dyp_dyd; } else { cut_grad[4 * n * m + 4 * i] = dxp_dxc; 
// + dyp_dxc; cut_grad[4 * n * m + 4 * i + 1] = dyp_dxc; cut_grad[4 * n * m + 4 * i + 2] = dxp_dyc; // + dyp_dyc; cut_grad[4 * n * m + 4 * i + 3] = dyp_dyc; cut_grad[4 * n * m + 4 * (i + 1)] = dxp_dxd; // + dyp_dxd; cut_grad[4 * n * m + 4 * (i + 1) + 1] = dyp_dxd; cut_grad[4 * n * m + 4 * (i + 1) + 2] = dxp_dyd; // + dyp_dyd; cut_grad[4 * n * m + 4 * (i + 1) + 3] = dyp_dyd; } return 1; } __device__ inline void polygon_cut(Point* p, int& n, Point a, Point b, double* cut_grad) { Point pp[MAXN]; double ccur_grad[MAXN] = {}; int m = 0; p[n] = p[0]; int k = n; for (int i = 0; i < n; i++) { if (sig(cross(a, b, p[i])) > 0) { pp[m] = p[i]; ccur_grad[4 * n * m + 4 * i] = 1.0; ccur_grad[4 * n * m + 4 * i + 3] = 1.0; m++; } if (sig(cross(a, b, p[i])) != sig(cross(a, b, p[i + 1]))) { lineCross(a, b, p[i], p[i + 1], pp[m], ccur_grad, m, n, i); m++; } } n = 0; for (int i = 0; i < m; i++) { if (!i || !(point_same(pp[i], pp[i - 1]))) { p[n] = pp[i]; for (int j = 0; j < 4 * k; j++) { cut_grad[4 * k * n + j] = ccur_grad[4 * k * i + j]; } n++; } } while (n > 1 && point_same(p[n - 1], p[0])) n--; } __device__ inline double intersectArea(Point a, Point b, Point c, Point d, double* grad_AB, int order, int convex_n) { Point o(0, 0); int res_flag = 0; int s1 = sig(cross(o, a, b)); int s2 = sig(cross(o, c, d)); if (s1 == 0 || s2 == 0) return 0.0; if (s1 == -1) { Point* i = &a; Point* j = &b; swap1(i, j); res_flag = 1; } if (s2 == -1) { Point* i = &c; Point* j = &d; swap1(i, j); } Point p[10] = {o, a, b}; int n = 3, n0 = 3, n1, n2, n3; double cut_grad1[MAXN] = {}; double cut_grad2[MAXN] = {}; double cut_grad3[MAXN] = {}; double p1_p_grad[10][10] = {}; double p2_p1_grad[10][10] = {}; double p3_p2_grad[10][10] = {}; double p3_p1_grad[10][10] = {}; double p3_p_grad[10][10] = {}; // 1 polygon_cut(p, n, o, c, cut_grad1); n1 = n; for (int i = 0; i < n; i++) { for (int j = 0; j < 4 * n0; j++) { if (!(j % 2)) { p1_p_grad[2 * i][j / 2] = cut_grad1[4 * n0 * i + j]; } else { p1_p_grad[2 * i + 1][j / 
2] = cut_grad1[4 * n0 * i + j]; } } } // 2 polygon_cut(p, n, c, d, cut_grad2); n2 = n; for (int i = 0; i < n; i++) { for (int j = 0; j < 4 * n1; j++) { if (!(j % 2)) { p2_p1_grad[2 * i][j / 2] = cut_grad2[4 * n1 * i + j]; } else { p2_p1_grad[2 * i + 1][j / 2] = cut_grad2[4 * n1 * i + j]; } } } // 3 polygon_cut(p, n, d, o, cut_grad3); n3 = n; for (int i = 0; i < n; i++) { for (int j = 0; j < 4 * n2; j++) { if (!(j % 2)) { p3_p2_grad[2 * i][j / 2] = cut_grad3[4 * n2 * i + j]; } else { p3_p2_grad[2 * i + 1][j / 2] = cut_grad3[4 * n2 * i + j]; } } } // mul // p3_p2(n3 * n2) * p2_p1(n2 * n1) = p3_p1 (n3 * n1) for (int i = 0; i < 2 * n3; i++) { for (int j = 0; j < 2 * n1; j++) { double sum = 0.0; for (int m = 0; m < 2 * n2; m++) { sum = sum + p3_p2_grad[i][m] * p2_p1_grad[m][j]; } p3_p1_grad[i][j] = sum; } } // p3_p1 (n3 * n1) * p1_p (n1 * n0) = p3_p (n3 * n0) for (int i = 0; i < 2 * n3; i++) { for (int j = 0; j < 2 * n0; j++) { double sum = 0.0; for (int m = 0; m < 2 * n1; m++) { sum = sum + p3_p1_grad[i][m] * p1_p_grad[m][j]; } p3_p_grad[i][j] = sum; } } // calculate S_grad int polygon_index_box_index[20]; double grad_polygon[20]; double S_grad[6]; for (int i = 0; i < n3; i++) { polygon_index_box_index[i] = i; polygon_index_box_index[i + n3] = i; } double res = polygon_area_grad(p, n3, polygon_index_box_index, n3, grad_polygon); if (s1 * s2 == -1) { for (int j = 0; j < 2 * 3; j++) { double sum = 0.0; for (int m = 0; m < 2 * n3; m++) { sum = sum - grad_polygon[m] * p3_p_grad[m][j]; } S_grad[j] = sum; } if (order != convex_n - 1) { if (res_flag) { grad_AB[2 * order] += S_grad[4]; grad_AB[2 * order + 1] += S_grad[5]; grad_AB[2 * order + 2] += S_grad[2]; grad_AB[2 * order + 3] += S_grad[3]; } else { grad_AB[2 * order] += S_grad[2]; grad_AB[2 * order + 1] += S_grad[3]; grad_AB[2 * order + 2] += S_grad[4]; grad_AB[2 * order + 3] += S_grad[5]; } } else { if (res_flag) { grad_AB[2 * order] += S_grad[4]; grad_AB[2 * order + 1] += S_grad[5]; grad_AB[0] += S_grad[2]; grad_AB[1] 
+= S_grad[3]; } else { grad_AB[2 * order] += S_grad[2]; grad_AB[2 * order + 1] += S_grad[3]; grad_AB[0] += S_grad[4]; grad_AB[1] += S_grad[5]; } } res = -res; } else { for (int j = 0; j < 2 * 3; j++) { double sum = 0.0; for (int m = 0; m < 2 * n3; m++) { sum = sum + grad_polygon[m] * p3_p_grad[m][j]; } S_grad[j] = sum; } if (order != convex_n - 1) { if (res_flag) { grad_AB[2 * order] += S_grad[4]; grad_AB[2 * order + 1] += S_grad[5]; grad_AB[2 * order + 2] += S_grad[2]; grad_AB[2 * order + 3] += S_grad[3]; } else { grad_AB[2 * order] += S_grad[2]; grad_AB[2 * order + 1] += S_grad[3]; grad_AB[2 * order + 2] += S_grad[4]; grad_AB[2 * order + 3] += S_grad[5]; } } else { if (res_flag) { grad_AB[2 * order] += S_grad[4]; grad_AB[2 * order + 1] += S_grad[5]; grad_AB[0] += S_grad[2]; grad_AB[1] += S_grad[3]; } else { grad_AB[2 * order] += S_grad[2]; grad_AB[2 * order + 1] += S_grad[3]; grad_AB[0] += S_grad[4]; grad_AB[1] += S_grad[5]; } } } return res; } __device__ inline double intersectAreaO(Point* ps1, int n1, Point* ps2, int n2, double* grad_AB) { if (area(ps1, n1) < 0) reverse1(ps1, n1); if (area(ps2, n2) < 0) reverse1(ps2, n2); ps1[n1] = ps1[0]; ps2[n2] = ps2[0]; double res = 0; for (int i = 0; i < n1; i++) { for (int j = 0; j < n2; j++) { res += intersectArea(ps1[i], ps1[i + 1], ps2[j], ps2[j + 1], grad_AB, i, n1); } } return res; } __device__ inline void Jarvis(Point* in_poly, int& n_poly) { Point p_max, p_k; int max_index, k_index; int Stack[NMAX] = {}, top1, top2; double sign; Point right_point[10], left_point[10]; for (int i = 0; i < n_poly; i++) { if (in_poly[i].y < in_poly[0].y || in_poly[i].y == in_poly[0].y && in_poly[i].x < in_poly[0].x) { Point* j = &(in_poly[0]); Point* k = &(in_poly[i]); swap1(j, k); } if (i == 0) { p_max = in_poly[0]; max_index = 0; } if (in_poly[i].y > p_max.y || in_poly[i].y == p_max.y && in_poly[i].x > p_max.x) { p_max = in_poly[i]; max_index = i; } } if (max_index == 0) { max_index = 1; p_max = in_poly[max_index]; } k_index = 0, 
Stack[0] = 0, top1 = 0; while (k_index != max_index) { p_k = p_max; k_index = max_index; for (int i = 1; i < n_poly; i++) { sign = cross(in_poly[Stack[top1]], in_poly[i], p_k); if ((sign > 0) || ((sign == 0) && (dis(in_poly[Stack[top1]], in_poly[i]) > dis(in_poly[Stack[top1]], p_k)))) { p_k = in_poly[i]; k_index = i; } } top1++; Stack[top1] = k_index; } for (int i = 0; i <= top1; i++) right_point[i] = in_poly[Stack[i]]; k_index = 0, Stack[0] = 0, top2 = 0; while (k_index != max_index) { p_k = p_max; k_index = max_index; for (int i = 1; i < n_poly; i++) { sign = cross(in_poly[Stack[top2]], in_poly[i], p_k); if ((sign < 0) || (sign == 0) && (dis(in_poly[Stack[top2]], in_poly[i]) > dis(in_poly[Stack[top2]], p_k))) { p_k = in_poly[i]; k_index = i; } } top2++; Stack[top2] = k_index; } for (int i = top2 - 1; i >= 0; i--) left_point[i] = in_poly[Stack[i]]; for (int i = 0; i < top1 + top2; i++) { if (i <= top1) { in_poly[i] = right_point[i]; } else { in_poly[i] = left_point[top2 - (i - top1)]; } } n_poly = top1 + top2; } __device__ inline double intersectAreaPoly(Point* ps1, int n1, Point* ps2, int n2, double* grad_C) { Point polygon[MAXN]; int n = n1 + n2, n_poly = 0; for (int i = 0; i < n1; i++) { for (int j = 0; j < n - n1; j++) { if (point_same(ps1[i], ps2[j])) { for (int k = j; k < n - n1 - 1; k++) { ps2[k] = ps2[k + 1]; } n2--; break; } } } n_poly = n1 + n2; for (int i = 0; i < n_poly; i++) { if (i < n1) { polygon[i] = ps1[i]; } else { polygon[i] = ps2[i - n1]; } } Jarvis(polygon, n_poly); int polygon_to_pred_index[18] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; int n_pred = 0; for (int i = 0; i < n_poly; i++) { for (int j = 0; j < n1; j++) { if (polygon[i].x == ps1[j].x && polygon[i].y == ps1[j].y) { polygon_to_pred_index[n_pred] = i; polygon_to_pred_index[n_pred + n1] = j; n_pred += 1; break; } } } if (n_pred == 0) { double polygon_area = fabs(area(polygon, n_poly)); for (int i = 0; i < 18; i++) { grad_C[i] = 0.0; } return 
polygon_area; } else { double polygon_area = polygon_area_grad(polygon, n_poly, polygon_to_pred_index, n1, grad_C); if (polygon_area < 0) { for (int i = 0; i < 18; i++) { grad_C[i] = -grad_C[i]; } } return fabs(polygon_area); } } // convex_find and get the polygon_index_box_index __device__ inline void Jarvis_and_index(Point* in_poly, int& n_poly, int* points_to_convex_ind) { int n_input = n_poly; Point input_poly[20]; for (int i = 0; i < n_input; i++) { input_poly[i].x = in_poly[i].x; input_poly[i].y = in_poly[i].y; } Point p_max, p_k; int max_index, k_index; int Stack[20], top1, top2; double sign; Point right_point[10], left_point[10]; for (int i = 0; i < n_poly; i++) { if (in_poly[i].y < in_poly[0].y || in_poly[i].y == in_poly[0].y && in_poly[i].x < in_poly[0].x) { Point* j = &(in_poly[0]); Point* k = &(in_poly[i]); swap1(j, k); } if (i == 0) { p_max = in_poly[0]; max_index = 0; } if (in_poly[i].y > p_max.y || in_poly[i].y == p_max.y && in_poly[i].x > p_max.x) { p_max = in_poly[i]; max_index = i; } } if (max_index == 0) { max_index = 1; p_max = in_poly[max_index]; } k_index = 0, Stack[0] = 0, top1 = 0; while (k_index != max_index) { p_k = p_max; k_index = max_index; for (int i = 1; i < n_poly; i++) { sign = cross(in_poly[Stack[top1]], in_poly[i], p_k); if ((sign > 0) || ((sign == 0) && (dis(in_poly[Stack[top1]], in_poly[i]) > dis(in_poly[Stack[top1]], p_k)))) { p_k = in_poly[i]; k_index = i; } } top1++; Stack[top1] = k_index; } for (int i = 0; i <= top1; i++) { right_point[i] = in_poly[Stack[i]]; } k_index = 0, Stack[0] = 0, top2 = 0; while (k_index != max_index) { p_k = p_max; k_index = max_index; for (int i = 1; i < n_poly; i++) { sign = cross(in_poly[Stack[top2]], in_poly[i], p_k); if ((sign < 0) || (sign == 0) && (dis(in_poly[Stack[top2]], in_poly[i]) > dis(in_poly[Stack[top2]], p_k))) { p_k = in_poly[i]; k_index = i; } } top2++; Stack[top2] = k_index; } for (int i = top2 - 1; i >= 0; i--) { left_point[i] = in_poly[Stack[i]]; } for (int i = 0; i < top1 + 
top2; i++) { if (i <= top1) { in_poly[i] = right_point[i]; } else { in_poly[i] = left_point[top2 - (i - top1)]; } } n_poly = top1 + top2; for (int i = 0; i < n_poly; i++) { for (int j = 0; j < n_input; j++) { if (point_same(in_poly[i], input_poly[j])) { points_to_convex_ind[i] = j; break; } } } } template __device__ inline float devrIoU(T const* const p, T const* const q, T* point_grad, const int idx) { Point ps1[MAXN], ps2[MAXN]; Point convex[MAXN]; for (int i = 0; i < 9; i++) { convex[i].x = (double)p[i * 2]; convex[i].y = (double)p[i * 2 + 1]; } int n_convex = 9; int points_to_convex_ind[9] = {-1, -1, -1, -1, -1, -1, -1, -1, -1}; Jarvis_and_index(convex, n_convex, points_to_convex_ind); int n1 = n_convex; int n2 = 4; for (int i = 0; i < n1; i++) { ps1[i].x = (double)convex[i].x; ps1[i].y = (double)convex[i].y; } for (int i = 0; i < n2; i++) { ps2[i].x = (double)q[i * 2]; ps2[i].y = (double)q[i * 2 + 1]; } int polygon_index_box_index[18]; for (int i = 0; i < n1; i++) { polygon_index_box_index[i] = i; polygon_index_box_index[i + n1] = i; } double grad_A[18] = {}; double grad_AB[18] = {}; double grad_C[18] = {}; double inter_area = intersectAreaO(ps1, n1, ps2, n2, grad_AB); double S_pred = polygon_area_grad(ps1, n1, polygon_index_box_index, n1, grad_A); if (S_pred < 0) { for (int i = 0; i < n_convex * 2; i++) { grad_A[i] = -grad_A[i]; } } double union_area = fabs(S_pred) + fabs(area(ps2, n2)) - inter_area; double iou = inter_area / union_area; double polygon_area = intersectAreaPoly(ps1, n1, ps2, n2, grad_C); // printf("%d:live\n", idx); double rot_giou = iou - (polygon_area - union_area) / polygon_area; float grad_point_temp[18] = {}; for (int i = 0; i < n_convex; i++) { int grad_point = points_to_convex_ind[i]; grad_point_temp[2 * grad_point] = (float)((union_area + inter_area) / (union_area * union_area) * grad_AB[2 * i] - iou / union_area * grad_A[2 * i] - 1 / polygon_area * (grad_AB[2 * i] - grad_A[2 * i]) - (union_area) / polygon_area / polygon_area * 
grad_C[2 * i]); grad_point_temp[2 * grad_point + 1] = (float)((union_area + inter_area) / (union_area * union_area) * grad_AB[2 * i + 1] - iou / union_area * grad_A[2 * i + 1] - 1 / polygon_area * (grad_AB[2 * i + 1] - grad_A[2 * i + 1]) - (union_area) / polygon_area / polygon_area * grad_C[2 * i + 1]); } for (int i = 0; i < 9; i++) { point_grad[2 * i] = grad_point_temp[2 * i]; point_grad[2 * i + 1] = grad_point_temp[2 * i + 1]; } return (float)rot_giou; } template __global__ void convex_giou_cuda_kernel(const int ex_n_boxes, const int gt_n_boxes, const T* ex_boxes, const T* gt_boxes, T* point_grad) { CUDA_1D_KERNEL_LOOP(index, ex_n_boxes) { const T* cur_box = ex_boxes + index * 18; const T* cur_gt_box = gt_boxes + index * 8; T* cur_grad = point_grad + index * 19; T giou = devrIoU(cur_box, cur_gt_box, cur_grad, threadIdx.x); cur_grad[18] = giou; } } __device__ inline int lineCross(Point a, Point b, Point c, Point d, Point& p) { double s1, s2; s1 = cross(a, b, c); s2 = cross(a, b, d); if (sig(s1) == 0 && sig(s2) == 0) return 2; if (sig(s2 - s1) == 0) return 0; p.x = (c.x * s2 - d.x * s1) / (s2 - s1); p.y = (c.y * s2 - d.y * s1) / (s2 - s1); return 1; } __device__ inline void polygon_cut(Point* p, int& n, Point a, Point b) { Point pp[MAXN]; int m = 0; p[n] = p[0]; for (int i = 0; i < n; i++) { if (sig(cross(a, b, p[i])) > 0) { pp[m] = p[i]; m++; } if (sig(cross(a, b, p[i])) != sig(cross(a, b, p[i + 1]))) { lineCross(a, b, p[i], p[i + 1], pp[m]); m++; } } n = 0; for (int i = 0; i < m; i++) { if (!i || !(point_same(pp[i], pp[i - 1]))) { p[n] = pp[i]; n++; } } while (n > 1 && point_same(p[n - 1], p[0])) n--; } __device__ inline double intersectArea(Point a, Point b, Point c, Point d) { Point o(0, 0); int s1 = sig(cross(o, a, b)); int s2 = sig(cross(o, c, d)); if (s1 == 0 || s2 == 0) return 0.0; if (s1 == -1) { Point* i = &a; Point* j = &b; swap1(i, j); } if (s2 == -1) { Point* i = &c; Point* j = &d; swap1(i, j); } Point p[10] = {o, a, b}; int n = 3; polygon_cut(p, n, o, 
c); polygon_cut(p, n, c, d); polygon_cut(p, n, d, o); double res = area(p, n); if (s1 * s2 == -1) res = -res; return res; } __device__ inline double intersectAreaO(Point* ps1, int n1, Point* ps2, int n2) { if (area(ps1, n1) < 0) reverse1(ps1, n1); if (area(ps2, n2) < 0) reverse1(ps2, n2); ps1[n1] = ps1[0]; ps2[n2] = ps2[0]; double res = 0; for (int i = 0; i < n1; i++) { for (int j = 0; j < n2; j++) { res += intersectArea(ps1[i], ps1[i + 1], ps2[j], ps2[j + 1]); } } return res; } template __device__ inline float devrIoU(T const* const p, T const* const q) { Point ps1[MAXN], ps2[MAXN]; Point convex[MAXN]; for (int i = 0; i < 9; i++) { convex[i].x = (double)p[i * 2]; convex[i].y = (double)p[i * 2 + 1]; } int n_convex = 9; int points_to_convex_ind[9] = {-1, -1, -1, -1, -1, -1, -1, -1, -1}; Jarvis_and_index(convex, n_convex, points_to_convex_ind); int n1 = n_convex; for (int i = 0; i < n1; i++) { ps1[i].x = (double)convex[i].x; ps1[i].y = (double)convex[i].y; } int n2 = 4; for (int i = 0; i < n2; i++) { ps2[i].x = (double)q[i * 2]; ps2[i].y = (double)q[i * 2 + 1]; } double inter_area = intersectAreaO(ps1, n1, ps2, n2); double S_pred = area(ps1, n1); double union_area = fabs(S_pred) + fabs(area(ps2, n2)) - inter_area; double iou = inter_area / union_area; return (float)iou; } template __global__ void convex_iou_cuda_kernel(const int ex_n_boxes, const int gt_n_boxes, const T* ex_boxes, const T* gt_boxes, T* iou) { CUDA_1D_KERNEL_LOOP(index, ex_n_boxes) { const T* cur_box = ex_boxes + index * 18; for (int i = 0; i < gt_n_boxes; i++) { iou[index * gt_n_boxes + i] = devrIoU(cur_box, gt_boxes + i * 8); } } } #endif // CONVEX_IOU_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/correlation_cuda.cuh ================================================ // Copyright (c) OpenMMLab. All rights reserved. 
// Modified from // https://github.com/ClementPinard/Pytorch-Correlation-extension/blob/master/Correlation_Module/correlation_cuda_kernel.cu // Original licence: Under MIT License #ifndef CORRELATION_CUDA #define CORRELATION_CUDA #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif #include #include // Using is recommended in the official documentation in // https://pytorch.org/tutorials/advanced/cpp_extension.html#writing-the-c-op. // However, we use for compatibility with CUDA 9.0 // Read https://github.com/pytorch/extension-cpp/issues/35 for more details. #include #include #include using namespace torch; #define TensorAcc4R PackedTensorAccessor32 #define TensorAcc5R PackedTensorAccessor32 #define WITHIN_BOUNDS(x, y, H, W) (x >= 0 && x < H && y >= 0 && y < W) #define THREADS_FORWARD 32 #define THREADS_BACKWARD 16 template __global__ void correlation_forward_cuda_kernel( const TensorAcc4R rInput1, const TensorAcc4R rInput2, TensorAcc5R output, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW) { const int iH = rInput1.size(1); const int iW = rInput1.size(2); const int C = rInput1.size(3); const int n = blockIdx.x; const int h = blockIdx.y; const int w = blockIdx.z; const int thread = threadIdx.x; const int start_i = -padH + h * dH; const int start_j = -padW + w * dW; const int patchRadH = dilation_patchH * (patchH - 1) / 2; const int patchRadW = dilation_patchW * (patchW - 1) / 2; __shared__ scalar_t prod_sum[THREADS_FORWARD]; for (int ph = 0; ph < patchH; ++ph) { int ph_dilated = ph * dilation_patchH - patchRadH; for (int pw = 0; pw < patchW; ++pw) { int pw_dilated = pw * dilation_patchW - patchRadW; prod_sum[thread] = 0; for (int i = 0; i < kH; ++i) { int i1 = start_i + i * dilationH; int i2 = i1 + ph_dilated; if WITHIN_BOUNDS(i1, i2, iH, iH) { for (int j = 0; j < kW; ++j) { int j1 = start_j + j * dilationW; int 
j2 = j1 + pw_dilated; if WITHIN_BOUNDS(j1, j2, iW, iW) { for (int c = thread; c < C; c += THREADS_FORWARD) { scalar_t v1 = rInput1[n][i1][j1][c]; scalar_t v2 = rInput2[n][i2][j2][c]; prod_sum[thread] += v1 * v2; } } } } } // accumulate __syncthreads(); if (thread == 0) { scalar_t reduce_sum = 0; for (int index = 0; index < THREADS_FORWARD; ++index) { reduce_sum += prod_sum[index]; } output[n][ph][pw][h][w] = reduce_sum; } } } } template __global__ void correlation_backward_cuda_kernel_input1( const TensorAcc5R grad_output, const TensorAcc4R input2, TensorAcc4R grad_input1, const int kH, const int kW, const int patchH, const int patchW, const int padH, const int padW, const int dilationH, const int dilationW, const int dilation_patchH, const int dilation_patchW, const int dH, const int dW, const int batch) { const int iH = input2.size(2); const int iW = input2.size(3); const int H = grad_output.size(3); const int W = grad_output.size(4); const int patchRadH = (patchH - 1) / 2; const int patchRadW = (patchW - 1) / 2; const int n = batch; const int c = blockIdx.x; const int h = blockIdx.y; const int w = blockIdx.z; const int ph_off = threadIdx.x; const int pw_off = threadIdx.y; const int h_2 = h + padH; const int w_2 = w + padW; const int min_h = h_2 - kH * dilationH; const int min_w = w_2 - kW * dilationW; __shared__ scalar_t prod_sum[THREADS_BACKWARD][THREADS_BACKWARD]; prod_sum[ph_off][pw_off] = 0; for (int ph = ph_off; ph < patchH; ph += THREADS_BACKWARD) { int i1 = h + dilation_patchH * (ph - patchRadH); for (int pw = pw_off; pw < patchW; pw += THREADS_BACKWARD) { int j1 = w + dilation_patchW * (pw - patchRadW); if (WITHIN_BOUNDS(i1, j1, iH, iW)) { scalar_t val = input2[n][c][i1][j1]; for (int h_3 = h_2; h_3 > min_h; h_3 -= dilationH) { int i2 = (h_3) / dH; if (i2 * dH != h_3) continue; for (int w_3 = w_2; w_3 > min_w; w_3 -= dilationW) { int j2 = (w_3) / dW; if (j2 * dW != w_3) continue; if WITHIN_BOUNDS(i2, j2, H, W) { prod_sum[ph_off][pw_off] += 
grad_output[n][ph][pw][i2][j2] * val; } } } } } } __syncthreads(); if (ph_off == 0 && pw_off == 0) { scalar_t reduce_sum = 0; for (int ph = 0; ph < THREADS_BACKWARD; ++ph) { for (int pw = 0; pw < THREADS_BACKWARD; ++pw) { reduce_sum += prod_sum[ph][pw]; } } grad_input1[n][c][h][w] = reduce_sum; } } template __global__ void correlation_backward_cuda_kernel_input2( const TensorAcc5R grad_output, const TensorAcc4R input1, TensorAcc4R grad_input2, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW, int batch) { const int iH = input1.size(2); const int iW = input1.size(3); const int patchRadH = (patchH - 1) / 2; const int patchRadW = (patchW - 1) / 2; const int H = grad_output.size(3); const int W = grad_output.size(4); const int dilatedKH = kH * dilationH; const int dilatedKW = kW * dilationW; const int n = batch; const int c = blockIdx.x; const int h = blockIdx.y; const int w = blockIdx.z; const int ph_off = threadIdx.x; const int pw_off = threadIdx.y; __shared__ scalar_t prod_sum[THREADS_BACKWARD][THREADS_BACKWARD]; prod_sum[ph_off][pw_off] = 0; for (int ph = ph_off; ph < patchH; ph += THREADS_BACKWARD) { int i1 = h - dilation_patchH * (ph - patchRadH); for (int pw = pw_off; pw < patchW; pw += THREADS_BACKWARD) { int j1 = w - dilation_patchW * (pw - patchRadW); if WITHIN_BOUNDS(i1, j1, iH, iW) { scalar_t val = input1[n][c][i1][j1]; const int h_2 = i1 + padH; const int w_2 = j1 + padW; const int min_h = h_2 - dilatedKH; const int min_w = w_2 - dilatedKW; for (int h_3 = h_2; h_3 > min_h; h_3 -= dilationH) { int i2 = (h_3) / dH; if (i2 * dH != h_3) continue; for (int w_3 = w_2; w_3 > min_w; w_3 -= dilationW) { int j2 = (w_3) / dW; if (j2 * dW != w_3) continue; if WITHIN_BOUNDS(i2, j2, H, W) { prod_sum[ph_off][pw_off] += grad_output[n][ph][pw][i2][j2] * val; } } } } } } __syncthreads(); if (ph_off == 0 && pw_off == 0) { scalar_t reduce_sum = 0; for (int ph = 0; ph < 
THREADS_BACKWARD; ++ph) { for (int pw = 0; pw < THREADS_BACKWARD; ++pw) { reduce_sum += prod_sum[ph][pw]; } } grad_input2[n][c][h][w] = reduce_sum; } } #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/deform_conv_cuda_kernel.cuh ================================================ /*! ******************* BEGIN Caffe Copyright Notice and Disclaimer ***************** * * COPYRIGHT * * All contributions by the University of California: * Copyright (c) 2014-2017 The Regents of the University of California (Regents) * All rights reserved. * * All other contributions: * Copyright (c) 2014-2017, the respective contributors * All rights reserved. * * Caffe uses a shared copyright model: each contributor holds copyright over * their contributions to Caffe. The project versioning records all such * contribution and copyright details. If a contributor wants to further mark * their specific copyright on a particular contribution, they should indicate * their copyright solely in the commit message of the change when it is * committed. * * LICENSE * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, *this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * CONTRIBUTION AGREEMENT * * By contributing to the BVLC/caffe repository through pull-request, comment, * or otherwise, the contributor releases their content to the * license and copyright terms herein. * ***************** END Caffe Copyright Notice and Disclaimer ********************* * * Copyright (c) 2018 Microsoft * Licensed under The MIT License [see LICENSE for details] * \file modulated_deformable_im2col.cuh * \brief Function definitions of converting an image to * column matrix based on kernel, padding, dilation, and offset. * These functions are mainly used in deformable convolution operators. 
* \ref: https://arxiv.org/abs/1703.06211 * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng */ // modified from // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu #ifndef DEFORM_CONV_CUDA_KERNEL_CUH #define DEFORM_CONV_CUDA_KERNEL_CUH #include #ifdef MMCV_WITH_TRT #include "common_cuda_helper.hpp" #else // MMCV_WITH_TRT #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else // MMCV_USE_PARROTS #include "pytorch_cuda_helper.hpp" #endif // MMCV_USE_PARROTS #endif // MMCV_WITH_TRT template __device__ T deformable_im2col_bilinear(const T *input, const int data_width, const int height, const int width, T h, T w) { if (h <= -1 || height <= h || w <= -1 || width <= w) { return 0; } int h_low = floorf(h); int w_low = floorf(w); int h_high = h_low + 1; int w_high = w_low + 1; T lh = h - h_low; T lw = w - w_low; T hh = 1 - lh, hw = 1 - lw; T v1 = 0; if (h_low >= 0 && w_low >= 0) v1 = input[h_low * data_width + w_low]; T v2 = 0; if (h_low >= 0 && w_high <= width - 1) v2 = input[h_low * data_width + w_high]; T v3 = 0; if (h_high <= height - 1 && w_low >= 0) v3 = input[h_high * data_width + w_low]; T v4 = 0; if (h_high <= height - 1 && w_high <= width - 1) v4 = input[h_high * data_width + w_high]; T w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); return val; } template __device__ T get_gradient_weight(T argmax_h, T argmax_w, const int h, const int w, const int height, const int width) { if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) { // empty return 0; } int argmax_h_low = floorf(argmax_h); int argmax_w_low = floorf(argmax_w); int argmax_h_high = argmax_h_low + 1; int argmax_w_high = argmax_w_low + 1; T weight = 0; if (h == argmax_h_low && w == argmax_w_low) weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); if (h == argmax_h_low && w == argmax_w_high) weight = (h + 1 - 
// Derivative of the bilinear sample of `im_data` at (argmax_h, argmax_w) with
// respect to one of the two coordinates.
//
//   bp_dir == 0 : derivative with respect to the row coordinate (argmax_h)
//   bp_dir == 1 : derivative with respect to the column coordinate (argmax_w)
//   otherwise   : 0
//
// `im_data` is read as im_data[row * data_width + col]. Positions outside
// (-1, height) x (-1, width) return 0, and taps outside the image are skipped.
template <typename T>
__device__ T get_coordinate_weight(T argmax_h, T argmax_w, const int height,
                                   const int width, const T *im_data,
                                   const int data_width, const int bp_dir) {
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
      argmax_w >= width) {
    // empty
    return 0;
  }

  int argmax_h_low = floorf(argmax_h);
  int argmax_w_low = floorf(argmax_w);
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  T weight = 0;

  if (bp_dir == 0) {
    // d/dh: the low-row taps enter with a negative sign, the high-row taps
    // with a positive sign, each scaled by its horizontal bilinear weight.
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_w_low + 1 - argmax_w) *
                im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += -1 * (argmax_w - argmax_w_low) *
                im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += (argmax_w_low + 1 - argmax_w) *
                im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_w - argmax_w_low) *
                im_data[argmax_h_high * data_width + argmax_w_high];
  } else if (bp_dir == 1) {
    // d/dw: the low-column taps enter with a negative sign, the high-column
    // taps with a positive sign, each scaled by its vertical bilinear weight.
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_h_low + 1 - argmax_h) *
                im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += (argmax_h_low + 1 - argmax_h) *
                im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += -1 * (argmax_h - argmax_h_low) *
                im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_h - argmax_h_low) *
                im_data[argmax_h_high * data_width + argmax_w_high];
  }

  return weight;
}
// Deformable im2col: for every (channel, batch, output row, output column)
// element, samples the kernel_h x kernel_w input taps at their learned
// offsets and writes them into the column buffer.
//
// Index layout as used by the code below:
//   data_im     : [(b * num_channels + c) * height + y] * width + x
//   data_offset : [(b * deformable_group + g) * 2 * kernel_h * kernel_w + k]
//                 * height_col * width_col + h_col * width_col + w_col,
//                 where k = 2 * (i * kernel_w + j) is the row offset and
//                 k + 1 the column offset of kernel tap (i, j)
//   data_col    : [((c * kernel_h * kernel_w + tap) * batch_size + b)
//                 * height_col + h_col] * width_col + w_col
// `n` is the number of work items iterated by CUDA_1D_KERNEL_LOOP (macro
// defined in the included helper header).
template <typename T>
__global__ void deformable_im2col_gpu_kernel(
    const int n, const T *data_im, const T *data_offset, const int height,
    const int width, const int kernel_h, const int kernel_w, const int pad_h,
    const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int num_channels, const int deformable_group, const int height_col,
    const int width_col, T *data_col) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    // Decompose the flat work index into (c_im, b_col, h_col, w_col).
    const int w_col = index % width_col;
    const int h_col = (index / width_col) % height_col;
    const int b_col = (index / width_col / height_col) % batch_size;
    const int c_im = (index / width_col / height_col) / batch_size;
    const int c_col = c_im * kernel_h * kernel_w;

    // compute deformable group index
    const int deformable_group_index = c_im / channel_per_deformable_group;

    // Top-left input coordinate of the (undeformed) receptive field.
    const int h_in = h_col * stride_h - pad_h;
    const int w_in = w_col * stride_w - pad_w;

    T *data_col_ptr =
        data_col +
        ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
    const T *data_im_ptr =
        data_im + (b_col * num_channels + c_im) * height * width;
    const T *data_offset_ptr =
        data_offset + (b_col * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;

    for (int i = 0; i < kernel_h; ++i) {
      for (int j = 0; j < kernel_w; ++j) {
        const int data_offset_h_ptr =
            ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
        const int data_offset_w_ptr =
            ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col +
            w_col;
        const T offset_h = data_offset_ptr[data_offset_h_ptr];
        const T offset_w = data_offset_ptr[data_offset_w_ptr];
        T val = static_cast<T>(0);
        const T h_im = h_in + i * dilation_h + offset_h;
        const T w_im = w_in + j * dilation_w + offset_w;
        // Samples entirely outside the image keep the value 0.
        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
          val = deformable_im2col_bilinear(data_im_ptr, width, height, width,
                                           h_im, w_im);
        *data_col_ptr = val;
        // Advance to the same spatial position of the next kernel tap.
        data_col_ptr += batch_size * height_col * width_col;
      }
    }
  }
}
// Deformable col2im: scatters the gradient of each column-buffer element back
// onto the input-image gradient `grad_im`.
//
// Each work item corresponds to one element of `data_col`, decomposed as
// (c, i, j, b, h_out, w_out). Its sampling position is recomputed from the
// offsets, and the gradient is distributed to the integer pixels within
// distance < 1 using get_gradient_weight (defined earlier in this header).
// Accumulation uses atomicAdd because several work items can hit one pixel.
template <typename T>
__global__ void deformable_col2im_gpu_kernel(
    const int n, const T *data_col, const T *data_offset, const int channels,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int deformable_group, const int height_col, const int width_col,
    T *grad_im) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    const int j = (index / width_col / height_col / batch_size) % kernel_w;
    const int i =
        (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
    const int c =
        index / width_col / height_col / batch_size / kernel_w / kernel_h;
    // compute the start and end of the output

    const int deformable_group_index = c / channel_per_deformable_group;

    int w_out = index % width_col;
    int h_out = (index / width_col) % height_col;
    int b = (index / width_col / height_col) % batch_size;
    int w_in = w_out * stride_w - pad_w;
    int h_in = h_out * stride_h - pad_h;

    const T *data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const int data_offset_h_ptr =
        ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
    const int data_offset_w_ptr =
        ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
    const T offset_h = data_offset_ptr[data_offset_h_ptr];
    const T offset_w = data_offset_ptr[data_offset_w_ptr];
    const T cur_inv_h_data = h_in + i * dilation_h + offset_h;
    const T cur_inv_w_data = w_in + j * dilation_w + offset_w;

    const T cur_top_grad = data_col[index];
    const int cur_h = (int)cur_inv_h_data;
    const int cur_w = (int)cur_inv_w_data;
    // Visit a 5x5 window around the truncated position; only in-bounds pixels
    // closer than 1 in both directions receive gradient.
    for (int dy = -2; dy <= 2; dy++) {
      for (int dx = -2; dx <= 2; dx++) {
        if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 &&
            cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
            abs(cur_inv_w_data - (cur_w + dx)) < 1) {
          int cur_bottom_grad_pos =
              ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
          T weight =
              get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy,
                                  cur_w + dx, height, width);
          atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
        }
      }
    }
  }
}

// Gradient of the column buffer with respect to the sampling offsets.
//
// Each work item corresponds to one element of `grad_offset`, decomposed as
// (b, c, h, w) with c in [0, offset_channels). Even c within a deformable
// group is a row offset (bp_dir == 0), odd c a column offset (bp_dir == 1).
// The result sums, over the column channels that share this offset,
// get_coordinate_weight(...) * data_col[...].
template <typename T>
__global__ void deformable_col2im_coord_gpu_kernel(
    const int n, const T *data_col, const T *data_im, const T *data_offset,
    const int channels, const int height, const int width, const int kernel_h,
    const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int offset_channels, const int deformable_group, const int height_col,
    const int width_col, T *grad_offset) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    T val = 0;
    int w = index % width_col;
    int h = (index / width_col) % height_col;
    int c = (index / width_col / height_col) % offset_channels;
    int b = (index / width_col / height_col) / offset_channels;
    // compute the start and end of the output

    const int deformable_group_index = c / (2 * kernel_h * kernel_w);
    const int col_step = kernel_h * kernel_w;
    int cnt = 0;
    const T *data_col_ptr = data_col + deformable_group_index *
                                           channel_per_deformable_group *
                                           batch_size * width_col * height_col;
    const T *data_im_ptr =
        data_im + (b * deformable_group + deformable_group_index) *
                      channel_per_deformable_group / kernel_h / kernel_w *
                      height * width;
    const T *data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;

    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;

    // Step through the column channels that use kernel tap offset_c / 2;
    // `cnt` counts the image channel visited within the group.
    for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group;
         col_c += col_step) {
      const int col_pos =
          (((col_c * batch_size + b) * height_col) + h) * width_col + w;
      const int bp_dir = offset_c % 2;

      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
      int i =
          (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
      int w_out = col_pos % width_col;
      int h_out = (col_pos / width_col) % height_col;
      int w_in = w_out * stride_w - pad_w;
      int h_in = h_out * stride_h - pad_h;
      const int data_offset_h_ptr =
          (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
      const int data_offset_w_ptr =
          (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col +
           w_out);
      const T offset_h = data_offset_ptr[data_offset_h_ptr];
      const T offset_w = data_offset_ptr[data_offset_w_ptr];
      T inv_h = h_in + i * dilation_h + offset_h;
      T inv_w = w_in + j * dilation_w + offset_w;
      // Out-of-image samples are mapped to (-2, -2), for which
      // get_coordinate_weight returns 0.
      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
        inv_h = inv_w = -2;
      const T weight = get_coordinate_weight(inv_h, inv_w, height, width,
                                             data_im_ptr + cnt * height * width,
                                             width, bp_dir);
      val += weight * data_col_ptr[col_pos];
      cnt += 1;
    }

    grad_offset[index] = val;
  }
}
// Forward pass of deformable RoI pooling.
//
// One work item per pooled output element (n, c, ph, pw). Each RoI occupies
// five values in `rois`: element 0 is used as the batch index, elements 1..4
// as start_w, start_h, end_w, end_h before scaling by `spatial_scale`.
// When `offset` is non-NULL it holds, per RoI, a plane of width offsets
// followed by a plane of height offsets (each pooled_height x pooled_width);
// they shift the RoI start by gamma * roi_size * offset.
// The output is the average of bilinear samples taken on a
// roi_bin_grid_h x roi_bin_grid_w grid inside the bin. bilinear_interpolate
// comes from the included helper header.
template <typename T>
__global__ void deform_roi_pool_forward_cuda_kernel(
    const int nthreads, const T* input, const T* rois, const T* offset,
    T* output, const int pooled_height, const int pooled_width,
    const T spatial_scale, const int sampling_ratio, const T gamma,
    const int channels, const int height, const int width) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the pooled output
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int c = (index / pooled_width / pooled_height) % channels;
    int n = index / pooled_width / pooled_height / channels;

    const T* offset_rois = rois + n * 5;
    int roi_batch_ind = offset_rois[0];

    // Do not use rounding; this implementation detail is critical
    T roi_start_w = offset_rois[1] * spatial_scale - 0.5;
    T roi_start_h = offset_rois[2] * spatial_scale - 0.5;
    T roi_end_w = offset_rois[3] * spatial_scale - 0.5;
    T roi_end_h = offset_rois[4] * spatial_scale - 0.5;

    T roi_width = roi_end_w - roi_start_w;
    T roi_height = roi_end_h - roi_start_h;

    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

    const T* offset_input =
        input + (roi_batch_ind * channels + c) * height * width;

    // We use roi_bin_grid to sample the grid and mimic integral
    int roi_bin_grid_h =
        (sampling_ratio > 0)
            ? sampling_ratio
            : static_cast<int>(ceilf(roi_height / pooled_height));
    int roi_bin_grid_w =
        (sampling_ratio > 0)
            ? sampling_ratio
            : static_cast<int>(ceilf(roi_width / pooled_width));

    // Compute roi offset
    if (offset != NULL) {
      const T* offset_cur_w = offset + n * pooled_width * pooled_height * 2 +
                              ph * pooled_width + pw;
      T offset_roi_w = gamma * roi_width * offset_cur_w[0];
      T offset_roi_h =
          gamma * roi_height * offset_cur_w[pooled_width * pooled_height];
      roi_start_w += offset_roi_w;
      roi_start_h += offset_roi_h;
    }

    // We do average pooling inside a bin; clamp so an empty grid divides by 1.
    const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1);
    T output_val = 0.;
    for (int iy = 0; iy < roi_bin_grid_h; iy++) {
      const T y = roi_start_h + ph * bin_size_h +
                  static_cast<T>(iy + .5f) * bin_size_h /
                      static_cast<T>(roi_bin_grid_h);
      for (int ix = 0; ix < roi_bin_grid_w; ix++) {
        const T x = roi_start_w + pw * bin_size_w +
                    static_cast<T>(ix + .5f) * bin_size_w /
                        static_cast<T>(roi_bin_grid_w);
        T val = bilinear_interpolate(offset_input, height, width, y, x, index);
        output_val += val;
      }
    }
    output[index] = output_val / count;
  }
}
// Backward pass of deformable RoI pooling.
//
// One work item per pooled output element (n, c, ph, pw). The incoming
// gradient is split evenly over the sampling grid of the bin and scattered
// with atomicAdd to the four bilinear taps of each sample in `grad_input`.
// When `offset` is non-NULL, the gradient with respect to the width/height
// offsets is accumulated into `grad_offset`, which uses the same layout as
// `offset` (width plane followed by height plane per RoI).
// bilinear_interpolate_gradient comes from the included helper header; the
// `>= 0` test below skips samples for which it reports negative tap indices.
template <typename T>
__global__ void deform_roi_pool_backward_cuda_kernel(
    const int nthreads, const T* grad_output, const T* input, const T* rois,
    const T* offset, T* grad_input, T* grad_offset, const int pooled_height,
    const int pooled_width, const T spatial_scale, const int sampling_ratio,
    const T gamma, const int channels, const int height, const int width) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the pooled output
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int c = (index / pooled_width / pooled_height) % channels;
    int n = index / pooled_width / pooled_height / channels;

    const T* offset_rois = rois + n * 5;
    int roi_batch_ind = offset_rois[0];
    const T* offset_input =
        input + ((roi_batch_ind * channels + c) * height * width);
    T* offset_grad_input =
        grad_input + ((roi_batch_ind * channels + c) * height * width);

    // Do not use rounding; this implementation detail is critical
    T roi_start_w = offset_rois[1] * spatial_scale - 0.5;
    T roi_start_h = offset_rois[2] * spatial_scale - 0.5;
    T roi_end_w = offset_rois[3] * spatial_scale - 0.5;
    T roi_end_h = offset_rois[4] * spatial_scale - 0.5;

    T roi_width = roi_end_w - roi_start_w;
    T roi_height = roi_end_h - roi_start_h;

    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

    // We use roi_bin_grid to sample the grid and mimic integral
    int roi_bin_grid_h =
        (sampling_ratio > 0)
            ? sampling_ratio
            : static_cast<int>(ceilf(roi_height / pooled_height));
    int roi_bin_grid_w =
        (sampling_ratio > 0)
            ? sampling_ratio
            : static_cast<int>(ceilf(roi_width / pooled_width));

    // Compute roi offset
    if (offset != NULL) {
      const T* offset_cur_w = offset + n * pooled_width * pooled_height * 2 +
                              ph * pooled_width + pw;
      T offset_roi_w = gamma * roi_width * offset_cur_w[0];
      T offset_roi_h =
          gamma * roi_height * offset_cur_w[pooled_width * pooled_height];
      roi_start_w += offset_roi_w;
      roi_start_h += offset_roi_h;
    }

    // We do average (integral) pooling inside a bin. The count is clamped to
    // at least 1, matching the forward kernel, so a degenerate RoI with an
    // empty sampling grid never forms grad / 0 (the loops below are skipped
    // in that case, so results are unchanged).
    const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1);  // e.g. = 4
    const T grad_output_this_bin = grad_output[index] / count;

    for (int iy = 0; iy < roi_bin_grid_h; iy++) {
      const T y = roi_start_h + ph * bin_size_h +
                  static_cast<T>(iy + .5f) * bin_size_h /
                      static_cast<T>(roi_bin_grid_h);
      for (int ix = 0; ix < roi_bin_grid_w; ix++) {
        const T x = roi_start_w + pw * bin_size_w +
                    static_cast<T>(ix + .5f) * bin_size_w /
                        static_cast<T>(roi_bin_grid_w);

        T w1, w2, w3, w4;
        int x_low, x_high, y_low, y_high;
        bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4,
                                      x_low, x_high, y_low, y_high, index);

        if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
          // Gradient with respect to the input feature map.
          atomicAdd(offset_grad_input + y_low * width + x_low,
                    grad_output_this_bin * w1);
          atomicAdd(offset_grad_input + y_low * width + x_high,
                    grad_output_this_bin * w2);
          atomicAdd(offset_grad_input + y_high * width + x_low,
                    grad_output_this_bin * w3);
          atomicAdd(offset_grad_input + y_high * width + x_high,
                    grad_output_this_bin * w4);
          if (offset != NULL) {
            // Gradient with respect to the learned RoI offsets.
            T input_00 = offset_input[y_low * width + x_low];
            T input_10 = offset_input[y_low * width + x_high];
            T input_01 = offset_input[y_high * width + x_low];
            T input_11 = offset_input[y_high * width + x_high];
            T ogx = gamma * roi_width * grad_output_this_bin *
                    (input_11 * (y - y_low) + input_10 * (y_high - y) +
                     input_01 * (y_low - y) + input_00 * (y - y_high));
            T ogy = gamma * roi_height * grad_output_this_bin *
                    (input_11 * (x - x_low) + input_01 * (x_high - x) +
                     input_10 * (x_low - x) + input_00 * (x - x_high));
            atomicAdd(grad_offset + n * pooled_width * pooled_height * 2 +
                          ph * pooled_width + pw,
                      ogx);
            atomicAdd(grad_offset + n * pooled_width * pooled_height * 2 +
                          pooled_width * pooled_height + ph * pooled_width + pw,
                      ogy);
          }
        }
      }
    }
  }
}
// Merges reduction slot `idx2` into slot `idx1`: slot `idx1` ends up holding
// the larger of the two distances and the index that belongs to it. On a tie
// the entry already stored in `idx1` is kept.
__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,
                         int idx1, int idx2) {
  // Read both slots before writing anything.
  const float kept_dist = dists[idx1];
  const float other_dist = dists[idx2];
  const int kept_index = dists_i[idx1];
  const int other_index = dists_i[idx2];

  dists[idx1] = max(kept_dist, other_dist);
  if (other_dist > kept_dist) {
    dists_i[idx1] = other_index;
  } else {
    dists_i[idx1] = kept_index;
  }
}
// Furthest point sampling over 3-D coordinates.
//
// One thread block per batch element (blockIdx.x); `block_size` must equal
// the number of threads in the block, since it is used both as the stride of
// the per-thread scan and as the size of the shared reduction arrays.
//
// dataset: (B, N, 3)
// tmp: (B, N)   running minimum squared distance to the selected set
//               (NOTE(review): presumably initialised to a large value by the
//               caller - confirm)
// output:
//      idx: (B, M)
template <unsigned int block_size>
__global__ void furthest_point_sampling_forward_cuda_kernel(
    int b, int n, int m, const float *__restrict__ dataset,
    float *__restrict__ temp, int *__restrict__ idxs) {
  if (m <= 0) return;
  __shared__ float dists[block_size];
  __shared__ int dists_i[block_size];

  int batch_index = blockIdx.x;
  dataset += batch_index * n * 3;
  temp += batch_index * n;
  idxs += batch_index * m;

  int tid = threadIdx.x;
  const int stride = block_size;

  // The first sample is always point 0.
  int old = 0;
  if (threadIdx.x == 0) idxs[0] = old;

  __syncthreads();
  for (int j = 1; j < m; j++) {
    int besti = 0;
    float best = -1;
    float x1 = dataset[old * 3 + 0];
    float y1 = dataset[old * 3 + 1];
    float z1 = dataset[old * 3 + 2];
    // Each thread scans its strided share of the points, refreshes their
    // distance to the selected set and remembers its furthest candidate.
    for (int k = tid; k < n; k += stride) {
      float x2, y2, z2;
      x2 = dataset[k * 3 + 0];
      y2 = dataset[k * 3 + 1];
      z2 = dataset[k * 3 + 2];
      // float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
      // if (mag <= 1e-3)
      // continue;

      float d =
          (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
      float d2 = min(d, temp[k]);
      temp[k] = d2;
      besti = d2 > best ? k : besti;
      best = d2 > best ? d2 : best;
    }
    dists[tid] = best;
    dists_i[tid] = besti;
    __syncthreads();

    // Tree reduction of the per-thread candidates into slot 0.
#pragma unroll
    for (int block_size_thres = 1024; block_size_thres >= 2;
         block_size_thres >>= 1) {
      const int tid_thres = block_size_thres / 2;
      if (block_size >= block_size_thres && tid < tid_thres) {
        __update(dists, dists_i, tid, tid + tid_thres);
      }
      __syncthreads();
    }

    old = dists_i[0];
    if (tid == 0) idxs[j] = old;
    // Every thread must have read dists_i[0] before any thread overwrites its
    // shared slot in the next iteration; without this barrier thread 0 can
    // run ahead and replace dists_i[0] while other warps are still reading.
    __syncthreads();
  }
}

// Modified from
// https://github.com/qiqihaer/3DSSD-pytorch/blob/master/lib/pointnet2/src/sampling_gpu.cu
//
// Furthest point sampling driven by a precomputed pairwise distance matrix
// instead of coordinates; otherwise identical to the kernel above.
//
// dataset: (B, N, N)
// tmp: (B, N)
// output:
//      idx: (B, M)
template <unsigned int block_size>
__global__ void furthest_point_sampling_with_dist_forward_cuda_kernel(
    int b, int n, int m, const float *__restrict__ dataset,
    float *__restrict__ temp, int *__restrict__ idxs) {
  if (m <= 0) return;
  __shared__ float dists[block_size];
  __shared__ int dists_i[block_size];

  int batch_index = blockIdx.x;
  dataset += batch_index * n * n;
  temp += batch_index * n;
  idxs += batch_index * m;

  int tid = threadIdx.x;
  const int stride = block_size;

  int old = 0;
  if (threadIdx.x == 0) idxs[0] = old;

  __syncthreads();
  for (int j = 1; j < m; j++) {
    int besti = 0;
    float best = -1;
    for (int k = tid; k < n; k += stride) {
      // Row `old` of the distance matrix gives the distance to the point
      // selected in the previous round.
      float d = dataset[old * n + k];

      float d2 = min(d, temp[k]);
      temp[k] = d2;
      besti = d2 > best ? k : besti;
      best = d2 > best ? d2 : best;
    }
    dists[tid] = best;
    dists_i[tid] = besti;
    __syncthreads();

#pragma unroll
    for (int block_size_thres = 1024; block_size_thres >= 2;
         block_size_thres >>= 1) {
      const int tid_thres = block_size_thres / 2;
      if (block_size >= block_size_thres && tid < tid_thres) {
        __update(dists, dists_i, tid, tid + tid_thres);
      }
      __syncthreads();
    }

    old = dists_i[0];
    if (tid == 0) idxs[j] = old;
    // See the barrier rationale in the coordinate-based kernel: dists_i[0]
    // must be consumed by all threads before it is rewritten.
    __syncthreads();
  }
}
// Gathers points along the last dimension: out[b, c, j] = points[b, c, idx[b, j]].
//
// points: (B, C, N)
// idx: (B, M)
// output:
//      out: (B, C, M)
//
// The batch and channel come from blockIdx.z / blockIdx.y; the point index is
// iterated by CUDA_1D_KERNEL_LOOP (macro from the included helper header).
// All addresses are computed from scratch for each iteration. The previous
// code advanced `out`, `idx` and `points` with `+=` inside the loop, so any
// thread that ran the loop body a second time used offsets accumulated from
// the first pass.
template <typename T>
__global__ void gather_points_forward_cuda_kernel(int b, int c, int n, int m,
                                                  const T *points,
                                                  const int *__restrict__ idx,
                                                  T *out) {
  int bs_idx = blockIdx.z;
  int c_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(pt_idx, m) {
    if (bs_idx >= b || c_idx >= c) return;

    const int src_point = idx[bs_idx * m + pt_idx];
    out[bs_idx * c * m + c_idx * m + pt_idx] =
        points[bs_idx * c * n + c_idx * n + src_point];
  }
}

// Backward of the gather: grad_points[b, c, idx[b, j]] += grad_out[b, c, j].
//
// grad_out: (B, C, M)
// idx: (B, M)
// output:
//      grad_points: (B, C, N)
//
// atomicAdd is used because several j may map to the same source point.
// As in the forward kernel, addresses are recomputed each iteration instead
// of mutating the parameter pointers.
template <typename T>
__global__ void gather_points_backward_cuda_kernel(int b, int c, int n, int m,
                                                   const T *grad_out,
                                                   const int *__restrict__ idx,
                                                   T *grad_points) {
  int bs_idx = blockIdx.z;
  int c_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(pt_idx, m) {
    if (bs_idx >= b || c_idx >= c) return;

    const int src_point = idx[bs_idx * m + pt_idx];
    atomicAdd(grad_points + bs_idx * c * n + c_idx * n + src_point,
              grad_out[bs_idx * c * m + c_idx * m + pt_idx]);
  }
}
// Groups points by index:
//   out[b, c, p, s] = points[b, c, idx[b, p, s]].
//
// points: (B, C, N)
// idx: (B, npoints, nsample)
// output:
//      out: (B, C, npoints, nsample)
//
// The batch and channel come from blockIdx.z / blockIdx.y; (p, s) is
// iterated by CUDA_1D_KERNEL_LOOP (macro from the included helper header).
// The index lookup is computed from scratch for each iteration. The previous
// code did `idx += ...` inside the loop, so a second iteration of the same
// thread read from an accumulated, wrong position.
template <typename T>
__global__ void group_points_forward_cuda_kernel(int b, int c, int n,
                                                 int npoints, int nsample,
                                                 const T *points,
                                                 const int *__restrict__ idx,
                                                 T *out) {
  int bs_idx = blockIdx.z;
  int c_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(index, npoints * nsample) {
    if (bs_idx >= b || c_idx >= c) return;

    int pt_idx = index / nsample;
    int sample_idx = index % nsample;

    const int group_pos =
        bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
    int in_idx = bs_idx * c * n + c_idx * n + idx[group_pos];
    int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample +
                  pt_idx * nsample + sample_idx;

    out[out_idx] = points[in_idx];
  }
}

// Backward of the grouping:
//   grad_points[b, c, idx[b, p, s]] += grad_out[b, c, p, s].
//
// grad_out: (B, C, npoints, nsample)
// idx: (B, npoints, nsample)
// output:
//      grad_points: (B, C, N)
//
// atomicAdd is used because a source point may appear in several groups.
// As in the forward kernel, `grad_out` and `idx` are indexed per iteration
// rather than advanced with `+=`.
template <typename T>
__global__ void group_points_backward_cuda_kernel(int b, int c, int n,
                                                  int npoints, int nsample,
                                                  const T *grad_out,
                                                  const int *__restrict__ idx,
                                                  T *grad_points) {
  int bs_idx = blockIdx.z;
  int c_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(index, npoints * nsample) {
    int pt_idx = index / nsample;
    if (bs_idx >= b || c_idx >= c) return;

    int sample_idx = index % nsample;
    const int group_pos =
        bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
    const int grad_pos = bs_idx * c * npoints * nsample +
                         c_idx * npoints * nsample + pt_idx * nsample +
                         sample_idx;

    atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[group_pos],
              grad_out[grad_pos]);
  }
}
// Minimal 2-D point / vector with float coordinates.
struct Point {
  float x, y;

  // Leaves the coordinates uninitialised.
  __device__ Point() {}

  // Takes doubles and narrows them to float on storage.
  __device__ Point(double _x, double _y) : x(_x), y(_y) {}

  __device__ void set(float _x, float _y) {
    x = _x;
    y = _y;
  }

  // Component-wise sum.
  __device__ Point operator+(const Point &b) const {
    return Point(x + b.x, y + b.y);
  }

  // Component-wise difference.
  __device__ Point operator-(const Point &b) const {
    return Point(x - b.x, y - b.y);
  }
};

// z-component of the cross product of vectors a and b.
__device__ inline float cross(const Point &a, const Point &b) {
  return a.x * b.y - a.y * b.x;
}

// z-component of the cross product of (p1 - p0) and (p2 - p0).
__device__ inline float cross(const Point &p1, const Point &p2,
                              const Point &p0) {
  return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y);
}

// Returns 1 when the axis-aligned bounding box of segment p1-p2 overlaps the
// bounding box of segment q1-q2 (touching counts), 0 otherwise.
__device__ int check_rect_cross(const Point &p1, const Point &p2,
                                const Point &q1, const Point &q2) {
  // Each test is written as !(a <= b) so comparisons involving NaN reject,
  // exactly as the chained && form does.
  if (!(min(p1.x, p2.x) <= max(q1.x, q2.x))) return 0;
  if (!(min(q1.x, q2.x) <= max(p1.x, p2.x))) return 0;
  if (!(min(p1.y, p2.y) <= max(q1.y, q2.y))) return 0;
  if (!(min(q1.y, q2.y) <= max(p1.y, p2.y))) return 0;
  return 1;
}
// Computes the intersection of segment p0-p1 with segment q0-q1.
//
// Returns 1 and stores the intersection in `ans_point` when the segments
// properly cross; returns 0 (leaving `ans_point` untouched) when their
// bounding boxes do not overlap or the straddle test fails. Segments that
// only touch, or are collinear, yield a zero product below and are rejected.
__device__ inline int intersection(const Point &p1, const Point &p0,
                                   const Point &q1, const Point &q0,
                                   Point &ans_point) {
  // fast exclusion
  if (check_rect_cross(p0, p1, q0, q1) == 0) return 0;

  // check cross standing: q0/q1 must lie on opposite sides of p0-p1 and
  // p0/p1 on opposite sides of q0-q1.
  float s1 = cross(q0, p1, p0);
  float s2 = cross(p1, q1, p0);
  float s3 = cross(p0, q1, q0);
  float s4 = cross(q1, p1, q0);

  if (!(s1 * s2 > 0 && s3 * s4 > 0)) return 0;

  // calculate intersection of two lines
  float s5 = cross(q1, p1, p0);
  if (fabs(s5 - s1) > EPS) {
    ans_point.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1);
    ans_point.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1);

  } else {
    // Fallback: solve the two line equations a*x + b*y + c = 0 directly.
    float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y;
    float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y;
    float D = a0 * b1 - a1 * b0;

    ans_point.x = (b0 * c1 - b1 * c0) / D;
    ans_point.y = (a1 * c0 - a0 * c1) / D;
  }

  return 1;
}

// Rotates `p` in place around `center` by the angle whose cosine and sine are
// given.
__device__ inline void rotate_around_center(const Point &center,
                                            const float angle_cos,
                                            const float angle_sin, Point &p) {
  float new_x =
      (p.x - center.x) * angle_cos - (p.y - center.y) * angle_sin + center.x;
  float new_y =
      (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y;
  p.set(new_x, new_y);
}

// Returns non-zero when the polar angle of `a` around `center` is greater
// than that of `b` (angles from atan2).
__device__ inline int point_cmp(const Point &a, const Point &b,
                                const Point &center) {
  return atan2(a.y - center.y, a.x - center.x) >
         atan2(b.y - center.y, b.x - center.x);
}
// Area of the intersection of two rotated rectangles.
//
// The intersection polygon is assembled from (1) edge/edge intersection
// points and (2) corners of either box that lie inside the other box; the
// vertices are then ordered by angle around their mean and the area is
// accumulated as a triangle fan.
__device__ inline float box_overlap(const float *box_a, const float *box_b) {
  // params: box_a (5) [x1, y1, x2, y2, angle]
  // params: box_b (5) [x1, y1, x2, y2, angle]

  float a_x1 = box_a[0], a_y1 = box_a[1], a_x2 = box_a[2], a_y2 = box_a[3],
        a_angle = box_a[4];
  float b_x1 = box_b[0], b_y1 = box_b[1], b_x2 = box_b[2], b_y2 = box_b[3],
        b_angle = box_b[4];

  Point center_a((a_x1 + a_x2) / 2, (a_y1 + a_y2) / 2);
  Point center_b((b_x1 + b_x2) / 2, (b_y1 + b_y2) / 2);

  // Four corners plus a fifth slot that repeats corner 0 (set below) so that
  // edge k can be addressed as corners[k] -> corners[k + 1].
  Point box_a_corners[5];
  box_a_corners[0].set(a_x1, a_y1);
  box_a_corners[1].set(a_x2, a_y1);
  box_a_corners[2].set(a_x2, a_y2);
  box_a_corners[3].set(a_x1, a_y2);

  Point box_b_corners[5];
  box_b_corners[0].set(b_x1, b_y1);
  box_b_corners[1].set(b_x2, b_y1);
  box_b_corners[2].set(b_x2, b_y2);
  box_b_corners[3].set(b_x1, b_y2);

  // get oriented corners
  float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle);
  float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle);

  for (int k = 0; k < 4; k++) {
    rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
    rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
  }

  box_a_corners[4] = box_a_corners[0];
  box_b_corners[4] = box_b_corners[0];

  // get intersection of lines
  // NOTE(review): cross_points holds at most 16 vertices; the loops below do
  // not bound-check cnt against that capacity.
  Point cross_points[16];
  Point poly_center;
  int cnt = 0, flag = 0;

  poly_center.set(0, 0);
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 4; j++) {
      flag = intersection(box_a_corners[i + 1], box_a_corners[i],
                          box_b_corners[j + 1], box_b_corners[j],
                          cross_points[cnt]);
      if (flag) {
        poly_center = poly_center + cross_points[cnt];
        cnt++;
      }
    }
  }

  // check corners
  for (int k = 0; k < 4; k++) {
    if (check_in_box2d(box_a, box_b_corners[k])) {
      poly_center = poly_center + box_b_corners[k];
      cross_points[cnt] = box_b_corners[k];
      cnt++;
    }
    if (check_in_box2d(box_b, box_a_corners[k])) {
      poly_center = poly_center + box_a_corners[k];
      cross_points[cnt] = box_a_corners[k];
      cnt++;
    }
  }

  // Mean of the collected vertices. When cnt == 0 this divides by zero, but
  // the result is then unused: both loops below run zero iterations.
  poly_center.x /= cnt;
  poly_center.y /= cnt;

  // sort the points of polygon
  // (bubble sort into ascending angle around poly_center, see point_cmp)
  Point temp;
  for (int j = 0; j < cnt - 1; j++) {
    for (int i = 0; i < cnt - j - 1; i++) {
      if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)) {
        temp = cross_points[i];
        cross_points[i] = cross_points[i + 1];
        cross_points[i + 1] = temp;
      }
    }
  }

  // get the overlap areas
  // (triangle fan anchored at cross_points[0]; each cross product is twice a
  // signed triangle area, hence the final division by 2)
  float area = 0;
  for (int k = 0; k < cnt - 1; k++) {
    area += cross(cross_points[k] - cross_points[0],
                  cross_points[k + 1] - cross_points[0]);
  }

  return fabs(area) / 2.0;
}
// Fills ans_overlap[a * num_b + b] with the bird's-eye-view overlap area of
// box a from `boxes_a` and box b from `boxes_b`. Each box occupies five
// consecutive floats. The (b_idx, a_idx) pair is supplied by
// CUDA_2D_KERNEL_LOOP (macro from the included helper header).
__global__ void iou3d_boxes_overlap_bev_forward_cuda_kernel(
    const int num_a, const float *boxes_a, const int num_b,
    const float *boxes_b, float *ans_overlap) {
  CUDA_2D_KERNEL_LOOP(b_idx, num_b, a_idx, num_a) {
    if (!(a_idx < num_a && b_idx < num_b)) {
      return;
    }

    ans_overlap[a_idx * num_b + b_idx] =
        box_overlap(&boxes_a[a_idx * 5], &boxes_b[b_idx * 5]);
  }
}

// Fills ans_iou[a * num_b + b] with the bird's-eye-view IoU of box a from
// `boxes_a` and box b from `boxes_b`; same indexing as the overlap kernel.
__global__ void iou3d_boxes_iou_bev_forward_cuda_kernel(const int num_a,
                                                        const float *boxes_a,
                                                        const int num_b,
                                                        const float *boxes_b,
                                                        float *ans_iou) {
  CUDA_2D_KERNEL_LOOP(b_idx, num_b, a_idx, num_a) {
    if (!(a_idx < num_a && b_idx < num_b)) {
      return;
    }

    ans_iou[a_idx * num_b + b_idx] =
        iou_bev(&boxes_a[a_idx * 5], &boxes_b[b_idx * 5]);
  }
}
// IoU of two axis-aligned boxes given as [x1, y1, x2, y2, ...]; only the
// first four entries of each array are read. The union is floored at EPS so
// the division never sees a denominator below it.
__device__ inline float iou_normal(float const *const a, float const *const b) {
  // Extent of the intersection rectangle.
  const float x_lo = fmaxf(a[0], b[0]);
  const float x_hi = fminf(a[2], b[2]);
  const float y_lo = fmaxf(a[1], b[1]);
  const float y_hi = fminf(a[3], b[3]);

  // Negative extents mean the boxes are disjoint on that axis.
  const float inter_w = fmaxf(x_hi - x_lo, 0.f);
  const float inter_h = fmaxf(y_hi - y_lo, 0.f);
  const float inter_area = inter_w * inter_h;

  const float area_a = (a[2] - a[0]) * (a[3] - a[1]);
  const float area_b = (b[2] - b[0]) * (b[3] - b[1]);

  return inter_area / fmaxf(area_a + area_b - inter_area, EPS);
}
0] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 0]; block_boxes[threadIdx.x * 5 + 1] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 1]; block_boxes[threadIdx.x * 5 + 2] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 2]; block_boxes[threadIdx.x * 5 + 3] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 3]; block_boxes[threadIdx.x * 5 + 4] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 4]; } __syncthreads(); if (threadIdx.x < row_size) { const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; const float *cur_box = boxes + cur_box_idx * 5; int i = 0; unsigned long long t = 0; int start = 0; if (row_start == col_start) { start = threadIdx.x + 1; } for (i = start; i < col_size; i++) { if (iou_normal(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { t |= 1ULL << i; } } const int col_blocks = (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS; mask[cur_box_idx * col_blocks + col_start] = t; } } } #endif // IOU3D_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. 
// Exchange two floats in place.
inline __device__ void swap_float(float *x, float *y) {
  const float held = *x;
  *x = *y;
  *y = held;
}

// Exchange two ints in place.
inline __device__ void swap_int(int *x, int *y) {
  const int held = *x;
  *x = *y;
  *y = held;
}

// Sift the root of a max-heap down until the heap property holds again.
// dist holds the keys, idx the payload that travels with each key, and only
// the first k entries belong to the heap.
__device__ void reheap(float *dist, int *idx, int k) {
  int root = 0;
  for (int child = 1; child < k; child = root * 2 + 1) {
    // Pick the larger of the two children.
    if (child + 1 < k && dist[child + 1] > dist[child]) child++;
    if (dist[root] > dist[child]) return;
    swap_float(&dist[root], &dist[child]);
    swap_int(&idx[root], &idx[child]);
    root = child;
  }
}

// Turn a max-heap of k entries into an array sorted by ascending dist by
// repeatedly moving the current maximum to the end of the shrinking heap.
__device__ void heap_sort(float *dist, int *idx, int k) {
  for (int last = k - 1; last > 0; last--) {
    swap_float(&dist[0], &dist[last]);
    swap_int(&idx[0], &idx[last]);
    reheap(dist, idx, last);
  }
}

// input: xyz (b, n, 3) new_xyz (b, m, 3)
// output: idx (b, m, nsample) dist2 (b, m, nsample)
//
// For every query point in new_xyz, finds the nsample points of xyz (same
// batch element) with the smallest squared Euclidean distance, and writes
// their indices and squared distances in ascending order of distance.
// The batch element is taken from blockIdx.y, the query point from the
// 1-D thread loop.
//
// The candidate heap lives in two fixed-size local arrays of 100 entries, so
// the caller must guarantee nsample <= 100; larger values overrun them.
template <typename T>
__global__ void knn_forward_cuda_kernel(int b, int n, int m, int nsample,
                                        const T *xyz, const T *new_xyz,
                                        int *__restrict__ idx, T *dist2) {
  int bs_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(pt_idx, m) {
    if (bs_idx >= b) return;

    // FIX: the original advanced the kernel's pointer parameters in place
    // (`new_xyz += ...`, `xyz += ...`, ...). Those offsets then accumulated
    // whenever a thread executed the loop body more than once, so every
    // iteration after the first read and wrote the wrong memory. Derive
    // per-iteration pointers from the untouched base pointers instead.
    const T *query = new_xyz + bs_idx * m * 3 + pt_idx * 3;
    const T *cloud = xyz + bs_idx * n * 3;
    int *out_idx = idx + bs_idx * m * nsample + pt_idx * nsample;
    T *out_dist2 = dist2 + bs_idx * m * nsample + pt_idx * nsample;

    T new_x = query[0];
    T new_y = query[1];
    T new_z = query[2];

    // Max-heap of the nsample best candidates seen so far; the root
    // (entry 0) is the current worst of them.
    float best_dist[100];
    int best_idx[100];
    for (int i = 0; i < nsample; i++) {
      best_dist[i] = 1e10;
      best_idx[i] = 0;
    }

    for (int i = 0; i < n; i++) {
      T x = cloud[i * 3 + 0];
      T y = cloud[i * 3 + 1];
      T z = cloud[i * 3 + 2];
      T d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
             (new_z - z) * (new_z - z);
      // A closer point replaces the current worst candidate.
      if (d2 < best_dist[0]) {
        best_dist[0] = d2;
        best_idx[0] = i;
        reheap(best_dist, best_idx, nsample);
      }
    }

    heap_sort(best_dist, best_idx, nsample);
    for (int i = 0; i < nsample; i++) {
      out_idx[i] = best_idx[i];
      out_dist2[i] = best_dist[i];
    }
  }
}
// 2-D point with single-precision coordinates.
struct Point {
  float x, y;
  __device__ Point() {}
  __device__ Point(float x, float y) : x(x), y(y) {}
};

// Exchange the coordinates stored in *a and *b.
__device__ inline void swap1(Point *a, Point *b) {
  const float ax = a->x;
  const float ay = a->y;
  a->x = b->x;
  a->y = b->y;
  b->x = ax;
  b->y = ay;
}

// z-component of the cross product (a - o) x (b - o).
__device__ inline float cross(Point o, Point a, Point b) {
  return (a.x - o.x) * (b.y - o.y) - (b.x - o.x) * (a.y - o.y);
}

// Squared Euclidean distance between a and b (no square root is taken).
__device__ inline float dis(Point a, Point b) {
  return (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y);
}

// Searches for the smallest-area rectangle enclosing the points ps[0..n_points)
// by trying one rotation per distinct edge direction of the polyline
// ps[0] -> ps[1] -> ... -> ps[n_points - 1].
//
// For the best rotation found (area below the initial 1e12) the result is
// written as minbox = {angle, xmin, ymin, xmax, ymax}, where the extents are
// measured in the rotated frame. If no candidate beats the initial area,
// minbox is left untouched. At most MAXN points are supported.
__device__ inline void minBoundingRect(Point *ps, int n_points, float *minbox) {
  // Coordinates as a 2 x n matrix: row 0 = x, row 1 = y.
  float convex_points[2][MAXN];
  for (int j = 0; j < n_points; j++) {
    convex_points[0][j] = ps[j].x;
    convex_points[1][j] = ps[j].y;
  }

  Point edges[MAXN];
  float edges_angles[MAXN];
  float unique_angles[MAXN];
  const int n_edges = n_points - 1;

  // Edge vectors and their directions folded into [0, PI / 2].
  for (int i = 0; i < n_edges; i++) {
    edges[i].x = ps[i + 1].x - ps[i].x;
    edges[i].y = ps[i + 1].y - ps[i].y;

    float angle = atan2((double)edges[i].y, (double)edges[i].x);
    if (angle >= 0) {
      angle = fmod((double)angle, (double)PI / 2);
    } else {
      angle = angle - (int)(angle / (PI / 2) - 1) * (PI / 2);
    }
    edges_angles[i] = angle;
  }

  // Drop exact duplicates so each direction is evaluated once.
  unique_angles[0] = edges_angles[0];
  int n_unique = 1;
  for (int i = 1; i < n_edges; i++) {
    bool seen = false;
    for (int j = 0; j < n_unique; j++) {
      if (edges_angles[i] == unique_angles[j]) {
        seen = true;
      }
    }
    if (!seen) {
      unique_angles[n_unique] = edges_angles[i];
      n_unique += 1;
    }
  }

  float minarea = 1e12;
  for (int i = 0; i < n_unique; i++) {
    const float theta = unique_angles[i];
    const float c = cos(theta);
    const float s = sin(theta);
    float R[2][2];
    R[0][0] = c;
    R[0][1] = s;
    R[1][0] = -s;
    R[1][1] = c;

    // R x Points
    float rot_points[2][MAXN];
    for (int n = 0; n < n_points; n++) {
      for (int m = 0; m < 2; m++) {
        float sum = 0.0;
        for (int k = 0; k < 2; k++) {
          sum = sum + R[m][k] * convex_points[k][n];
        }
        rot_points[m][n] = sum;
      }
    }

    // Axis-aligned extent in the rotated frame; inf / NaN values are ignored.
    float xmin = 1e12;
    float ymin = 1e12;
    float xmax = -1e12;
    float ymax = -1e12;
    for (int j = 0; j < n_points; j++) {
      const float rx = rot_points[0][j];
      if (!(isinf(rx) || isnan(rx))) {
        if (rx < xmin) xmin = rx;
        if (rx > xmax) xmax = rx;
      }
      const float ry = rot_points[1][j];
      if (!(isinf(ry) || isnan(ry))) {
        if (ry < ymin) ymin = ry;
        if (ry > ymax) ymax = ry;
      }
    }

    const float area = (xmax - xmin) * (ymax - ymin);
    if (area < minarea) {
      minarea = area;
      minbox[0] = theta;
      minbox[1] = xmin;
      minbox[2] = ymin;
      minbox[3] = xmax;
      minbox[4] = ymax;
    }
  }
}
// Computes the four corners of a minimum-area bounding rectangle for a set of
// nine 2-D points.
//
//   p         : 18 values, interleaved as x0, y0, x1, y1, ..., x8, y8
//   minpoints : 8 output values, the rectangle corners as interleaved x, y
//
// The points are reduced to their convex hull (Jarvis), the hull is closed by
// repeating its first vertex, minBoundingRect picks the best rotation, and
// the extents it reports are turned back into corner coordinates with the
// same rotation coefficients.
// FIX: the template parameter list had been lost (`template __device__ ...`),
// which does not compile; restored as <typename T>.
template <typename T>
__device__ inline void Findminbox(T const *const p, T *minpoints) {
  const int kNumInputPoints = 9;

  Point ps1[MAXN];
  Point convex[MAXN];
  for (int i = 0; i < kNumInputPoints; i++) {
    convex[i].x = p[i * 2];
    convex[i].y = p[i * 2 + 1];
  }

  // Jarvis rewrites `convex` in place and updates the vertex count.
  int n_convex = kNumInputPoints;
  Jarvis(convex, n_convex);

  const int n1 = n_convex;
  for (int i = 0; i < n1; i++) {
    ps1[i].x = convex[i].x;
    ps1[i].y = convex[i].y;
  }
  // Repeat the first vertex so the closing edge is part of the polyline.
  ps1[n1].x = convex[0].x;
  ps1[n1].y = convex[0].y;

  // {angle, xmin, ymin, xmax, ymax}; stays all-zero if no rectangle is found.
  float minbbox[5] = {0};
  minBoundingRect(ps1, n1 + 1, minbbox);
  const float angle = minbbox[0];
  const float xmin = minbbox[1];
  const float ymin = minbbox[2];
  const float xmax = minbbox[3];
  const float ymax = minbbox[4];

  float R[2][2];
  R[0][0] = cos(angle);
  R[0][1] = sin(angle);
  R[1][0] = -sin(angle);
  R[1][1] = cos(angle);

  // Corners in the order (xmax, ymin), (xmin, ymin), (xmin, ymax),
  // (xmax, ymax), each multiplied as a row vector by R.
  minpoints[0] = xmax * R[0][0] + ymin * R[1][0];
  minpoints[1] = xmax * R[0][1] + ymin * R[1][1];
  minpoints[2] = xmin * R[0][0] + ymin * R[1][0];
  minpoints[3] = xmin * R[0][1] + ymin * R[1][1];
  minpoints[4] = xmin * R[0][0] + ymax * R[1][0];
  minpoints[5] = xmin * R[0][1] + ymax * R[1][1];
  minpoints[6] = xmax * R[0][0] + ymax * R[1][0];
  minpoints[7] = xmax * R[0][1] + ymax * R[1][1];
}

// One thread-loop iteration per input polygon: reads 18 values
// (nine x, y pairs) from ex_boxes and writes 8 values (four corners) to
// minbox.
// FIX: template parameter list restored (it was stripped from the source).
template <typename T>
__global__ void min_area_polygons_cuda_kernel(const int ex_n_boxes,
                                              const T *ex_boxes, T *minbox) {
  const int kInputStride = 18;  // 9 points * (x, y)
  const int kOutputStride = 8;  // 4 corners * (x, y)
  CUDA_1D_KERNEL_LOOP(index, ex_n_boxes) {
    const T *cur_box = ex_boxes + index * kInputStride;
    T *cur_min_box = minbox + index * kOutputStride;
    Findminbox(cur_box, cur_min_box);
  }
}
Redistributions of source code must retain the above copyright notice, *this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * CONTRIBUTION AGREEMENT * * By contributing to the BVLC/caffe repository through pull-request, comment, * or otherwise, the contributor releases their content to the * license and copyright terms herein. * ***************** END Caffe Copyright Notice and Disclaimer ********************* * * Copyright (c) 2018 Microsoft * Licensed under The MIT License [see LICENSE for details] * \file modulated_deformable_im2col.cuh * \brief Function definitions of converting an image to * column matrix based on kernel, padding, dilation, and offset. * These functions are mainly used in deformable convolution operators. 
// Bilinearly samples `input` at the fractional position (h, w).
//
//   input      : pointer to the first element of one image plane
//   data_width : row stride of `input`, in elements
//   height,
//   width      : bounds used to decide which of the 4 neighbours exist;
//                neighbours outside the plane contribute 0
//
// FIX: the template parameter list had been stripped from the source
// (`template __device__ T ...`); restored as <typename T>.
template <typename T>
__device__ T dmcn_im2col_bilinear(const T *input, const int data_width,
                                  const int height, const int width, T h,
                                  T w) {
  int h_low = floorf(h);
  int w_low = floorf(w);
  int h_high = h_low + 1;
  int w_high = w_low + 1;

  // Fractional offsets from the low corner and their complements.
  T lh = h - h_low;
  T lw = w - w_low;
  T hh = 1 - lh, hw = 1 - lw;

  T v1 = 0;
  if (h_low >= 0 && w_low >= 0) v1 = input[h_low * data_width + w_low];
  T v2 = 0;
  if (h_low >= 0 && w_high <= width - 1)
    v2 = input[h_low * data_width + w_high];
  T v3 = 0;
  if (h_high <= height - 1 && w_low >= 0)
    v3 = input[h_high * data_width + w_low];
  T v4 = 0;
  if (h_high <= height - 1 && w_high <= width - 1)
    v4 = input[h_high * data_width + w_high];

  T w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;

  T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  return val;
}

// Bilinear weight with which the integer pixel (h, w) contributes to a sample
// taken at the fractional position (argmax_h, argmax_w). Returns 0 when the
// sample position lies outside (-1, height) x (-1, width) or when (h, w) is
// not one of the four neighbours of the sample position.
// FIX: template parameter list restored (it was stripped from the source).
template <typename T>
__device__ T dmcn_get_gradient_weight(T argmax_h, T argmax_w, const int h,
                                      const int w, const int height,
                                      const int width) {
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
      argmax_w >= width) {
    // empty
    return 0;
  }

  int argmax_h_low = floorf(argmax_h);
  int argmax_w_low = floorf(argmax_w);
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  T weight = 0;
  if (h == argmax_h_low && w == argmax_w_low)
    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
  if (h == argmax_h_low && w == argmax_w_high)
    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
  if (h == argmax_h_high && w == argmax_w_low)
    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
  if (h == argmax_h_high && w == argmax_w_high)
    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
  return weight;
}

// Derivative of the bilinear sample of im_data at (argmax_h, argmax_w) with
// respect to one sampling coordinate: bp_dir == 0 differentiates along h,
// bp_dir == 1 along w. Any other bp_dir, or a sample position outside
// (-1, height) x (-1, width), yields 0. Neighbours outside the plane are
// skipped. data_width is the row stride of im_data in elements.
// FIX: template parameter list restored (it was stripped from the source).
template <typename T>
__device__ T dmcn_get_coordinate_weight(T argmax_h, T argmax_w,
                                        const int height, const int width,
                                        const T *im_data, const int data_width,
                                        const int bp_dir) {
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
      argmax_w >= width) {
    // empty
    return 0;
  }

  int argmax_h_low = floorf(argmax_h);
  int argmax_w_low = floorf(argmax_w);
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  T weight = 0;

  if (bp_dir == 0) {
    // d/dh: the low row enters with a negative sign, the high row positive.
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_w_low + 1 - argmax_w) *
                im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += -1 * (argmax_w - argmax_w_low) *
                im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += (argmax_w_low + 1 - argmax_w) *
                im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_w - argmax_w_low) *
                im_data[argmax_h_high * data_width + argmax_w_high];
  } else if (bp_dir == 1) {
    // d/dw: the low column enters with a negative sign, the high column
    // positive.
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_h_low + 1 - argmax_h) *
                im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += (argmax_h_low + 1 - argmax_h) *
                im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += -1 * (argmax_h - argmax_h_low) *
                im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_h - argmax_h_low) *
                im_data[argmax_h_high * data_width + argmax_w_high];
  }

  return weight;
}
*data_offset, const T *data_mask, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int channel_per_deformable_group, const int batch_size, const int num_channels, const int deformable_group, const int height_col, const int width_col, T *data_col) { CUDA_1D_KERNEL_LOOP(index, n) { // index index of output matrix const int w_col = index % width_col; const int h_col = (index / width_col) % height_col; const int b_col = (index / width_col / height_col) % batch_size; const int c_im = (index / width_col / height_col) / batch_size; const int c_col = c_im * kernel_h * kernel_w; // compute deformable group index const int deformable_group_index = c_im / channel_per_deformable_group; const int h_in = h_col * stride_h - pad_h; const int w_in = w_col * stride_w - pad_w; T *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; const T *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width; const T *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; const T *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; for (int i = 0; i < kernel_h; ++i) { for (int j = 0; j < kernel_w; ++j) { const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col; const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col; const T offset_h = data_offset_ptr[data_offset_h_ptr]; const T offset_w = data_offset_ptr[data_offset_w_ptr]; const T mask = data_mask_ptr[data_mask_hw_ptr]; T val = static_cast(0); const T h_im = h_in + i * dilation_h + offset_h; const T w_im = 
// Scatters column-buffer gradients back onto the input image gradient for
// modulated deformable convolution.
//
// Each loop index addresses one element of data_col, decomposed (slowest to
// fastest) as channel c, kernel row i, kernel column j, batch b, output row
// h_out, output column w_out. The element's gradient, scaled by its
// modulation mask, is distributed with bilinear weights onto the input pixels
// around the deformed sampling position and accumulated into grad_im using
// atomicAdd (several column elements can hit the same pixel).
//
// data_offset is indexed as [b, group, 2 * (i * kernel_w + j) + {0: h, 1: w},
// h_out, w_out] and data_mask as [b, group, i * kernel_w + j, h_out, w_out];
// grad_im is indexed as [b, c, h, w].
//
// FIX: the template parameter list had been stripped from the source
// (`template __global__ void ...`); restored as <typename T>.
template <typename T>
__global__ void modulated_deformable_col2im_gpu_kernel(
    const int n, const T *data_col, const T *data_offset, const T *data_mask,
    const int channels, const int height, const int width, const int kernel_h,
    const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int deformable_group, const int height_col, const int width_col,
    T *grad_im) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    // Decompose the flat column index.
    const int j = (index / width_col / height_col / batch_size) % kernel_w;
    const int i =
        (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
    const int c =
        index / width_col / height_col / batch_size / kernel_w / kernel_h;
    // compute the start and end of the output

    const int deformable_group_index = c / channel_per_deformable_group;

    int w_out = index % width_col;
    int h_out = (index / width_col) % height_col;
    int b = (index / width_col / height_col) % batch_size;
    // Top-left corner of the receptive field before offsets are applied.
    int w_in = w_out * stride_w - pad_w;
    int h_in = h_out * stride_h - pad_h;

    const T *data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const T *data_mask_ptr =
        data_mask + (b * deformable_group + deformable_group_index) *
                        kernel_h * kernel_w * height_col * width_col;
    const int data_offset_h_ptr =
        ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
    const int data_offset_w_ptr =
        ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
    const int data_mask_hw_ptr =
        ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
    const T offset_h = data_offset_ptr[data_offset_h_ptr];
    const T offset_w = data_offset_ptr[data_offset_w_ptr];
    const T mask = data_mask_ptr[data_mask_hw_ptr];

    // Deformed (fractional) sampling position in the input image.
    const T cur_inv_h_data = h_in + i * dilation_h + offset_h;
    const T cur_inv_w_data = w_in + j * dilation_w + offset_w;

    const T cur_top_grad = data_col[index] * mask;
    const int cur_h = (int)cur_inv_h_data;
    const int cur_w = (int)cur_inv_w_data;

    // Visit a 5 x 5 window around the truncated position and keep only the
    // in-bounds pixels that lie strictly within one pixel of the sampling
    // position in both directions.
    for (int dy = -2; dy <= 2; dy++) {
      for (int dx = -2; dx <= 2; dx++) {
        if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 &&
            cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
            abs(cur_inv_w_data - (cur_w + dx)) < 1) {
          int cur_bottom_grad_pos =
              ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
          T weight =
              dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data,
                                       cur_h + dy, cur_w + dx, height, width);
          atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
        }
      }
    }
  }
}
+ deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width; const T *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; const T *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step) { const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w; const int bp_dir = offset_c % 2; int j = (col_pos / width_col / height_col / batch_size) % kernel_w; int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; int w_out = col_pos % width_col; int h_out = (col_pos / width_col) % height_col; int w_in = w_out * stride_w - pad_w; int h_in = h_out * stride_h - pad_h; const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); const T offset_h = data_offset_ptr[data_offset_h_ptr]; const T offset_w = data_offset_ptr[data_offset_w_ptr]; const T mask = data_mask_ptr[data_mask_hw_ptr]; T inv_h = h_in + i * dilation_h + offset_h; T inv_w = w_in + j * dilation_w + offset_w; if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) inv_h = inv_w = -2; else mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w); const T weight = dmcn_get_coordinate_weight( inv_h, inv_w, height, width, data_im_ptr + cnt * height * width, width, bp_dir); val += weight * data_col_ptr[col_pos] * mask; cnt += 1; } // KERNEL_ASSIGN(grad_offset[index], offset_req, val); grad_offset[index] = val; if (offset_c 
// Bilinearly samples one channel of one attention head at the fractional
// position (h, w).
//
// bottom_data is addressed as
//   h * (width * nheads * channels) + w * (nheads * channels)
//     + m * channels + c
// i.e. a (height, width, nheads, channels) layout with m the head index and
// c the channel index. Neighbours outside [0, height) x [0, width)
// contribute 0.
//
// FIX: the template parameter list had been stripped from the source
// (`template __device__ scalar_t ...`); restored as <typename scalar_t>.
template <typename scalar_t>
__device__ scalar_t ms_deform_attn_im2col_bilinear(
    const scalar_t *&bottom_data, const int &height, const int &width,
    const int &nheads, const int &channels, const scalar_t &h,
    const scalar_t &w, const int &m, const int &c) {
  const int h_low = floorf(h);
  const int w_low = floorf(w);
  const int h_high = h_low + 1;
  const int w_high = w_low + 1;

  // Fractional offsets from the low corner and their complements.
  const scalar_t lh = h - h_low;
  const scalar_t lw = w - w_low;
  const scalar_t hh = 1 - lh, hw = 1 - lw;

  // Element strides of one step in w and one step in h.
  const int w_stride = nheads * channels;
  const int h_stride = width * w_stride;
  const int h_low_ptr_offset = h_low * h_stride;
  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
  const int w_low_ptr_offset = w_low * w_stride;
  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
  const int base_ptr = m * channels + c;

  scalar_t v1 = 0;
  if (h_low >= 0 && w_low >= 0) {
    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
    v1 = bottom_data[ptr1];
  }
  scalar_t v2 = 0;
  if (h_low >= 0 && w_high <= width - 1) {
    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
    v2 = bottom_data[ptr2];
  }
  scalar_t v3 = 0;
  if (h_high <= height - 1 && w_low >= 0) {
    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
    v3 = bottom_data[ptr3];
  }
  scalar_t v4 = 0;
  if (h_high <= height - 1 && w_high <= width - 1) {
    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
    v4 = bottom_data[ptr4];
  }

  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;

  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  return val;
}
// Backward pass of one bilinear sample for multi-scale deformable attention.
// All three gradient outputs are accumulated with atomicAdd:
//
//   grad_value            += bilinear weight * top_grad * attn_weight at each
//                            in-bounds neighbour of (h, w)
//   *grad_attn_weight     += top_grad * sampled value
//   grad_sampling_loc[0]  += width  * d(sample)/dw * top_grad * attn_weight
//   grad_sampling_loc[1]  += height * d(sample)/dh * top_grad * attn_weight
//
// bottom_data and grad_value are addressed as
//   h * (width * nheads * channels) + w * (nheads * channels)
//     + m * channels + c.
// NOTE(review): the width / height factors presumably convert from pixel
// units to normalized sampling locations -- confirm against the caller.
//
// FIX: the template parameter list had been stripped from the source
// (`template __device__ void ...`); restored as <typename scalar_t>.
template <typename scalar_t>
__device__ void ms_deform_attn_col2im_bilinear_gm(
    const scalar_t *&bottom_data, const int &height, const int &width,
    const int &nheads, const int &channels, const scalar_t &h,
    const scalar_t &w, const int &m, const int &c, const scalar_t &top_grad,
    const scalar_t &attn_weight, scalar_t *&grad_value,
    scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) {
  const int h_low = floorf(h);
  const int w_low = floorf(w);
  const int h_high = h_low + 1;
  const int w_high = w_low + 1;

  // Fractional offsets from the low corner and their complements.
  const scalar_t lh = h - h_low;
  const scalar_t lw = w - w_low;
  const scalar_t hh = 1 - lh, hw = 1 - lw;

  // Element strides of one step in w and one step in h.
  const int w_stride = nheads * channels;
  const int h_stride = width * w_stride;
  const int h_low_ptr_offset = h_low * h_stride;
  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
  const int w_low_ptr_offset = w_low * w_stride;
  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
  const int base_ptr = m * channels + c;

  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
  const scalar_t top_grad_value = top_grad * attn_weight;
  // Partial derivatives of the sampled value with respect to h and w.
  scalar_t grad_h_weight = 0, grad_w_weight = 0;

  scalar_t v1 = 0;
  if (h_low >= 0 && w_low >= 0) {
    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
    v1 = bottom_data[ptr1];
    grad_h_weight -= hw * v1;
    grad_w_weight -= hh * v1;
    atomicAdd(grad_value + ptr1, w1 * top_grad_value);
  }
  scalar_t v2 = 0;
  if (h_low >= 0 && w_high <= width - 1) {
    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
    v2 = bottom_data[ptr2];
    grad_h_weight -= lw * v2;
    grad_w_weight += hh * v2;
    atomicAdd(grad_value + ptr2, w2 * top_grad_value);
  }
  scalar_t v3 = 0;
  if (h_high <= height - 1 && w_low >= 0) {
    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
    v3 = bottom_data[ptr3];
    grad_h_weight += hw * v3;
    grad_w_weight -= lh * v3;
    atomicAdd(grad_value + ptr3, w3 * top_grad_value);
  }
  scalar_t v4 = 0;
  if (h_high <= height - 1 && w_high <= width - 1) {
    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
    v4 = bottom_data[ptr4];
    grad_h_weight += lw * v4;
    grad_w_weight += lh * v4;
    atomicAdd(grad_value + ptr4, w4 * top_grad_value);
  }

  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  atomicAdd(grad_attn_weight, top_grad * val);
  atomicAdd(grad_sampling_loc, width * grad_w_weight * top_grad_value);
  atomicAdd(grad_sampling_loc + 1, height * grad_h_weight * top_grad_value);
}
index; const int c_col = _temp % channels; _temp /= channels; const int sampling_index = _temp; const int m_col = _temp % num_heads; _temp /= num_heads; _temp /= num_query; const int b_col = _temp; scalar_t *data_col_ptr = data_col + index; int data_weight_ptr = sampling_index * num_levels * num_point; int data_loc_w_ptr = data_weight_ptr << 1; const int qid_stride = num_heads * channels; const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; scalar_t col = 0; for (int l_col = 0; l_col < num_levels; ++l_col) { const int level_start_id = data_level_start_index[l_col]; const int spatial_h_ptr = l_col << 1; const int spatial_h = data_spatial_shapes[spatial_h_ptr]; const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; const scalar_t *data_value_ptr = data_value + (data_value_ptr_init_offset + level_start_id * qid_stride); for (int p_col = 0; p_col < num_point; ++p_col) { const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; const scalar_t weight = data_attn_weight[data_weight_ptr]; const scalar_t h_im = loc_h * spatial_h - 0.5; const scalar_t w_im = loc_w * spatial_w - 0.5; if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) { col += ms_deform_attn_im2col_bilinear(data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col) * weight; } data_weight_ptr += 1; data_loc_w_ptr += 2; } } *data_col_ptr = col; } } template __global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1( const int n, const scalar_t *grad_col, const scalar_t *data_value, const int64_t *data_spatial_shapes, const int64_t *data_level_start_index, const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight, const int batch_size, const int spatial_size, const int num_heads, const int channels, const int num_levels, const int num_query, const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) { 
CUDA_1D_KERNEL_LOOP(index, n) { __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2]; __shared__ scalar_t cache_grad_attn_weight[blockSize]; unsigned int tid = threadIdx.x; int _temp = index; const int c_col = _temp % channels; _temp /= channels; const int sampling_index = _temp; const int m_col = _temp % num_heads; _temp /= num_heads; _temp /= num_query; const int b_col = _temp; const scalar_t top_grad = grad_col[index]; int data_weight_ptr = sampling_index * num_levels * num_point; int data_loc_w_ptr = data_weight_ptr << 1; const int grad_sampling_ptr = data_weight_ptr; grad_sampling_loc += grad_sampling_ptr << 1; grad_attn_weight += grad_sampling_ptr; const int grad_weight_stride = 1; const int grad_loc_stride = 2; const int qid_stride = num_heads * channels; const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; for (int l_col = 0; l_col < num_levels; ++l_col) { const int level_start_id = data_level_start_index[l_col]; const int spatial_h_ptr = l_col << 1; const int spatial_h = data_spatial_shapes[spatial_h_ptr]; const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; const scalar_t *data_value_ptr = data_value + value_ptr_offset; scalar_t *grad_value_ptr = grad_value + value_ptr_offset; for (int p_col = 0; p_col < num_point; ++p_col) { const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; const scalar_t weight = data_attn_weight[data_weight_ptr]; const scalar_t h_im = loc_h * spatial_h - 0.5; const scalar_t w_im = loc_w * spatial_w - 0.5; *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0; *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0; *(cache_grad_attn_weight + threadIdx.x) = 0; if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) { ms_deform_attn_col2im_bilinear( data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, 
top_grad, weight, grad_value_ptr, cache_grad_sampling_loc + (threadIdx.x << 1), cache_grad_attn_weight + threadIdx.x); } __syncthreads(); if (tid == 0) { scalar_t _grad_w = cache_grad_sampling_loc[0], _grad_h = cache_grad_sampling_loc[1], _grad_a = cache_grad_attn_weight[0]; int sid = 2; for (unsigned int tid = 1; tid < blockSize; ++tid) { _grad_w += cache_grad_sampling_loc[sid]; _grad_h += cache_grad_sampling_loc[sid + 1]; _grad_a += cache_grad_attn_weight[tid]; sid += 2; } *grad_sampling_loc = _grad_w; *(grad_sampling_loc + 1) = _grad_h; *grad_attn_weight = _grad_a; } __syncthreads(); data_weight_ptr += 1; data_loc_w_ptr += 2; grad_attn_weight += grad_weight_stride; grad_sampling_loc += grad_loc_stride; } } } } template __global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2( const int n, const scalar_t *grad_col, const scalar_t *data_value, const int64_t *data_spatial_shapes, const int64_t *data_level_start_index, const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight, const int batch_size, const int spatial_size, const int num_heads, const int channels, const int num_levels, const int num_query, const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) { CUDA_1D_KERNEL_LOOP(index, n) { __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2]; __shared__ scalar_t cache_grad_attn_weight[blockSize]; unsigned int tid = threadIdx.x; int _temp = index; const int c_col = _temp % channels; _temp /= channels; const int sampling_index = _temp; const int m_col = _temp % num_heads; _temp /= num_heads; _temp /= num_query; const int b_col = _temp; const scalar_t top_grad = grad_col[index]; int data_weight_ptr = sampling_index * num_levels * num_point; int data_loc_w_ptr = data_weight_ptr << 1; const int grad_sampling_ptr = data_weight_ptr; grad_sampling_loc += grad_sampling_ptr << 1; grad_attn_weight += grad_sampling_ptr; const int grad_weight_stride = 1; const int grad_loc_stride = 2; const int 
qid_stride = num_heads * channels; const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; for (int l_col = 0; l_col < num_levels; ++l_col) { const int level_start_id = data_level_start_index[l_col]; const int spatial_h_ptr = l_col << 1; const int spatial_h = data_spatial_shapes[spatial_h_ptr]; const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; const scalar_t *data_value_ptr = data_value + value_ptr_offset; scalar_t *grad_value_ptr = grad_value + value_ptr_offset; for (int p_col = 0; p_col < num_point; ++p_col) { const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; const scalar_t weight = data_attn_weight[data_weight_ptr]; const scalar_t h_im = loc_h * spatial_h - 0.5; const scalar_t w_im = loc_w * spatial_w - 0.5; *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0; *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0; *(cache_grad_attn_weight + threadIdx.x) = 0; if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) { ms_deform_attn_col2im_bilinear( data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, top_grad, weight, grad_value_ptr, cache_grad_sampling_loc + (threadIdx.x << 1), cache_grad_attn_weight + threadIdx.x); } __syncthreads(); for (unsigned int s = blockSize / 2; s > 0; s >>= 1) { if (tid < s) { const unsigned int xid1 = tid << 1; const unsigned int xid2 = (tid + s) << 1; cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; } __syncthreads(); } if (tid == 0) { *grad_sampling_loc = cache_grad_sampling_loc[0]; *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1]; *grad_attn_weight = cache_grad_attn_weight[0]; } __syncthreads(); data_weight_ptr += 1; data_loc_w_ptr += 2; 
grad_attn_weight += grad_weight_stride; grad_sampling_loc += grad_loc_stride; } } } } template __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v1( const int n, const scalar_t *grad_col, const scalar_t *data_value, const int64_t *data_spatial_shapes, const int64_t *data_level_start_index, const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight, const int batch_size, const int spatial_size, const int num_heads, const int channels, const int num_levels, const int num_query, const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) { CUDA_1D_KERNEL_LOOP(index, n) { extern __shared__ int _s[]; scalar_t *cache_grad_sampling_loc = reinterpret_cast(_s); scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; unsigned int tid = threadIdx.x; int _temp = index; const int c_col = _temp % channels; _temp /= channels; const int sampling_index = _temp; const int m_col = _temp % num_heads; _temp /= num_heads; _temp /= num_query; const int b_col = _temp; const scalar_t top_grad = grad_col[index]; int data_weight_ptr = sampling_index * num_levels * num_point; int data_loc_w_ptr = data_weight_ptr << 1; const int grad_sampling_ptr = data_weight_ptr; grad_sampling_loc += grad_sampling_ptr << 1; grad_attn_weight += grad_sampling_ptr; const int grad_weight_stride = 1; const int grad_loc_stride = 2; const int qid_stride = num_heads * channels; const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; for (int l_col = 0; l_col < num_levels; ++l_col) { const int level_start_id = data_level_start_index[l_col]; const int spatial_h_ptr = l_col << 1; const int spatial_h = data_spatial_shapes[spatial_h_ptr]; const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; const scalar_t *data_value_ptr = data_value + value_ptr_offset; scalar_t *grad_value_ptr = grad_value + value_ptr_offset; for (int p_col = 0; 
p_col < num_point; ++p_col) { const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; const scalar_t weight = data_attn_weight[data_weight_ptr]; const scalar_t h_im = loc_h * spatial_h - 0.5; const scalar_t w_im = loc_w * spatial_w - 0.5; *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0; *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0; *(cache_grad_attn_weight + threadIdx.x) = 0; if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) { ms_deform_attn_col2im_bilinear( data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, top_grad, weight, grad_value_ptr, cache_grad_sampling_loc + (threadIdx.x << 1), cache_grad_attn_weight + threadIdx.x); } __syncthreads(); if (tid == 0) { scalar_t _grad_w = cache_grad_sampling_loc[0], _grad_h = cache_grad_sampling_loc[1], _grad_a = cache_grad_attn_weight[0]; int sid = 2; for (unsigned int tid = 1; tid < blockDim.x; ++tid) { _grad_w += cache_grad_sampling_loc[sid]; _grad_h += cache_grad_sampling_loc[sid + 1]; _grad_a += cache_grad_attn_weight[tid]; sid += 2; } *grad_sampling_loc = _grad_w; *(grad_sampling_loc + 1) = _grad_h; *grad_attn_weight = _grad_a; } __syncthreads(); data_weight_ptr += 1; data_loc_w_ptr += 2; grad_attn_weight += grad_weight_stride; grad_sampling_loc += grad_loc_stride; } } } } template __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2( const int n, const scalar_t *grad_col, const scalar_t *data_value, const int64_t *data_spatial_shapes, const int64_t *data_level_start_index, const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight, const int batch_size, const int spatial_size, const int num_heads, const int channels, const int num_levels, const int num_query, const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) { CUDA_1D_KERNEL_LOOP(index, n) { extern __shared__ int _s[]; scalar_t *cache_grad_sampling_loc = 
reinterpret_cast(_s); scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; unsigned int tid = threadIdx.x; int _temp = index; const int c_col = _temp % channels; _temp /= channels; const int sampling_index = _temp; const int m_col = _temp % num_heads; _temp /= num_heads; _temp /= num_query; const int b_col = _temp; const scalar_t top_grad = grad_col[index]; int data_weight_ptr = sampling_index * num_levels * num_point; int data_loc_w_ptr = data_weight_ptr << 1; const int grad_sampling_ptr = data_weight_ptr; grad_sampling_loc += grad_sampling_ptr << 1; grad_attn_weight += grad_sampling_ptr; const int grad_weight_stride = 1; const int grad_loc_stride = 2; const int qid_stride = num_heads * channels; const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; for (int l_col = 0; l_col < num_levels; ++l_col) { const int level_start_id = data_level_start_index[l_col]; const int spatial_h_ptr = l_col << 1; const int spatial_h = data_spatial_shapes[spatial_h_ptr]; const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; const scalar_t *data_value_ptr = data_value + value_ptr_offset; scalar_t *grad_value_ptr = grad_value + value_ptr_offset; for (int p_col = 0; p_col < num_point; ++p_col) { const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; const scalar_t weight = data_attn_weight[data_weight_ptr]; const scalar_t h_im = loc_h * spatial_h - 0.5; const scalar_t w_im = loc_w * spatial_w - 0.5; *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0; *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0; *(cache_grad_attn_weight + threadIdx.x) = 0; if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) { ms_deform_attn_col2im_bilinear( data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, top_grad, weight, grad_value_ptr, 
cache_grad_sampling_loc + (threadIdx.x << 1), cache_grad_attn_weight + threadIdx.x); } __syncthreads(); for (unsigned int s = blockDim.x / 2, spre = blockDim.x; s > 0; s >>= 1, spre >>= 1) { if (tid < s) { const unsigned int xid1 = tid << 1; const unsigned int xid2 = (tid + s) << 1; cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; if (tid + (s << 1) < spre) { cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)]; cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)]; cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)]; } } __syncthreads(); } if (tid == 0) { *grad_sampling_loc = cache_grad_sampling_loc[0]; *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1]; *grad_attn_weight = cache_grad_attn_weight[0]; } __syncthreads(); data_weight_ptr += 1; data_loc_w_ptr += 2; grad_attn_weight += grad_weight_stride; grad_sampling_loc += grad_loc_stride; } } } } template __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks( const int n, const scalar_t *grad_col, const scalar_t *data_value, const int64_t *data_spatial_shapes, const int64_t *data_level_start_index, const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight, const int batch_size, const int spatial_size, const int num_heads, const int channels, const int num_levels, const int num_query, const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) { CUDA_1D_KERNEL_LOOP(index, n) { extern __shared__ int _s[]; scalar_t *cache_grad_sampling_loc = reinterpret_cast(_s); scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; unsigned int tid = threadIdx.x; int _temp = index; const int c_col = _temp % channels; _temp /= channels; const int sampling_index = _temp; const int m_col = _temp % num_heads; _temp /= num_heads; 
_temp /= num_query; const int b_col = _temp; const scalar_t top_grad = grad_col[index]; int data_weight_ptr = sampling_index * num_levels * num_point; int data_loc_w_ptr = data_weight_ptr << 1; const int grad_sampling_ptr = data_weight_ptr; grad_sampling_loc += grad_sampling_ptr << 1; grad_attn_weight += grad_sampling_ptr; const int grad_weight_stride = 1; const int grad_loc_stride = 2; const int qid_stride = num_heads * channels; const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; for (int l_col = 0; l_col < num_levels; ++l_col) { const int level_start_id = data_level_start_index[l_col]; const int spatial_h_ptr = l_col << 1; const int spatial_h = data_spatial_shapes[spatial_h_ptr]; const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; const scalar_t *data_value_ptr = data_value + value_ptr_offset; scalar_t *grad_value_ptr = grad_value + value_ptr_offset; for (int p_col = 0; p_col < num_point; ++p_col) { const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; const scalar_t weight = data_attn_weight[data_weight_ptr]; const scalar_t h_im = loc_h * spatial_h - 0.5; const scalar_t w_im = loc_w * spatial_w - 0.5; *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0; *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0; *(cache_grad_attn_weight + threadIdx.x) = 0; if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) { ms_deform_attn_col2im_bilinear( data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, top_grad, weight, grad_value_ptr, cache_grad_sampling_loc + (threadIdx.x << 1), cache_grad_attn_weight + threadIdx.x); } __syncthreads(); for (unsigned int s = blockDim.x / 2, spre = blockDim.x; s > 0; s >>= 1, spre >>= 1) { if (tid < s) { const unsigned int xid1 = tid << 1; const unsigned int xid2 = (tid + s) << 1; cache_grad_attn_weight[tid] 
+= cache_grad_attn_weight[tid + s]; cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; if (tid + (s << 1) < spre) { cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)]; cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)]; cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)]; } } __syncthreads(); } if (tid == 0) { atomicAdd(grad_sampling_loc, cache_grad_sampling_loc[0]); atomicAdd(grad_sampling_loc + 1, cache_grad_sampling_loc[1]); atomicAdd(grad_attn_weight, cache_grad_attn_weight[0]); } __syncthreads(); data_weight_ptr += 1; data_loc_w_ptr += 2; grad_attn_weight += grad_weight_stride; grad_sampling_loc += grad_loc_stride; } } } } template __global__ void ms_deformable_col2im_gpu_kernel_gm( const int n, const scalar_t *grad_col, const scalar_t *data_value, const int64_t *data_spatial_shapes, const int64_t *data_level_start_index, const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight, const int batch_size, const int spatial_size, const int num_heads, const int channels, const int num_levels, const int num_query, const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) { CUDA_1D_KERNEL_LOOP(index, n) { int _temp = index; const int c_col = _temp % channels; _temp /= channels; const int sampling_index = _temp; const int m_col = _temp % num_heads; _temp /= num_heads; _temp /= num_query; const int b_col = _temp; const scalar_t top_grad = grad_col[index]; int data_weight_ptr = sampling_index * num_levels * num_point; int data_loc_w_ptr = data_weight_ptr << 1; const int grad_sampling_ptr = data_weight_ptr; grad_sampling_loc += grad_sampling_ptr << 1; grad_attn_weight += grad_sampling_ptr; const int grad_weight_stride = 1; const int grad_loc_stride = 2; const int qid_stride = num_heads * channels; const int data_value_ptr_init_offset = b_col * spatial_size * 
qid_stride; for (int l_col = 0; l_col < num_levels; ++l_col) { const int level_start_id = data_level_start_index[l_col]; const int spatial_h_ptr = l_col << 1; const int spatial_h = data_spatial_shapes[spatial_h_ptr]; const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; const scalar_t *data_value_ptr = data_value + value_ptr_offset; scalar_t *grad_value_ptr = grad_value + value_ptr_offset; for (int p_col = 0; p_col < num_point; ++p_col) { const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; const scalar_t weight = data_attn_weight[data_weight_ptr]; const scalar_t h_im = loc_h * spatial_h - 0.5; const scalar_t w_im = loc_w * spatial_w - 0.5; if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) { ms_deform_attn_col2im_bilinear_gm( data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, top_grad, weight, grad_value_ptr, grad_sampling_loc, grad_attn_weight); } data_weight_ptr += 1; data_loc_w_ptr += 2; grad_attn_weight += grad_weight_stride; grad_sampling_loc += grad_loc_stride; } } } } #endif // DEFORM_ATTN_CUDA_KERNEL ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. 
// All rights reserved
#ifndef NMS_CUDA_KERNEL_CUH
#define NMS_CUDA_KERNEL_CUH

#include <float.h>
#ifdef MMCV_WITH_TRT
#include "common_cuda_helper.hpp"
#else  // MMCV_WITH_TRT
#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else  // MMCV_USE_PARROTS
#include "pytorch_cuda_helper.hpp"
#endif  // MMCV_USE_PARROTS
#endif  // MMCV_WITH_TRT

// One bit of an `unsigned long long` mask word per box: 64 boxes per tile.
int const threadsPerBlock = sizeof(unsigned long long int) * 8;

// Returns true when the IoU of two axis-aligned boxes exceeds `threshold`.
//
// Boxes are read as (x1, y1, x2, y2). `offset` is added to every width and
// height before the areas are formed. The comparison is written as
// `inter > threshold * union` so no division is performed.
__device__ inline bool devIoU(float const *const a, float const *const b,
                              const int offset, const float threshold) {
  float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]);
  float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]);
  float width = fmaxf(right - left + offset, 0.f),
        height = fmaxf(bottom - top + offset, 0.f);
  float interS = width * height;
  float Sa = (a[2] - a[0] + offset) * (a[3] - a[1] + offset);
  float Sb = (b[2] - b[0] + offset) * (b[3] - b[1] + offset);
  return interS > threshold * (Sa + Sb - interS);
}

// Builds the pairwise suppression bit mask for axis-aligned NMS.
//
// Boxes are processed in tiles of `threadsPerBlock`. For a (row tile, column
// tile) pair the column tile's boxes are staged in shared memory, then each
// thread compares its own row box with the staged boxes and sets bit `i` of
// its mask word when the overlap with column box `i` exceeds `iou_threshold`.
// Tile pairs with row_start > col_start are not processed, and on the diagonal
// a box is only compared with boxes that come after it in the tile.
// The mask word is written to dev_mask[box * gridDim.y + col_start].
__global__ void nms_cuda(const int n_boxes, const float iou_threshold,
                         const int offset, const float *dev_boxes,
                         unsigned long long *dev_mask) {
  int blocks = (n_boxes + threadsPerBlock - 1) / threadsPerBlock;
  CUDA_2D_KERNEL_BLOCK_LOOP(col_start, blocks, row_start, blocks) {
    const int tid = threadIdx.x;

    if (row_start > col_start) return;

    // The last tile may be only partially filled.
    const int row_size =
        fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
    const int col_size =
        fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

    __shared__ float block_boxes[threadsPerBlock * 4];
    if (tid < col_size) {
      block_boxes[tid * 4 + 0] =
          dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 0];
      block_boxes[tid * 4 + 1] =
          dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 1];
      block_boxes[tid * 4 + 2] =
          dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 2];
      block_boxes[tid * 4 + 3] =
          dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 3];
    }
    __syncthreads();

    if (tid < row_size) {
      const int cur_box_idx = threadsPerBlock * row_start + tid;
      const float *cur_box = dev_boxes + cur_box_idx * 4;
      int i = 0;
      unsigned long long int t = 0;
      int start = 0;
      if (row_start == col_start) {
        start = tid + 1;
      }
      for (i = start; i < col_size; i++) {
        if (devIoU(cur_box, block_boxes + i * 4, offset, iou_threshold)) {
          t |= 1ULL << i;
        }
      }
      dev_mask[cur_box_idx * gridDim.y + col_start] = t;
    }
  }
}
#endif  // NMS_CUDA_KERNEL_CUH

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu
#ifndef NMS_ROTATED_CUDA_CUH
#define NMS_ROTATED_CUDA_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif
#include "box_iou_rotated_utils.hpp"

// Integer division of x by y, rounded up.
__host__ __device__ inline int divideUP(const int x, const int y) {
  return (((x) + (y)-1) / (y));
}

namespace {
// One bit of an `unsigned long long` mask word per box: 64 boxes per tile.
int const threadsPerBlock = sizeof(unsigned long long) * 8;
}  // namespace

// Builds the pairwise suppression bit mask for rotated-box NMS.
//
// blockIdx.y selects the row tile and blockIdx.x the column tile, each of
// `threadsPerBlock` boxes. The column tile is staged in shared memory and
// every thread compares its row box with the staged boxes through
// `single_box_iou_rotated`, setting bit `i` of its mask word when the IoU with
// column box `i` exceeds `iou_threshold`. On the diagonal a box is only
// compared with later boxes of the tile.
//
// Box layout depends on `multi_label`:
//   * multi_label == 1: 6 values per box (elements 0..5 are copied).
//     NOTE(review): the sixth value is presumably a class label — confirm.
//   * otherwise: 5 values per box
//     (x_center, y_center, width, height, angle_degrees).
template <typename T>
__global__ void nms_rotated_cuda_kernel(const int n_boxes,
                                        const float iou_threshold,
                                        const T* dev_boxes,
                                        unsigned long long* dev_mask,
                                        const int multi_label) {
  // nms_rotated_cuda_kernel is modified from torchvision's nms_cuda_kernel

  if (multi_label == 1) {
    const int row_start = blockIdx.y;
    const int col_start = blockIdx.x;

    // if (row_start > col_start) return;

    const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
    const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

    // This branch stores 6 values per box, so the staging buffer needs
    // threadsPerBlock * 6 elements. With * 5 the writes of the last threads
    // (index threadIdx.x * 6 + k) would run past the end of the array.
    __shared__ T block_boxes[threadsPerBlock * 6];
    if (threadIdx.x < col_size) {
      block_boxes[threadIdx.x * 6 + 0] =
          dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 0];
      block_boxes[threadIdx.x * 6 + 1] =
          dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 1];
      block_boxes[threadIdx.x * 6 + 2] =
          dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 2];
      block_boxes[threadIdx.x * 6 + 3] =
          dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 3];
      block_boxes[threadIdx.x * 6 + 4] =
          dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 4];
      block_boxes[threadIdx.x * 6 + 5] =
          dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 5];
    }
    __syncthreads();

    if (threadIdx.x < row_size) {
      const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
      const T* cur_box = dev_boxes + cur_box_idx * 6;
      int i = 0;
      unsigned long long t = 0;
      int start = 0;
      if (row_start == col_start) {
        start = threadIdx.x + 1;
      }
      for (i = start; i < col_size; i++) {
        // Instead of devIoU used by original horizontal nms, here
        // we use the single_box_iou_rotated function from
        // box_iou_rotated_utils.h
        if (single_box_iou_rotated(cur_box, block_boxes + i * 6, 0) >
            iou_threshold) {
          t |= 1ULL << i;
        }
      }
      const int col_blocks = divideUP(n_boxes, threadsPerBlock);
      dev_mask[cur_box_idx * col_blocks + col_start] = t;
    }
  } else {
    const int row_start = blockIdx.y;
    const int col_start = blockIdx.x;

    // if (row_start > col_start) return;

    const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
    const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

    // Compared to nms_cuda_kernel, where each box is represented with 4 values
    // (x1, y1, x2, y2), each rotated box is represented with 5 values
    // (x_center, y_center, width, height, angle_degrees) here.
    __shared__ T block_boxes[threadsPerBlock * 5];
    if (threadIdx.x < col_size) {
      block_boxes[threadIdx.x * 5 + 0] =
          dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
      block_boxes[threadIdx.x * 5 + 1] =
          dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
      block_boxes[threadIdx.x * 5 + 2] =
          dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
      block_boxes[threadIdx.x * 5 + 3] =
          dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
      block_boxes[threadIdx.x * 5 + 4] =
          dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
    }
    __syncthreads();

    if (threadIdx.x < row_size) {
      const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
      const T* cur_box = dev_boxes + cur_box_idx * 5;
      int i = 0;
      unsigned long long t = 0;
      int start = 0;
      if (row_start == col_start) {
        start = threadIdx.x + 1;
      }
      for (i = start; i < col_size; i++) {
        // Instead of devIoU used by original horizontal nms, here
        // we use the single_box_iou_rotated function from
        // box_iou_rotated_utils.h
        if (single_box_iou_rotated(cur_box, block_boxes + i * 5, 0) >
            iou_threshold) {
          t |= 1ULL << i;
        }
      }
      const int col_blocks = divideUP(n_boxes, threadsPerBlock);
      dev_mask[cur_box_idx * col_blocks + col_start] = t;
    }
  }
}

#endif

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/parrots_cudawarpfunction.cuh
================================================
/*
 * Copyright (c) 2019, SenseTime.
*/
#ifndef INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_
#define INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_

#ifndef __CUDACC__
#error cudawarpfunction.cuh should only be included by .cu files
#endif
// NOTE(review): the targets of the three #include directives below are
// missing in this copy of the file (they look stripped by text extraction) —
// restore them from the upstream header before compiling.
#include
#include
#ifdef PARROTS_USE_HALF
#include
#endif

// CUDA_INTRINSIC_FUNC(Expr) expands to `Expr` when __CUDA_ARCH__ is defined
// and to nothing otherwise, so the wrapper bodies below are empty whenever
// __CUDA_ARCH__ is not defined.
#ifdef __CUDA_ARCH__
#define CUDA_INTRINSIC_FUNC(Expr) Expr
#else
#define CUDA_INTRINSIC_FUNC(Expr)
#endif

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300

#ifdef PARROTS_USE_HALF

#if CUDA_VERSION < 9000

// float16 overloads of the legacy (mask-less) warp shuffles: each forwards the
// `y` member of `var` to the intrinsic of the same name and returns the
// result.
// NOTE(review): unlike the CUDA >= 9000 overloads below, the shuffled value is
// returned directly instead of being stored into the `y` member of a float16 —
// confirm this conversion is intended.
__device__ inline float16 __shfl(float16 var, int srcLane, int width) {
  CUDA_INTRINSIC_FUNC(return __shfl(var.y, srcLane, width););
}

__device__ inline float16 __shfl_up(float16 var, unsigned delta, int width) {
  CUDA_INTRINSIC_FUNC(return __shfl_up(var.y, delta, width););
}

__device__ inline float16 __shfl_down(float16 var, unsigned delta, int width) {
  CUDA_INTRINSIC_FUNC(return __shfl_down(var.y, delta, width););
}

__device__ inline float16 __shfl_xor(float16 var, int laneMask, int width) {
  CUDA_INTRINSIC_FUNC(return __shfl_xor(var.y, laneMask, width););
}

#else // CUDA_VERSION >= 9000

// float16 overloads of the *_sync warp shuffles: the `y` member of `var` is
// shuffled and the result is stored in the `y` member of the returned float16.
__device__ inline float16 __shfl_sync(unsigned mask, float16 var, int srcLane, int width = warpSize) {
  CUDA_INTRINSIC_FUNC(float16 r; r.y = __shfl_sync(mask, var.y, srcLane, width); return r;);
}

__device__ inline float16 __shfl_up_sync(unsigned mask, float16 var, unsigned delta, int width = warpSize) {
  CUDA_INTRINSIC_FUNC( float16 r; r.y = __shfl_up_sync(mask, var.y, delta, width); return r;);
}

__device__ inline float16 __shfl_down_sync(unsigned mask, float16 var, unsigned delta, int width = warpSize) {
  CUDA_INTRINSIC_FUNC( float16 r; r.y = __shfl_down_sync(mask, var.y, delta, width); return r;);
}

// NOTE(review): this overload is the only one in the group without a
// `= warpSize` default for `width`, so callers must pass it explicitly.
__device__ inline float16 __shfl_xor_sync(unsigned mask, float16 var, int laneMask, int width) {
  CUDA_INTRINSIC_FUNC(float16 r; r.y = __shfl_xor_sync(mask, var.y, laneMask, width); return r;);
}

#endif // CUDA_VERSION < 9000

#endif // PARROTS_USE_HALF

// warp shuffle interface with a dummy mask
// For CUDA_VERSION < 9000 these templates provide the *_sync names: `mask` is
// accepted but ignored and the call is forwarded to the legacy intrinsic.
// NOTE(review): the template parameter lists after `template` are missing in
// this copy (the functions use a type `T`); they look stripped by text
// extraction — restore them before compiling.
#if CUDA_VERSION < 9000

template
__device__ inline T __shfl_sync(unsigned mask, T var, int srcLane, int width = warpSize) {
  CUDA_INTRINSIC_FUNC(return __shfl(var, srcLane, width););
}

template
__device__ inline T __shfl_up_sync(unsigned mask, T var, unsigned delta, int width = warpSize) {
  CUDA_INTRINSIC_FUNC(return __shfl_up(var, delta, width););
}

template
__device__ inline T __shfl_down_sync(unsigned mask, T var, unsigned delta, int width = warpSize) {
  CUDA_INTRINSIC_FUNC(return __shfl_down(var, delta, width););
}

template
__device__ inline T __shfl_xor_sync(unsigned mask, T var, int laneMask, int width = warpSize) {
  CUDA_INTRINSIC_FUNC(return __shfl_xor(var, laneMask, width););
}

#endif // CUDA_VERSION < 9000

#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300

#endif // INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. 
All rights reserved #ifndef POINT_IN_BOXES_CUDA_KERNEL_CUH #define POINT_IN_BOXES_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif template __device__ inline void lidar_to_local_coords(T shift_x, T shift_y, T rz, T &local_x, T &local_y) { T cosa = cos(-rz), sina = sin(-rz); local_x = shift_x * cosa + shift_y * (-sina); local_y = shift_x * sina + shift_y * cosa; } template __device__ inline int check_pt_in_box3d(const T *pt, const T *box3d, T &local_x, T &local_y) { // param pt: (x, y, z) // param box3d: (cx, cy, cz, x_size, y_size, z_size, rz) in LiDAR coordinate, // cz in the bottom center T x = pt[0], y = pt[1], z = pt[2]; T cx = box3d[0], cy = box3d[1], cz = box3d[2]; T x_size = box3d[3], y_size = box3d[4], z_size = box3d[5], rz = box3d[6]; cz += z_size / 2.0; // shift to the center since cz in box3d is the bottom center if (fabsf(z - cz) > z_size / 2.0) return 0; lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y); float in_flag = (local_x > -x_size / 2.0) & (local_x < x_size / 2.0) & (local_y > -y_size / 2.0) & (local_y < y_size / 2.0); return in_flag; } template __global__ void points_in_boxes_part_forward_cuda_kernel( int batch_size, int boxes_num, int pts_num, const T *boxes, const T *pts, int *box_idx_of_points) { // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR // coordinate, z is the bottom center, each box DO NOT overlaps params pts: // (B, npoints, 3) [x, y, z] in LiDAR coordinate params boxes_idx_of_points: // (B, npoints), default -1 int bs_idx = blockIdx.y; CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) { if (bs_idx >= batch_size) return; boxes += bs_idx * boxes_num * 7; pts += bs_idx * pts_num * 3 + pt_idx * 3; box_idx_of_points += bs_idx * pts_num + pt_idx; T local_x = 0, local_y = 0; int cur_in_flag = 0; for (int k = 0; k < boxes_num; k++) { cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y); if (cur_in_flag) { box_idx_of_points[0] 
= k; break; } } } } template __global__ void points_in_boxes_all_forward_cuda_kernel( int batch_size, int boxes_num, int pts_num, const T *boxes, const T *pts, int *box_idx_of_points) { // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR // coordinate, z is the bottom center, each box DO NOT overlaps params pts: // (B, npoints, 3) [x, y, z] in LiDAR coordinate params boxes_idx_of_points: // (B, npoints), default -1 int bs_idx = blockIdx.y; CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) { if (bs_idx >= batch_size) return; boxes += bs_idx * boxes_num * 7; pts += bs_idx * pts_num * 3 + pt_idx * 3; box_idx_of_points += bs_idx * pts_num * boxes_num + pt_idx * boxes_num; T local_x = 0, local_y = 0; for (int k = 0; k < boxes_num; k++) { const int cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y); if (cur_in_flag) { box_idx_of_points[k] = 1; } } } } #endif // POINT_IN_BOXES_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef POINTS_IN_POLYGONS_CUDA_KERNEL_CUH #define POINTS_IN_POLYGONS_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif struct point { float x, y; }; template __global__ void points_in_polygons_forward_cuda_kernel( const int nthreads, const scalar_t *vertex1, const scalar_t *vertex2, const int rows, const int cols, scalar_t *inside_flag) { CUDA_1D_KERNEL_LOOP(index, nthreads) { int row = index / cols; int col = index % cols; const scalar_t *offset_vertex1 = vertex1 + row * 2; const scalar_t *offset_vertex2 = vertex2 + col * 8; point point_[1]; point polygon[4]; point_[0].x = offset_vertex1[0]; point_[0].y = offset_vertex1[1]; polygon[0].x = offset_vertex2[0]; polygon[0].y = offset_vertex2[1]; polygon[1].x = offset_vertex2[2]; polygon[1].y = offset_vertex2[3]; polygon[2].x = offset_vertex2[4]; polygon[2].y = offset_vertex2[5]; polygon[3].x = offset_vertex2[6]; polygon[3].y = offset_vertex2[7]; int nCross = 0; int i, j; float sx, sy, tx, ty, px, py, x; for (i = 0, j = 3; i < 4; j = i, i++) { sx = polygon[i].x; sy = polygon[i].y; tx = polygon[j].x; ty = polygon[j].y; px = point_[0].x; py = point_[0].y; if (py < min(sy, ty)) continue; if (py > max(sy, ty)) continue; if ((sx == px && sy == py) || (tx == px && ty == py)) { break; } else { if ((sy < py && ty >= py) || (sy >= py && ty < py)) { x = sx + (py - sy) * (tx - sx) / (ty - sy); if (x == px) { break; } if (x > px) { nCross++; } } } } if (nCross % 2 == 1) { inside_flag[index] = 1.0; } else { inside_flag[index] = 0.0; } return; } } #endif // POINTS_IN_POLYGONS_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/psamask_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef PSAMASK_CUDA_KERNEL_CUH #define PSAMASK_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif // CUDA: grid stride looping #ifndef CUDA_KERNEL_LOOP #define CUDA_KERNEL_LOOP(i, n) \ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ i += blockDim.x * gridDim.x) #endif template __global__ void psamask_collect_forward_cuda( const int nthreads, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask, const T* mask_data, T* buffer_data) { CUDA_KERNEL_LOOP(index, nthreads) { const int w = index % w_feature; const int h = (index / w_feature) % h_feature; const int n = index / w_feature / h_feature; // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed const int hstart = max(0, half_h_mask - h); const int hend = min(h_mask, h_feature + half_h_mask - h); const int wstart = max(0, half_w_mask - w); const int wend = min(w_mask, w_feature + half_w_mask - w); // (hidx, widx ) with mask-indexed // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed for (int hidx = hstart; hidx < hend; hidx++) { for (int widx = wstart; widx < wend; widx++) { buffer_data[(n * h_feature * w_feature + (hidx + h - half_h_mask) * w_feature + (widx + w - half_w_mask)) * h_feature * w_feature + h * w_feature + w] = mask_data [((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + h) * w_feature + w]; } } } } template __global__ void psamask_distribute_forward_cuda( const int nthreads, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask, const T* mask_data, T* buffer_data) { CUDA_KERNEL_LOOP(index, nthreads) { const int w = index % w_feature; const int h = (index / w_feature) % h_feature; const int n = index / w_feature / h_feature; // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed const 
int hstart = max(0, half_h_mask - h); const int hend = min(h_mask, h_feature + half_h_mask - h); const int wstart = max(0, half_w_mask - w); const int wend = min(w_mask, w_feature + half_w_mask - w); // (hidx, widx ) with mask-indexed // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed for (int hidx = hstart; hidx < hend; hidx++) { for (int widx = wstart; widx < wend; widx++) { buffer_data[(n * h_feature * w_feature + h * w_feature + w) * h_feature * w_feature + (hidx + h - half_h_mask) * w_feature + (widx + w - half_w_mask)] = mask_data [((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + h) * w_feature + w]; } } } } template __global__ void psamask_collect_backward_cuda( const int nthreads, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask, const T* buffer_diff, T* mask_diff) { CUDA_KERNEL_LOOP(index, nthreads) { const int w = index % w_feature; const int h = (index / w_feature) % h_feature; const int n = index / w_feature / h_feature; // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed const int hstart = max(0, half_h_mask - h); const int hend = min(h_mask, h_feature + half_h_mask - h); const int wstart = max(0, half_w_mask - w); const int wend = min(w_mask, w_feature + half_w_mask - w); // (hidx, widx ) with mask-indexed // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed for (int hidx = hstart; hidx < hend; hidx++) { for (int widx = wstart; widx < wend; widx++) { mask_diff[((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + h) * w_feature + w] = buffer_diff[(n * h_feature * w_feature + (hidx + h - half_h_mask) * w_feature + (widx + w - half_w_mask)) * h_feature * w_feature + h * w_feature + w]; } } } } template __global__ void psamask_distribute_backward_cuda( const int nthreads, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask, const 
T* buffer_diff, T* mask_diff) { CUDA_KERNEL_LOOP(index, nthreads) { const int w = index % w_feature; const int h = (index / w_feature) % h_feature; const int n = index / w_feature / h_feature; // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed const int hstart = max(0, half_h_mask - h); const int hend = min(h_mask, h_feature + half_h_mask - h); const int wstart = max(0, half_w_mask - w); const int wend = min(w_mask, w_feature + half_w_mask - w); // (hidx, widx ) with mask-indexed // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed for (int hidx = hstart; hidx < hend; hidx++) { for (int widx = wstart; widx < wend; widx++) { mask_diff[((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + h) * w_feature + w] = buffer_diff[(n * h_feature * w_feature + h * w_feature + w) * h_feature * w_feature + (hidx + h - half_h_mask) * w_feature + (widx + w - half_w_mask)]; } } } } #endif // PSAMASK_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh ================================================ // Modified from // https://github.com/csuhan/ReDet/blob/master/mmdet/ops/riroi_align/src/riroi_align_kernel.cu #ifndef RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH #define RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH #include #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else // MMCV_USE_PARROTS #include "pytorch_cuda_helper.hpp" #endif // MMCV_USE_PARROTS /*** Forward ***/ template __global__ void riroi_align_rotated_forward_cuda_kernel( const int nthreads, const scalar_t *bottom_data, const scalar_t *bottom_rois, const scalar_t spatial_scale, const int num_samples, const bool clockwise, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int num_orientations, scalar_t *top_data) { CUDA_1D_KERNEL_LOOP(index, nthreads) { // (n, c, ph, pw) is an element in the pooled 
output int pw = index % pooled_width; int ph = (index / pooled_width) % pooled_height; int o = (index / pooled_width / pooled_height) % num_orientations; int c = (index / pooled_width / pooled_height / num_orientations) % channels; int n = index / pooled_width / pooled_height / num_orientations / channels; const scalar_t *offset_bottom_rois = bottom_rois + n * 6; int roi_batch_ind = offset_bottom_rois[0]; // Do not using rounding; this implementation detail is critical scalar_t roi_center_w = offset_bottom_rois[1] * spatial_scale; scalar_t roi_center_h = offset_bottom_rois[2] * spatial_scale; scalar_t roi_width = offset_bottom_rois[3] * spatial_scale; scalar_t roi_height = offset_bottom_rois[4] * spatial_scale; // scalar_t theta = offset_bottom_rois[5] * M_PI / 180.0; scalar_t theta = offset_bottom_rois[5]; // Force malformed ROIs to be 1x1 roi_width = max(roi_width, (scalar_t)1.); roi_height = max(roi_height, (scalar_t)1.); scalar_t bin_size_h = static_cast(roi_height) / static_cast(pooled_height); scalar_t bin_size_w = static_cast(roi_width) / static_cast(pooled_width); // find aligned index scalar_t ind_float = theta * num_orientations / (2 * M_PI); int ind = floorf(ind_float); scalar_t l_var = ind_float - (scalar_t)ind; scalar_t r_var = 1.0 - l_var; // correct start channel ind = (ind + num_orientations) % num_orientations; // rotated channel int ind_rot = (o - ind + num_orientations) % num_orientations; int ind_rot_plus = (ind_rot + 1 + num_orientations) % num_orientations; const scalar_t *offset_bottom_data = bottom_data + (roi_batch_ind * channels * num_orientations + c * num_orientations + ind_rot) * height * width; const scalar_t *offset_bottom_data_plus = bottom_data + (roi_batch_ind * channels * num_orientations + c * num_orientations + ind_rot_plus) * height * width; // We use roi_bin_grid to sample the grid and mimic integral int roi_bin_grid_h = (num_samples > 0) ? 
num_samples : ceilf(roi_height / pooled_height); // e.g., = 2 int roi_bin_grid_w = (num_samples > 0) ? num_samples : ceilf(roi_width / pooled_width); // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). // Appropriate translation needs to be applied after. if (clockwise) { theta = -theta; // If clockwise, the angle needs to be reversed. } scalar_t roi_start_h = -roi_height / 2.0; scalar_t roi_start_w = -roi_width / 2.0; scalar_t cosscalar_theta = cos(theta); scalar_t sinscalar_theta = sin(theta); // We do average (integral) pooling inside a bin const scalar_t count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 scalar_t output_val = 0.; for (int iy = 0; iy < roi_bin_grid_h; iy++) { // e.g., iy = 0, 1 const scalar_t yy = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 for (int ix = 0; ix < roi_bin_grid_w; ix++) { const scalar_t xx = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); // Rotate by theta (counterclockwise) around the center and translate scalar_t y = yy * cosscalar_theta - xx * sinscalar_theta + roi_center_h; scalar_t x = yy * sinscalar_theta + xx * cosscalar_theta + roi_center_w; scalar_t val = bilinear_interpolate( offset_bottom_data, height, width, y, x, index); scalar_t val_plus = bilinear_interpolate( offset_bottom_data_plus, height, width, y, x, index); output_val += r_var * val + l_var * val_plus; } } output_val /= count; top_data[index] = output_val; } } /*** Backward ***/ template __global__ void riroi_align_rotated_backward_cuda_kernel( const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois, const scalar_t spatial_scale, const int num_samples, const bool clockwise, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int num_orientations, scalar_t *bottom_diff) { CUDA_1D_KERNEL_LOOP(index, nthreads) { // (n, c, ph, pw) is an 
element in the pooled output int pw = index % pooled_width; int ph = (index / pooled_width) % pooled_height; int o = (index / pooled_width / pooled_height) % num_orientations; int c = (index / pooled_width / pooled_height / num_orientations) % channels; int n = index / pooled_width / pooled_height / num_orientations / channels; const scalar_t *offset_bottom_rois = bottom_rois + n * 6; int roi_batch_ind = offset_bottom_rois[0]; // Do not round scalar_t roi_center_w = offset_bottom_rois[1] * spatial_scale; scalar_t roi_center_h = offset_bottom_rois[2] * spatial_scale; scalar_t roi_width = offset_bottom_rois[3] * spatial_scale; scalar_t roi_height = offset_bottom_rois[4] * spatial_scale; // scalar_t theta = offset_bottom_rois[5] * M_PI / 180.0; scalar_t theta = offset_bottom_rois[5]; // Force malformed ROIs to be 1x1 roi_width = max(roi_width, (scalar_t)1.); roi_height = max(roi_height, (scalar_t)1.); scalar_t bin_size_h = static_cast(roi_height) / static_cast(pooled_height); scalar_t bin_size_w = static_cast(roi_width) / static_cast(pooled_width); // find aligned index scalar_t ind_float = theta * num_orientations / (2 * M_PI); int ind = floorf(ind_float); scalar_t l_var = ind_float - (scalar_t)ind; scalar_t r_var = 1.0 - l_var; // correct start channel ind = (ind + num_orientations) % num_orientations; // rotated channel int ind_rot = (o - ind + num_orientations) % num_orientations; int ind_rot_plus = (ind_rot + 1 + num_orientations) % num_orientations; scalar_t *offset_bottom_diff = bottom_diff + (roi_batch_ind * channels * num_orientations + c * num_orientations + ind_rot) * height * width; scalar_t *offset_bottom_diff_plus = bottom_diff + (roi_batch_ind * channels * num_orientations + c * num_orientations + ind_rot_plus) * height * width; int top_offset = (n * channels * num_orientations + c * num_orientations + o) * pooled_height * pooled_width; const scalar_t *offset_top_diff = top_diff + top_offset; const scalar_t top_diff_this_bin = offset_top_diff[ph * 
pooled_width + pw]; // We use roi_bin_grid to sample the grid and mimic integral int roi_bin_grid_h = (num_samples > 0) ? num_samples : ceilf(roi_height / pooled_height); // e.g., = 2 int roi_bin_grid_w = (num_samples > 0) ? num_samples : ceilf(roi_width / pooled_width); // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). // Appropriate translation needs to be applied after. if (clockwise) { theta = -theta; // If clockwise, the angle needs to be reversed. } scalar_t roi_start_h = -roi_height / 2.0; scalar_t roi_start_w = -roi_width / 2.0; scalar_t cosTheta = cos(theta); scalar_t sinTheta = sin(theta); // We do average (integral) pooling inside a bin const scalar_t count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 for (int iy = 0; iy < roi_bin_grid_h; iy++) { // e.g., iy = 0, 1 const scalar_t yy = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 for (int ix = 0; ix < roi_bin_grid_w; ix++) { const scalar_t xx = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); // Rotate by theta around the center and translate scalar_t y = yy * cosTheta - xx * sinTheta + roi_center_h; scalar_t x = yy * sinTheta + xx * cosTheta + roi_center_w; scalar_t w1, w2, w3, w4; int x_low, x_high, y_low, y_high; bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high, index); scalar_t g1 = top_diff_this_bin * w1 / count; scalar_t g2 = top_diff_this_bin * w2 / count; scalar_t g3 = top_diff_this_bin * w3 / count; scalar_t g4 = top_diff_this_bin * w4 / count; if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { atomicAdd(offset_bottom_diff + y_low * width + x_low, g1 * r_var); atomicAdd(offset_bottom_diff + y_low * width + x_high, g2 * r_var); atomicAdd(offset_bottom_diff + y_high * width + x_low, g3 * r_var); atomicAdd(offset_bottom_diff + y_high * width + x_high, g4 * r_var); atomicAdd(offset_bottom_diff_plus + 
y_low * width + x_low, g1 * l_var); atomicAdd(offset_bottom_diff_plus + y_low * width + x_high, g2 * l_var); atomicAdd(offset_bottom_diff_plus + y_high * width + x_low, g3 * l_var); atomicAdd(offset_bottom_diff_plus + y_high * width + x_high, g4 * l_var); } // if } // ix } // iy } // CUDA_1D_KERNEL_LOOP } // RiRoIAlignBackward #endif // RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/roi_align_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef ROI_ALIGN_CUDA_KERNEL_CUH #define ROI_ALIGN_CUDA_KERNEL_CUH #include #ifdef MMCV_WITH_TRT #include "common_cuda_helper.hpp" #else // MMCV_WITH_TRT #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else // MMCV_USE_PARROTS #include "pytorch_cuda_helper.hpp" #endif // MMCV_USE_PARROTS #endif // MMCV_WITH_TRT /*** Forward ***/ template __global__ void roi_align_forward_cuda_kernel( const int nthreads, const T* input, const T* rois, T* output, T* argmax_y, T* argmax_x, const int pooled_height, const int pooled_width, const T spatial_scale, const int sampling_ratio, const int pool_mode, // 0 - max pool, 1 - avg pool const bool aligned, const int channels, const int height, const int width) { CUDA_1D_KERNEL_LOOP(index, nthreads) { // (n, c, ph, pw) is an element in the pooled output int pw = index % pooled_width; int ph = (index / pooled_width) % pooled_height; int c = (index / pooled_width / pooled_height) % channels; int n = index / pooled_width / pooled_height / channels; const T* offset_rois = rois + n * 5; int roi_batch_ind = offset_rois[0]; // Do not using rounding; this implementation detail is critical T offset = aligned ? 
(T)0.5 : (T)0.0; T roi_start_w = offset_rois[1] * spatial_scale - offset; T roi_start_h = offset_rois[2] * spatial_scale - offset; T roi_end_w = offset_rois[3] * spatial_scale - offset; T roi_end_h = offset_rois[4] * spatial_scale - offset; T roi_width = roi_end_w - roi_start_w; T roi_height = roi_end_h - roi_start_h; if (!aligned) { // for backward-compatibility only roi_width = max(roi_width, (T)1.); roi_height = max(roi_height, (T)1.); } T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); const T* offset_input = input + (roi_batch_ind * channels + c) * height * width; // We use roi_bin_grid to sample the grid and mimic integral int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : static_cast(ceilf(roi_height / pooled_height)); int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : static_cast(ceilf(roi_width / pooled_width)); if (pool_mode == 0) { // We do max pooling inside a bin T maxval = -FLT_MAX; T maxidx_y = -1.f, maxidx_x = -1.f; for (int iy = 0; iy < roi_bin_grid_h; iy++) { const T y = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); for (int ix = 0; ix < roi_bin_grid_w; ix++) { const T x = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); T val = bilinear_interpolate(offset_input, height, width, y, x, index); if (val > maxval) { maxval = val; maxidx_y = y; maxidx_x = x; } } } output[index] = maxval; argmax_y[index] = maxidx_y; argmax_x[index] = maxidx_x; } else if (pool_mode == 1) { // We do average pooling inside a bin const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); T output_val = 0.; for (int iy = 0; iy < roi_bin_grid_h; iy++) { const T y = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); for (int ix = 0; ix < roi_bin_grid_w; ix++) { const T x = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * 
bin_size_w / static_cast(roi_bin_grid_w); T val = bilinear_interpolate(offset_input, height, width, y, x, index); output_val += val; } } output[index] = output_val / count; } } } /*** Backward ***/ template __global__ void roi_align_backward_cuda_kernel( const int nthreads, const T* grad_output, const T* rois, const T* argmax_y, const T* argmax_x, T* grad_input, const int pooled_height, const int pooled_width, const T spatial_scale, const int sampling_ratio, const int pool_mode, // 0 - max pool, 1 - avg pool const bool aligned, const int channels, const int height, const int width) { CUDA_1D_KERNEL_LOOP(index, nthreads) { // (n, c, ph, pw) is an element in the pooled output int pw = index % pooled_width; int ph = (index / pooled_width) % pooled_height; int c = (index / pooled_width / pooled_height) % channels; int n = index / pooled_width / pooled_height / channels; const T grad_output_this_bin = grad_output[index]; const T* offset_rois = rois + n * 5; int roi_batch_ind = offset_rois[0]; T* offset_grad_input = grad_input + ((roi_batch_ind * channels + c) * height * width); if (pool_mode == 0) { T y = argmax_y[index], x = argmax_x[index]; if (y != -1.f) { T w1, w2, w3, w4; int x_low, x_high, y_low, y_high; bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high, index); if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { atomicAdd(offset_grad_input + y_low * width + x_low, grad_output_this_bin * w1); atomicAdd(offset_grad_input + y_low * width + x_high, grad_output_this_bin * w2); atomicAdd(offset_grad_input + y_high * width + x_low, grad_output_this_bin * w3); atomicAdd(offset_grad_input + y_high * width + x_high, grad_output_this_bin * w4); } } } else if (pool_mode == 1) { // Do not using rounding; this implementation detail is critical T offset = aligned ? 
(T)0.5 : (T)0.0; T roi_start_w = offset_rois[1] * spatial_scale - offset; T roi_start_h = offset_rois[2] * spatial_scale - offset; T roi_end_w = offset_rois[3] * spatial_scale - offset; T roi_end_h = offset_rois[4] * spatial_scale - offset; T roi_width = roi_end_w - roi_start_w; T roi_height = roi_end_h - roi_start_h; if (!aligned) { // for backward-compatibility only roi_width = max(roi_width, (T)1.); roi_height = max(roi_height, (T)1.); } T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); // We use roi_bin_grid to sample the grid and mimic integral int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : static_cast(ceilf(roi_height / pooled_height)); int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : static_cast(ceilf(roi_width / pooled_width)); // We do average (integral) pooling inside a bin const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 for (int iy = 0; iy < roi_bin_grid_h; iy++) { const T y = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); for (int ix = 0; ix < roi_bin_grid_w; ix++) { const T x = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); T w1, w2, w3, w4; int x_low, x_high, y_low, y_high; bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high, index); if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { atomicAdd(offset_grad_input + y_low * width + x_low, grad_output_this_bin * w1 / count); atomicAdd(offset_grad_input + y_low * width + x_high, grad_output_this_bin * w2 / count); atomicAdd(offset_grad_input + y_high * width + x_low, grad_output_this_bin * w3 / count); atomicAdd(offset_grad_input + y_high * width + x_high, grad_output_this_bin * w4 / count); } } } } } } #endif // ROI_ALIGN_CUDA_KERNEL_CUH ================================================ FILE: 
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh ================================================ // Modified from // https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlignRotated // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved #ifndef ROI_ALIGN_ROTATED_CUDA_KERNEL_CUH #define ROI_ALIGN_ROTATED_CUDA_KERNEL_CUH #include #ifdef MMCV_WITH_TRT #include "common_cuda_helper.hpp" #else // MMCV_WITH_TRT #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else // MMCV_USE_PARROTS #include "pytorch_cuda_helper.hpp" #endif // MMCV_USE_PARROTS #endif // MMCV_WITH_TRT /*** Forward ***/ template __global__ void roi_align_rotated_forward_cuda_kernel( const int nthreads, const scalar_t *bottom_data, const scalar_t *bottom_rois, const scalar_t spatial_scale, const int sample_num, const bool aligned, const bool clockwise, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, scalar_t *top_data) { CUDA_1D_KERNEL_LOOP(index, nthreads) { // (n, c, ph, pw) is an element in the pooled output int pw = index % pooled_width; int ph = (index / pooled_width) % pooled_height; int c = (index / pooled_width / pooled_height) % channels; int n = index / pooled_width / pooled_height / channels; const scalar_t *offset_bottom_rois = bottom_rois + n * 6; int roi_batch_ind = offset_bottom_rois[0]; // Do not using rounding; this implementation detail is critical scalar_t offset = aligned ? 
(scalar_t)0.5 : (scalar_t)0.0; scalar_t roi_center_w = offset_bottom_rois[1] * spatial_scale - offset; scalar_t roi_center_h = offset_bottom_rois[2] * spatial_scale - offset; scalar_t roi_width = offset_bottom_rois[3] * spatial_scale; scalar_t roi_height = offset_bottom_rois[4] * spatial_scale; // scalar_t theta = offset_bottom_rois[5] * M_PI / 180.0; scalar_t theta = offset_bottom_rois[5]; if (clockwise) { theta = -theta; // If clockwise, the angle needs to be reversed. } if (!aligned) { // for backward-compatibility only // Force malformed ROIs to be 1x1 roi_width = max(roi_width, (scalar_t)1.); roi_height = max(roi_height, (scalar_t)1.); } scalar_t bin_size_h = static_cast(roi_height) / static_cast(pooled_height); scalar_t bin_size_w = static_cast(roi_width) / static_cast(pooled_width); const scalar_t *offset_bottom_data = bottom_data + (roi_batch_ind * channels + c) * height * width; // We use roi_bin_grid to sample the grid and mimic integral int roi_bin_grid_h = (sample_num > 0) ? sample_num : ceilf(roi_height / pooled_height); // e.g., = 2 int roi_bin_grid_w = (sample_num > 0) ? sample_num : ceilf(roi_width / pooled_width); // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). // Appropriate translation needs to be applied after. scalar_t roi_start_h = -roi_height / 2.0; scalar_t roi_start_w = -roi_width / 2.0; scalar_t cosscalar_theta = cos(theta); scalar_t sinscalar_theta = sin(theta); // We do average (integral) pooling inside a bin const scalar_t count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 scalar_t output_val = 0.; for (int iy = 0; iy < roi_bin_grid_h; iy++) { // e.g., iy = 0, 1 const scalar_t yy = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 for (int ix = 0; ix < roi_bin_grid_w; ix++) { const scalar_t xx = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); // Rotate by theta (counterclockwise) around the center and translate scalar_t y = yy * cosscalar_theta - xx * sinscalar_theta + roi_center_h; scalar_t x = yy * sinscalar_theta + xx * cosscalar_theta + roi_center_w; scalar_t val = bilinear_interpolate( offset_bottom_data, height, width, y, x, index); output_val += val; } } output_val /= count; top_data[index] = output_val; } }

/*** Backward ***/
// Backward kernel of rotated RoIAlign.
//
// Every loop index is one element (n, c, ph, pw) of the pooled gradient
// `top_diff`. The bin is re-sampled on the same rotated grid as in the
// forward kernel, and the incoming gradient, divided by the number of
// samples in the bin, is scattered into `bottom_diff` with atomicAdd using
// the four bilinear weights of each sample.
//
// `bottom_rois` holds 6 values per RoI, read here as
// [batch_index, center_w, center_h, width, height, theta]. theta is handed
// to cos()/sin() unchanged (the degree conversion is commented out) and is
// negated when `clockwise` is set.
template <typename scalar_t>
__global__ void roi_align_rotated_backward_cuda_kernel(
    const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois,
    const scalar_t spatial_scale, const int sample_num, const bool aligned,
    const bool clockwise, const int channels, const int height,
    const int width, const int pooled_height, const int pooled_width,
    scalar_t *bottom_diff) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the pooled output
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int c = (index / pooled_width / pooled_height) % channels;
    int n = index / pooled_width / pooled_height / channels;

    const scalar_t *offset_bottom_rois = bottom_rois + n * 6;
    int roi_batch_ind = offset_bottom_rois[0];

    // Do not round
    scalar_t offset = aligned ? (scalar_t)0.5 : (scalar_t)0.0;
    scalar_t roi_center_w = offset_bottom_rois[1] * spatial_scale - offset;
    scalar_t roi_center_h = offset_bottom_rois[2] * spatial_scale - offset;
    scalar_t roi_width = offset_bottom_rois[3] * spatial_scale;
    scalar_t roi_height = offset_bottom_rois[4] * spatial_scale;
    // scalar_t theta = offset_bottom_rois[5] * M_PI / 180.0;
    scalar_t theta = offset_bottom_rois[5];
    if (clockwise) {
      theta = -theta;  // If clockwise, the angle needs to be reversed.
    }
    if (!aligned) {  // for backward-compatibility only
      // Force malformed ROIs to be 1x1
      roi_width = max(roi_width, (scalar_t)1.);
      roi_height = max(roi_height, (scalar_t)1.);
    }
    scalar_t bin_size_h = static_cast<scalar_t>(roi_height) /
                          static_cast<scalar_t>(pooled_height);
    scalar_t bin_size_w = static_cast<scalar_t>(roi_width) /
                          static_cast<scalar_t>(pooled_width);

    // Start of the (batch, channel) plane that receives the gradient.
    scalar_t *offset_bottom_diff =
        bottom_diff + (roi_batch_ind * channels + c) * height * width;

    int top_offset = (n * channels + c) * pooled_height * pooled_width;
    const scalar_t *offset_top_diff = top_diff + top_offset;
    const scalar_t top_diff_this_bin = offset_top_diff[ph * pooled_width + pw];

    // We use roi_bin_grid to sample the grid and mimic integral
    int roi_bin_grid_h = (sample_num > 0)
                             ? sample_num
                             : ceilf(roi_height / pooled_height);  // e.g., = 2
    int roi_bin_grid_w =
        (sample_num > 0) ? sample_num : ceilf(roi_width / pooled_width);

    // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
    // Appropriate translation needs to be applied after.
    scalar_t roi_start_h = -roi_height / 2.0;
    scalar_t roi_start_w = -roi_width / 2.0;
    scalar_t cosTheta = cos(theta);
    scalar_t sinTheta = sin(theta);

    // We do average (integral) pooling inside a bin.
    // Clamped to at least 1 exactly like the forward kernel, so the divisor
    // can never be zero even for a degenerate (empty) sampling grid.
    const scalar_t count = max(roi_bin_grid_h * roi_bin_grid_w, 1);  // e.g. = 4

    for (int iy = 0; iy < roi_bin_grid_h; iy++) {  // e.g., iy = 0, 1
      const scalar_t yy =
          roi_start_h + ph * bin_size_h +
          static_cast<scalar_t>(iy + .5f) * bin_size_h /
              static_cast<scalar_t>(roi_bin_grid_h);  // e.g., 0.5, 1.5
      for (int ix = 0; ix < roi_bin_grid_w; ix++) {
        const scalar_t xx = roi_start_w + pw * bin_size_w +
                            static_cast<scalar_t>(ix + .5f) * bin_size_w /
                                static_cast<scalar_t>(roi_bin_grid_w);

        // Rotate by theta around the center and translate
        scalar_t y = yy * cosTheta - xx * sinTheta + roi_center_h;
        scalar_t x = yy * sinTheta + xx * cosTheta + roi_center_w;

        scalar_t w1, w2, w3, w4;
        int x_low, x_high, y_low, y_high;

        bilinear_interpolate_gradient<scalar_t>(height, width, y, x, w1, w2,
                                                w3, w4, x_low, x_high, y_low,
                                                y_high, index);

        scalar_t g1 = top_diff_this_bin * w1 / count;
        scalar_t g2 = top_diff_this_bin * w2 / count;
        scalar_t g3 = top_diff_this_bin * w3 / count;
        scalar_t g4 = top_diff_this_bin * w4 / count;

        // Samples whose neighbour indices came back negative contribute
        // nothing.
        if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
          atomicAdd(offset_bottom_diff + y_low * width + x_low, g1);
          atomicAdd(offset_bottom_diff + y_low * width + x_high, g2);
          atomicAdd(offset_bottom_diff + y_high * width + x_low, g3);
          atomicAdd(offset_bottom_diff + y_high * width + x_high, g4);
        }  // if
      }    // ix
    }      // iy
  }        // CUDA_1D_KERNEL_LOOP
}  // RoIAlignBackward

#endif  // ROI_ALIGN_ROTATED_CUDA_KERNEL_CUH

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/roi_pool_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. 
All rights reserved
#ifndef ROI_POOL_CUDA_KERNEL_CUH
#define ROI_POOL_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Forward kernel of max RoI pooling.
//
// Every loop index is one element (n, c, ph, pw) of the pooled output.
// `rois` holds 5 values per RoI, read here as [batch_index, x1, y1, x2, y2].
// For each bin the maximum input value is written to `output` and, when
// `argmax` is not NULL, its flat offset (h * width + w) inside the
// (batch, channel) plane is written to `argmax` (-1 if nothing was pooled).
template <typename T>
__global__ void roi_pool_forward_cuda_kernel(
    const int nthreads, const T* input, const T* rois, T* output, int* argmax,
    const int pooled_height, const int pooled_width, const T spatial_scale,
    const int channels, const int height, const int width) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the pooled output
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int c = (index / pooled_width / pooled_height) % channels;
    int n = index / pooled_width / pooled_height / channels;

    const T* offset_rois = rois + n * 5;
    int roi_batch_ind = offset_rois[0];
    // calculate the roi region on feature maps
    T roi_x1 = offset_rois[1] * spatial_scale;
    T roi_y1 = offset_rois[2] * spatial_scale;
    T roi_x2 = (offset_rois[3] + 1) * spatial_scale;
    T roi_y2 = (offset_rois[4] + 1) * spatial_scale;

    // Malformed rois (non-positive width or height) are skipped. Still
    // write a defined result so that the backward kernel, which reads
    // argmax[index] for every index, never sees an unwritten entry.
    T roi_w = roi_x2 - roi_x1;
    T roi_h = roi_y2 - roi_y1;
    if (roi_w <= 0 || roi_h <= 0) {
      output[index] = 0;
      if (argmax != NULL) argmax[index] = -1;
      continue;
    }

    T bin_size_w = roi_w / static_cast<T>(pooled_width);
    T bin_size_h = roi_h / static_cast<T>(pooled_height);

    // the corresponding bin region
    int bin_x1 = floorf(static_cast<T>(pw) * bin_size_w + roi_x1);
    int bin_y1 = floorf(static_cast<T>(ph) * bin_size_h + roi_y1);
    int bin_x2 = ceilf(static_cast<T>(pw + 1) * bin_size_w + roi_x1);
    int bin_y2 = ceilf(static_cast<T>(ph + 1) * bin_size_h + roi_y1);

    // add roi offsets and clip to input boundaries
    bin_x1 = min(max(bin_x1, 0), width);
    bin_y1 = min(max(bin_y1, 0), height);
    bin_x2 = min(max(bin_x2, 0), width);
    bin_y2 = min(max(bin_y2, 0), height);
    bool is_empty = (bin_y2 <= bin_y1) || (bin_x2 <= bin_x1);

    const T* offset_input =
        input + (roi_batch_ind * channels + c) * height * width;
    // Define an empty pooling region to be zero
    // If nothing is pooled, argmax = -1 causes nothing to be backprop'd
    T max_val = is_empty ? 0 : -FLT_MAX;
    int max_idx = -1;
    for (int h = bin_y1; h < bin_y2; ++h) {
      for (int w = bin_x1; w < bin_x2; ++w) {
        int offset = h * width + w;
        if (offset_input[offset] > max_val) {
          max_val = offset_input[offset];
          max_idx = offset;
        }
      }
    }
    output[index] = max_val;
    if (argmax != NULL) argmax[index] = max_idx;
  }
}

// Backward kernel of max RoI pooling.
//
// Every loop index is one element of the pooled gradient `grad_output`; its
// value is added (atomicAdd) to the input position recorded in `argmax`.
// Entries whose argmax is -1 contribute nothing.
template <typename T>
__global__ void roi_pool_backward_cuda_kernel(
    const int nthreads, const T* grad_output, const T* rois, const int* argmax,
    T* grad_input, const int pooled_height, const int pooled_width,
    const int channels, const int height, const int width) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c) is an element in the pooled output
    int c = (index / pooled_width / pooled_height) % channels;
    int n = index / pooled_width / pooled_height / channels;

    // First value of each 5-element roi record is the batch index.
    int roi_batch_ind = rois[n * 5];
    T* grad_input_offset =
        grad_input + ((roi_batch_ind * channels + c) * height * width);
    int argmax_index = argmax[index];

    if (argmax_index != -1) {
      atomicAdd(grad_input_offset + argmax_index, grad_output[index]);
    }
  }
}

#endif  // ROI_POOL_CUDA_KERNEL_CUH

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/roiaware_pool3d_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. 
All rights reserved
#ifndef ROIAWARE_POOL3D_CUDA_KERNEL_CUH
#define ROIAWARE_POOL3D_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Rotates the offset (shift_x, shift_y) by the angle -rz and stores the
// result in (local_x, local_y).
template <typename T>
__device__ inline void lidar_to_local_coords(T shift_x, T shift_y, T rz,
                                             T &local_x, T &local_y) {
  T cosa = cos(-rz), sina = sin(-rz);
  local_x = shift_x * cosa + shift_y * (-sina);
  local_y = shift_x * sina + shift_y * cosa;
}

// Returns 1 if `pt` lies inside `box3d`, 0 otherwise. When the height test
// passes, (local_x, local_y) receive the point's coordinates in the box
// frame; when it fails they are left untouched.
template <typename T>
__device__ inline int check_pt_in_box3d(const T *pt, const T *box3d,
                                        T &local_x, T &local_y) {
  // param pt: (x, y, z)
  // param box3d: (cx, cy, cz, x_size, y_size, z_size, rz) in LiDAR coordinate,
  // cz in the bottom center
  T x = pt[0], y = pt[1], z = pt[2];
  T cx = box3d[0], cy = box3d[1], cz = box3d[2];
  T x_size = box3d[3], y_size = box3d[4], z_size = box3d[5], rz = box3d[6];
  cz += z_size /
        2.0;  // shift to the center since cz in box3d is the bottom center

  if (fabsf(z - cz) > z_size / 2.0) return 0;
  lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
  float in_flag = (local_x > -x_size / 2.0) & (local_x < x_size / 2.0) &
                  (local_y > -y_size / 2.0) & (local_y < y_size / 2.0);
  return in_flag;
}

// For every (box, point) pair writes into pts_mask either -1 (point not in
// box) or the voxel index of the point inside the box, packed as
// (x_idx << 16) + (y_idx << 8) + z_idx.
template <typename T>
__global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num,
                                            int out_x, int out_y, int out_z,
                                            const T *rois, const T *pts,
                                            int *pts_mask) {
  // params rois: (N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
  // coordinate
  // params pts: (npoints, 3) [x, y, z]
  // params pts_mask: (N, npoints): -1 means point does not in this box,
  // otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
  int box_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) {
    if (box_idx >= boxes_num) return;

    // Per-iteration local pointers: advancing the kernel arguments in place
    // would compound the offsets if a thread runs the loop body more than
    // once.
    const T *cur_pt = pts + pt_idx * 3;
    const T *cur_roi = rois + box_idx * 7;
    int *cur_mask = pts_mask + box_idx * pts_num + pt_idx;

    T local_x = 0, local_y = 0;
    int cur_in_flag = check_pt_in_box3d(cur_pt, cur_roi, local_x, local_y);

    cur_mask[0] = -1;
    if (cur_in_flag > 0) {
      T local_z = cur_pt[2] - cur_roi[2];
      T x_size = cur_roi[3], y_size = cur_roi[4], z_size = cur_roi[5];

      T x_res = x_size / out_x;
      T y_res = y_size / out_y;
      T z_res = z_size / out_z;

      unsigned int x_idx = int((local_x + x_size / 2) / x_res);
      unsigned int y_idx = int((local_y + y_size / 2) / y_res);
      unsigned int z_idx = int(local_z / z_res);

      x_idx = min(max(x_idx, 0), out_x - 1);
      y_idx = min(max(y_idx, 0), out_y - 1);
      z_idx = min(max(z_idx, 0), out_z - 1);

      // NOTE: the decoder in collect_inside_pts_for_box3d masks every
      // component with 0xFF, i.e. only 8 bits per axis survive the round
      // trip.
      unsigned int idx_encoding = (x_idx << 16) + (y_idx << 8) + z_idx;
      cur_mask[0] = idx_encoding;
    }
  }
}

// For every box walks all points and appends the index of each point that
// falls into the box to the list of its voxel. Slot 0 of every voxel list is
// the running counter; at most max_pts_each_voxel - 1 indices are stored.
template <typename T>
__global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num,
                                             int max_pts_each_voxel, int out_x,
                                             int out_y, int out_z,
                                             const int *pts_mask,
                                             T *pts_idx_of_voxels) {
  // params pts_mask: (N, npoints), -1 or the packed voxel index
  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
  CUDA_1D_KERNEL_LOOP(box_idx, boxes_num) {
    int max_num_pts = max_pts_each_voxel - 1;  // index 0 is the counter
    // Local pointer instead of advancing the argument in place, so repeated
    // loop iterations of one thread do not compound the offset.
    T *box_voxels = pts_idx_of_voxels +
                    box_idx * out_x * out_y * out_z * max_pts_each_voxel;

    for (int k = 0; k < pts_num; k++) {
      if (pts_mask[box_idx * pts_num + k] != -1) {
        unsigned int idx_encoding = pts_mask[box_idx * pts_num + k];
        unsigned int x_idx = (idx_encoding >> 16) & 0xFF;
        unsigned int y_idx = (idx_encoding >> 8) & 0xFF;
        unsigned int z_idx = idx_encoding & 0xFF;
        unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel +
                                   y_idx * out_z * max_pts_each_voxel +
                                   z_idx * max_pts_each_voxel;
        unsigned int cnt = box_voxels[base_offset];
        if (cnt < max_num_pts) {
          box_voxels[base_offset + cnt + 1] = k;
          box_voxels[base_offset]++;
        }
      }
    }
  }
}

// Max-pools one feature channel over the points of every voxel. Writes the
// maximum to pooled_features (only when the voxel holds a point that beat
// the initial value) and the winning point index, or -1, to argmax.
template <typename T>
__global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels,
                                   int max_pts_each_voxel, int out_x,
                                   int out_y, int out_z, const T *pts_feature,
                                   const int *pts_idx_of_voxels,
                                   T *pooled_features, int *argmax) {
  // params pts_feature: (npoints, C)
  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
  // index 0 is the counter
  // params pooled_features: (N, out_x, out_y, out_z, C)
  // params argmax: (N, out_x, out_y, out_z, C)

  int box_idx = blockIdx.z;
  int channel_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(voxel_idx_flat, out_x * out_y * out_z) {
    int x_idx = voxel_idx_flat / (out_y * out_z);
    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
    int z_idx = voxel_idx_flat % out_z;
    if (box_idx >= boxes_num || channel_idx >= channels) return;

    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
    // Per-iteration local pointers (see generate_pts_mask_for_box3d).
    const int *voxel_pts =
        pts_idx_of_voxels +
        box_idx * out_x * out_y * out_z * max_pts_each_voxel +
        offset_base * max_pts_each_voxel;
    T *voxel_out = pooled_features +
                   box_idx * out_x * out_y * out_z * channels +
                   offset_base * channels + channel_idx;
    int *voxel_argmax = argmax + box_idx * out_x * out_y * out_z * channels +
                        offset_base * channels + channel_idx;

    int argmax_idx = -1;
    float max_val = -1e50;

    int total_pts = voxel_pts[0];

    for (int k = 1; k <= total_pts; k++) {
      if (pts_feature[voxel_pts[k] * channels + channel_idx] > max_val) {
        max_val = pts_feature[voxel_pts[k] * channels + channel_idx];
        argmax_idx = voxel_pts[k];
      }
    }

    if (argmax_idx != -1) {
      voxel_out[0] = max_val;
    }
    voxel_argmax[0] = argmax_idx;
  }
}

// Average-pools one feature channel over the points of every voxel. Voxels
// without points are left unwritten.
template <typename T>
__global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels,
                                   int max_pts_each_voxel, int out_x,
                                   int out_y, int out_z, const T *pts_feature,
                                   const int *pts_idx_of_voxels,
                                   T *pooled_features) {
  // params pts_feature: (npoints, C)
  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
  // index 0 is the counter
  // params pooled_features: (N, out_x, out_y, out_z, C)

  int box_idx = blockIdx.z;
  int channel_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(voxel_idx_flat, out_x * out_y * out_z) {
    int x_idx = voxel_idx_flat / (out_y * out_z);
    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
    int z_idx = voxel_idx_flat % out_z;
    if (box_idx >= boxes_num || channel_idx >= channels) return;

    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
    // Per-iteration local pointers (see generate_pts_mask_for_box3d).
    const int *voxel_pts =
        pts_idx_of_voxels +
        box_idx * out_x * out_y * out_z * max_pts_each_voxel +
        offset_base * max_pts_each_voxel;
    T *voxel_out = pooled_features +
                   box_idx * out_x * out_y * out_z * channels +
                   offset_base * channels + channel_idx;

    float sum_val = 0;
    int total_pts = voxel_pts[0];

    for (int k = 1; k <= total_pts; k++) {
      sum_val += pts_feature[voxel_pts[k] * channels + channel_idx];
    }

    if (total_pts > 0) {
      voxel_out[0] = sum_val / total_pts;
    }
  }
}

// Backward of roiaware_maxpool3d: adds the gradient of every pooled element
// to the point recorded in argmax.
template <typename T>
__global__ void roiaware_maxpool3d_backward(int boxes_num, int channels,
                                            int out_x, int out_y, int out_z,
                                            const int *argmax,
                                            const T *grad_out, T *grad_in) {
  // params argmax: (N, out_x, out_y, out_z, C)
  // params grad_out: (N, out_x, out_y, out_z, C)
  // params grad_in: (npoints, C), return value

  int box_idx = blockIdx.z;
  int channel_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(voxel_idx_flat, out_x * out_y * out_z) {
    int x_idx = voxel_idx_flat / (out_y * out_z);
    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
    int z_idx = voxel_idx_flat % out_z;
    if (box_idx >= boxes_num || channel_idx >= channels) return;

    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
    // Per-iteration local pointers (see generate_pts_mask_for_box3d).
    const int *voxel_argmax = argmax +
                              box_idx * out_x * out_y * out_z * channels +
                              offset_base * channels + channel_idx;
    const T *voxel_grad = grad_out +
                          box_idx * out_x * out_y * out_z * channels +
                          offset_base * channels + channel_idx;

    // An empty voxel only skips this element; `continue` (not `return`)
    // keeps any further loop iterations of this thread alive.
    if (voxel_argmax[0] == -1) continue;

    atomicAdd(grad_in + voxel_argmax[0] * channels + channel_idx,
              voxel_grad[0] * 1);
  }
}

// Backward of roiaware_avgpool3d: spreads the gradient of every pooled
// element evenly over the points stored for its voxel.
template <typename T>
__global__ void roiaware_avgpool3d_backward(int boxes_num, int channels,
                                            int out_x, int out_y, int out_z,
                                            int max_pts_each_voxel,
                                            const int *pts_idx_of_voxels,
                                            const T *grad_out, T *grad_in) {
  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
  // params grad_out: (N, out_x, out_y, out_z, C)
  // params grad_in: (npoints, C), return value

  int box_idx = blockIdx.z;
  int channel_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(voxel_idx_flat, out_x * out_y * out_z) {
    int x_idx = voxel_idx_flat / (out_y * out_z);
    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
    int z_idx = voxel_idx_flat % out_z;
    if (box_idx >= boxes_num || channel_idx >= channels) return;

    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
    // Per-iteration local pointers (see generate_pts_mask_for_box3d).
    const int *voxel_pts =
        pts_idx_of_voxels +
        box_idx * out_x * out_y * out_z * max_pts_each_voxel +
        offset_base * max_pts_each_voxel;
    const T *voxel_grad = grad_out +
                          box_idx * out_x * out_y * out_z * channels +
                          offset_base * channels + channel_idx;

    int total_pts = voxel_pts[0];
    float cur_grad = 1 / fmaxf(float(total_pts), 1.0);
    for (int k = 1; k <= total_pts; k++) {
      atomicAdd(grad_in + voxel_pts[k] * channels + channel_idx,
                voxel_grad[0] * cur_grad);
    }
  }
}

#endif  // ROIAWARE_POOL3D_CUDA_KERNEL_CUH

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/roipoint_pool3d_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROIPOINT_POOL3D_CUDA_KERNEL_CUH
#define ROIPOINT_POOL3D_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Rotates the offset (shift_x, shift_y) by the angle -rz and stores the
// result in (local_x, local_y).
template <typename T>
__device__ inline void lidar_to_local_coords(T shift_x, T shift_y, T rz,
                                             T &local_x, T &local_y) {
  T cosa = cos(-rz), sina = sin(-rz);
  local_x = shift_x * cosa + shift_y * (-sina);
  local_y = shift_x * sina + shift_y * cosa;
}

// Returns 1 if `pt` lies inside `box3d`, 0 otherwise.
template <typename T>
__device__ inline int check_pt_in_box3d(const T *pt, const T *box3d,
                                        T &local_x, T &local_y) {
  // param pt: (x, y, z)
  // param box3d: (cx, cy, cz, dx, dy, dz, rz) in LiDAR coordinate, cz in the
  // bottom center
  T x = pt[0], y = pt[1], z = pt[2];
  T cx = box3d[0], cy = box3d[1], cz = box3d[2];
  T dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6];
  cz += dz / 2.0;  // shift to the center since cz in box3d is the bottom center

  if (fabsf(z - cz) > dz / 2.0) return 0;
  lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
  T in_flag = (local_x > -dx / 2.0) & (local_x < dx / 2.0) &
              (local_y > -dy / 2.0) & (local_y < dy / 2.0);
  return in_flag;
}

// Writes, for every (batch, point, box) triple, whether the point lies
// inside the box.
template <typename T>
__global__ void assign_pts_to_box3d(int batch_size, int pts_num, int boxes_num,
                                    const T *xyz, const T *boxes3d,
                                    int *pts_assign) {
  // params xyz: (B, N, 3)
  // params boxes3d: (B, M, 7)
  // params pts_assign: (B, N, M): 1 if the point is inside the box, else 0
  int box_idx = blockIdx.y;
  int bs_idx = blockIdx.z;
  CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) {
    if (box_idx >= boxes_num || bs_idx >= batch_size) return;

    int assign_idx =
        bs_idx * pts_num * boxes_num + pt_idx * boxes_num + box_idx;

    int box_offset = bs_idx * boxes_num * 7 + box_idx * 7;
    int pt_offset = bs_idx * pts_num * 3 + pt_idx * 3;

    T local_x = 0, local_y = 0;
    int cur_in_flag = check_pt_in_box3d(xyz + pt_offset, boxes3d + box_offset,
                                        local_x, local_y);
    pts_assign[assign_idx] = cur_in_flag;
  }
}

// For every box collects up to sampled_pts_num indices of points assigned to
// it. Boxes without any point get pooled_empty_flag = 1; boxes with fewer
// points than requested are padded by cycling through the points found.
__global__ void get_pooled_idx(int batch_size, int pts_num, int boxes_num,
                               int sampled_pts_num, const int *pts_assign,
                               int *pts_idx, int *pooled_empty_flag) {
  // params pts_assign: (B, N, M)
  // params pts_idx: (B, M, sampled_pts_num)
  // params pooled_empty_flag: (B, M)
  CUDA_1D_KERNEL_LOOP(boxes_idx, boxes_num) {
    int bs_idx = blockIdx.y;

    int cnt = 0;
    for (int k = 0; k < pts_num; k++) {
      if (pts_assign[bs_idx * pts_num * boxes_num + k * boxes_num +
                     boxes_idx]) {
        if (cnt < sampled_pts_num) {
          pts_idx[bs_idx * boxes_num * sampled_pts_num +
                  boxes_idx * sampled_pts_num + cnt] = k;
          cnt++;
        } else
          break;
      }
    }

    if (cnt == 0) {
      pooled_empty_flag[bs_idx * boxes_num + boxes_idx] = 1;
    } else if (cnt < sampled_pts_num) {
      // duplicate same points for sampling
      for (int k = cnt; k < sampled_pts_num; k++) {
        int duplicate_idx = k % cnt;
        int base_offset =
            bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num;
        pts_idx[base_offset + k] = pts_idx[base_offset + duplicate_idx];
      }
    }
  }
}

// Gathers, for every sampled point of every non-empty box, its xyz
// coordinates followed by its feature vector into pooled_features.
template <typename T>
__global__ void roipoint_pool3d_forward(
    int batch_size, int pts_num, int boxes_num, int feature_in_len,
    int sampled_pts_num, const T *xyz, const int *pts_idx,
    const T *pts_feature, T *pooled_features, int *pooled_empty_flag) {
  // params xyz: (B, N, 3)
  // params pts_idx: (B, M, sampled_pts_num)
  // params pts_feature: (B, N, C)
  // params pooled_features: (B, M, sampled_pts_num, 3+C)
  // params pooled_empty_flag: (B, M)
  int box_idx = blockIdx.y;
  int bs_idx = blockIdx.z;
  CUDA_1D_KERNEL_LOOP(sample_pt_idx, sampled_pts_num) {
    if (box_idx >= boxes_num || bs_idx >= batch_size) return;
    if (pooled_empty_flag[bs_idx * boxes_num + box_idx]) return;

    int temp_idx = bs_idx * boxes_num * sampled_pts_num +
                   box_idx * sampled_pts_num + sample_pt_idx;
    int src_pt_idx = pts_idx[temp_idx];
    int dst_feature_offset = temp_idx * (3 + feature_in_len);

    // first three slots: the point coordinates
    for (int j = 0; j < 3; j++)
      pooled_features[dst_feature_offset + j] =
          xyz[bs_idx * pts_num * 3 + src_pt_idx * 3 + j];

    // remaining slots: the point feature vector
    int src_feature_offset =
        bs_idx * pts_num * feature_in_len + src_pt_idx * feature_in_len;
    memcpy(pooled_features + dst_feature_offset + 3,
           pts_feature + src_feature_offset, feature_in_len * sizeof(T));
  }
}

#endif  // ROIPOINT_POOL3D_CUDA_KERNEL_CUH

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/rotated_feature_align_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/SJTU-Thinklab-Det/r3det-on-mmdetection/blob/master/mmdet/ops/fr/src/feature_refine_kernel.cu
#ifndef ROTATED_FEATURE_ALIGN_CUDA_KERNEL_CUH
#define ROTATED_FEATURE_ALIGN_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Forward kernel of rotated feature alignment.
//
// Every loop index is one element (n, c, h, w) of the feature map. The
// output is the input value at that element plus the bilinearly interpolated
// input at `points` sample locations taken from the box stored for (n, h, w)
// in `best_bboxes`: the box centre and, when points > 1, its four corners.
// A box record has 5 values, read here as [y, x, w, h, angle].
template <typename scalar_t>
__global__ void rotated_feature_align_forward_kernel(
    const int nthreads, const int points, const scalar_t* bottom_data,
    const scalar_t* best_bboxes, const scalar_t spatial_scale,
    const int channels, const int height, const int width,
    scalar_t* top_data) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int w = index % width;
    int h = (index / width) % height;
    int c = (index / width / height) % channels;
    int n = index / width / height / channels;

    const scalar_t* bbox_offset =
        best_bboxes + ((n * height + h) * width + w) * 5;
    scalar_t roi_y = bbox_offset[0] * spatial_scale;
    scalar_t roi_x = bbox_offset[1] * spatial_scale;

    // Sample 0 is the box centre; samples 1..4 are the rotated corners.
    scalar_t px[5] = {roi_x, 0, 0, 0, 0};
    scalar_t py[5] = {roi_y, 0, 0, 0, 0};

    if (points > 1) {
      scalar_t roi_w = bbox_offset[2] * spatial_scale;
      scalar_t roi_h = bbox_offset[3] * spatial_scale;
      scalar_t roi_a = bbox_offset[4];

      scalar_t w_2 = roi_w / 2, h_2 = roi_h / 2;
      scalar_t cosa = cosf(roi_a), sina = sinf(roi_a);
      scalar_t wx = cosa * w_2, wy = sina * w_2;
      scalar_t hx = -sina * h_2, hy = cosa * h_2;

      px[1] = roi_x + wx + hx;
      py[1] = roi_y + wy + hy;
      px[2] = roi_x - wx + hx;
      py[2] = roi_y - wy + hy;
      px[3] = roi_x - wx - hx;
      py[3] = roi_y - wy - hy;
      px[4] = roi_x + wx - hx;
      py[4] = roi_y + wy - hy;
    }

    const scalar_t* offset_bottom_data =
        bottom_data + (n * channels + c) * height * width;

    // px/py hold 5 entries; never index past them.
    const int num_samples = min(points, 5);

    scalar_t output_val = bottom_data[index];
    for (int i = 0; i < num_samples; i++) {
      output_val += bilinear_interpolate<scalar_t>(offset_bottom_data, height,
                                                   width, py[i], px[i], i);
    }
    top_data[index] = output_val;
  }
}

// Backward kernel of rotated feature alignment.
//
// Every loop index is one element (n, c, h, w) of `top_diff`. Its gradient
// is added to the same element of `bottom_diff` and, weighted by the
// bilinear coefficients, to the neighbours of every sample location used in
// the forward pass. All writes use atomicAdd.
template <typename scalar_t>
__global__ void rotated_feature_align_backward_kernel(
    const int nthreads, const int points, const scalar_t* top_diff,
    const scalar_t* best_bboxes, const scalar_t spatial_scale,
    const int channels, const int height, const int width,
    scalar_t* bottom_diff) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int w = index % width;
    int h = (index / width) % height;
    int c = (index / width / height) % channels;
    int n = index / width / height / channels;

    const scalar_t* bbox_offset =
        best_bboxes + ((n * height + h) * width + w) * 5;
    scalar_t roi_y = bbox_offset[0] * spatial_scale;
    scalar_t roi_x = bbox_offset[1] * spatial_scale;

    // Sample 0 is the box centre; samples 1..4 are the rotated corners.
    scalar_t px[5] = {roi_x, 0, 0, 0, 0};
    scalar_t py[5] = {roi_y, 0, 0, 0, 0};

    if (points > 1) {
      scalar_t roi_w = bbox_offset[2] * spatial_scale;
      scalar_t roi_h = bbox_offset[3] * spatial_scale;
      scalar_t roi_a = bbox_offset[4];

      scalar_t w_2 = roi_w / 2, h_2 = roi_h / 2;
      scalar_t cosa = cosf(roi_a), sina = sinf(roi_a);
      scalar_t wx = cosa * w_2, wy = sina * w_2;
      scalar_t hx = -sina * h_2, hy = cosa * h_2;

      px[1] = roi_x + wx + hx;
      py[1] = roi_y + wy + hy;
      px[2] = roi_x - wx + hx;
      py[2] = roi_y - wy + hy;
      px[3] = roi_x - wx - hx;
      py[3] = roi_y - wy - hy;
      px[4] = roi_x + wx - hx;
      py[4] = roi_y + wy - hy;
    }

    scalar_t* offset_bottom_diff =
        bottom_diff + (n * channels + c) * height * width;
    scalar_t value_top_diff = top_diff[index];

    // Identity term of the forward pass (output = input + samples).
    atomicAdd(bottom_diff + index, value_top_diff);

    // px/py hold 5 entries; never index past them.
    const int num_samples = min(points, 5);

    for (int i = 0; i < num_samples; i++) {
      scalar_t w1, w2, w3, w4;
      int x_low, x_high, y_low, y_high;

      bilinear_interpolate_gradient<scalar_t>(height, width, py[i], px[i], w1,
                                              w2, w3, w4, x_low, x_high, y_low,
                                              y_high, i);
      scalar_t g1 = value_top_diff * w1;
      scalar_t g2 = value_top_diff * w2;
      scalar_t g3 = value_top_diff * w3;
      scalar_t g4 = value_top_diff * w4;
      // Samples whose neighbour indices came back negative contribute
      // nothing.
      if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
        atomicAdd(offset_bottom_diff + y_low * width + x_low, g1);
        atomicAdd(offset_bottom_diff + y_low * width + x_high, g2);
        atomicAdd(offset_bottom_diff + y_high * width + x_low, g3);
        atomicAdd(offset_bottom_diff + y_high * width + x_high, g4);
      }
    }
  }
}

#endif  // ROTATED_FEATURE_ALIGN_CUDA_KERNEL_CUH
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/scatter_points_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef SCATTER_POINTS_CUDA_KERNEL_CUH
#define SCATTER_POINTS_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;
int const maxGridDim = 50000;

// Atomically stores max(*address, val) into *address (float overload),
// implemented as a compare-and-swap loop on the value's bit pattern.
__device__ __forceinline__ static void reduceMax(float *address, float val) {
  int *address_as_i = reinterpret_cast<int *>(address);
  int old = *address_as_i, assumed;
  do {
    assumed = old;
    old = atomicCAS(address_as_i, assumed,
                    __float_as_int(fmaxf(val, __int_as_float(assumed))));
  } while (assumed != old || __int_as_float(old) < val);
}

// Atomically stores max(*address, val) into *address (double overload),
// implemented as a compare-and-swap loop on the value's bit pattern.
__device__ __forceinline__ static void reduceMax(double *address, double val) {
  unsigned long long *address_as_ull =
      reinterpret_cast<unsigned long long *>(address);
  unsigned long long old = *address_as_ull, assumed;
  do {
    assumed = old;
    old = atomicCAS(
        address_as_ull, assumed,
        __double_as_longlong(fmax(val, __longlong_as_double(assumed))));
  } while (assumed != old || __longlong_as_double(old) < val);
}

// reduceAdd: atomically adds val to *address. Uses atomicAdd where the
// target provides it and a compare-and-swap loop otherwise.
// get rid of meaningless warnings when compiling host code
#ifdef HIP_DIFF
__device__ __forceinline__ static void reduceAdd(float *address, float val) {
  atomicAdd(address, val);
}
__device__ __forceinline__ static void reduceAdd(double *address, double val) {
  atomicAdd(address, val);
}
#else
#ifdef __CUDA_ARCH__
__device__ __forceinline__ static void reduceAdd(float *address, float val) {
#if (__CUDA_ARCH__ < 200)
#ifdef _MSC_VER
#pragma message( \
    "compute capability lower than 2.x. fall back to use CAS version of atomicAdd for float32")
#else
#warning \
    "compute capability lower than 2.x. fall back to use CAS version of atomicAdd for float32"
#endif
  int *address_as_i = reinterpret_cast<int *>(address);
  int old = *address_as_i, assumed;
  do {
    assumed = old;
    old = atomicCAS(address_as_i, assumed,
                    __float_as_int(val + __int_as_float(assumed)));
  } while (assumed != old);
#else
  atomicAdd(address, val);
#endif
}

__device__ __forceinline__ static void reduceAdd(double *address, double val) {
#if (__CUDA_ARCH__ < 600)
#ifdef _MSC_VER
#pragma message( \
    "compute capability lower than 6.x. fall back to use CAS version of atomicAdd for float64")
#else
#warning \
    "compute capability lower than 6.x. fall back to use CAS version of atomicAdd for float64"
#endif
  unsigned long long *address_as_ull =
      reinterpret_cast<unsigned long long *>(address);
  unsigned long long old = *address_as_ull, assumed;
  do {
    assumed = old;
    old = atomicCAS(address_as_ull, assumed,
                    __double_as_longlong(val + __longlong_as_double(assumed)));
  } while (assumed != old);
#else
  atomicAdd(address, val);
#endif
}
#endif  // __CUDA_ARCH__
#endif  // HIP_DIFF

// Reduces every input row into the output row named by coors_map[x]
// (-1 = skip the row). MAX uses reduceMax; every other reduce type
// accumulates with reduceAdd.
template <typename T>
__global__ void feats_reduce_kernel(
    const T *feats, const int32_t *coors_map,
    T *reduced_feats,  // shall be 0 at initialization
    const int num_input, const int num_feats, const reduce_t reduce_type) {
  CUDA_1D_KERNEL_LOOP(x, num_input) {
    int32_t reduce_to = coors_map[x];
    if (reduce_to == -1) continue;

    const T *feats_offset = feats + x * num_feats;
    T *reduced_feats_offset = reduced_feats + reduce_to * num_feats;
    if (reduce_type == reduce_t::MAX) {
      for (int i = 0; i < num_feats; i++) {
        reduceMax(&reduced_feats_offset[i], feats_offset[i]);
      }
    } else {
      for (int i = 0; i < num_feats; i++) {
        reduceAdd(&reduced_feats_offset[i], feats_offset[i]);
      }
    }
  }
}

// Gradient of the SUM / MEAN reduction: every input row receives the
// gradient of the row it was reduced into, divided by reduce_count for MEAN.
// Rows mapped to -1 and any other reduce type are left untouched.
template <typename T>
__global__ void add_reduce_traceback_grad_kernel(
    T *grad_feats, const T *grad_reduced_feats, const int32_t *coors_map,
    const int32_t *reduce_count, const int num_input, const int num_feats,
    const reduce_t reduce_type) {
  CUDA_1D_KERNEL_LOOP(x, num_input) {
    int32_t reduce_to = coors_map[x];
    if (reduce_to == -1) {
      continue;
    }

    const int input_offset = x * num_feats;
    T *grad_feats_offset = grad_feats + input_offset;
    const int reduced_offset = reduce_to * num_feats;
    const T *grad_reduced_feats_offset = grad_reduced_feats + reduced_offset;

    if (reduce_type == reduce_t::SUM) {
      for (int i = 0; i < num_feats; i++) {
        grad_feats_offset[i] = grad_reduced_feats_offset[i];
      }
    } else if (reduce_type == reduce_t::MEAN) {
      for (int i = 0; i < num_feats; i++) {
        grad_feats_offset[i] = grad_reduced_feats_offset[i] /
                               static_cast<T>(reduce_count[reduce_to]);
      }
    }
  }
}

// For the MAX reduction: records in reduce_from, per reduced element, the
// smallest input row index whose value equals the reduced value (atomicMin).
template <typename T>
__global__ void max_reduce_traceback_scatter_idx_kernel(
    const T *feats, const T *reduced_feats, int32_t *reduce_from,
    const int32_t *coors_map, const int num_input, const int num_feats) {
  CUDA_1D_KERNEL_LOOP(x, num_input) {
    int32_t reduce_to = coors_map[x];

    const int input_offset = x * num_feats;
    const T *feats_offset = feats + input_offset;

    if (reduce_to == -1) {
      continue;
    }

    const int reduced_offset = reduce_to * num_feats;
    const T *reduced_feats_offset = reduced_feats + reduced_offset;
    int32_t *reduce_from_offset = reduce_from + reduced_offset;

    for (int i = 0; i < num_feats; i++) {
      if (feats_offset[i] == reduced_feats_offset[i]) {
        atomicMin(&reduce_from_offset[i], static_cast<int32_t>(x));
      }
    }
  }
}

// Gradient of the MAX reduction: copies the gradient of every reduced
// element to the input row recorded for it in reduce_from.
template <typename T>
__global__ void max_reduce_scatter_grad_kernel(T *grad_feats,
                                               const T *grad_reduced_feats,
                                               const int32_t *reduce_from,
                                               const int num_reduced,
                                               const int num_feats) {
  CUDA_1D_KERNEL_LOOP(x, num_reduced) {
    const int reduced_offset = x * num_feats;
    const int32_t *scatter_to_offset = reduce_from + reduced_offset;
    const T *grad_reduced_feats_offset = grad_reduced_feats + reduced_offset;

    for (int i = 0; i < num_feats; i++) {
      grad_feats[scatter_to_offset[i] * num_feats + i] =
          grad_reduced_feats_offset[i];
    }
  }
}

#endif  // SCATTER_POINTS_CUDA_KERNEL_CUH

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/sigmoid_focal_loss_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH
#define SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Forward kernel of the sigmoid focal loss.
//
// Every loop index is one (sample n, class c) logit of `input`. The loss is
// the positive term when target[n] == c and the negative term otherwise;
// when `weight` is not NULL the result is scaled by weight[target[n]].
template <typename T>
__global__ void sigmoid_focal_loss_forward_cuda_kernel(
    const int nthreads, const T* input, const int64_t* target, const T* weight,
    T* output, const T gamma, const T alpha, const int num_classes) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int n = index / num_classes;
    int c = index % num_classes;

    int64_t t = target[n];
    T flag_p = (t == c);
    T flag_n = (t != c);

    // p = sigmoid(x) = 1 / (1 + exp(-x))
    // exp (not expf) keeps full precision for T and matches the backward
    // kernel.
    T p = (T)1. / ((T)1. + exp(-input[index]));

    // (1 - p)**gamma * log(p)
    T term_p = pow(((T)1. - p), gamma) * log(max(p, (T)FLT_MIN));
    // p**gamma * log(1 - p)
    T term_n = pow(p, gamma) * log(max((T)1. - p, (T)FLT_MIN));

    output[index] = (T)0.;
    output[index] += -flag_p * alpha * term_p;
    output[index] += -flag_n * ((T)1. - alpha) * term_n;
    if (weight != NULL) {
      output[index] *= weight[t];
    }
  }
}

// Backward kernel of the sigmoid focal loss: writes the derivative of the
// forward loss with respect to every logit into `grad_input`.
template <typename T>
__global__ void sigmoid_focal_loss_backward_cuda_kernel(
    const int nthreads, const T* input, const int64_t* target, const T* weight,
    T* grad_input, const T gamma, const T alpha, const int num_classes) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int n = index / num_classes;
    int c = index % num_classes;

    int64_t t = target[n];
    T flag_p = (t == c);
    T flag_n = (t != c);

    // p = sigmoid(x) = 1 / (1 + exp(-x))
    T p = (T)1. / ((T)1. + exp(-input[index]));

    // (1 - p)**gamma * (1 - p - gamma*p*log(p))
    T term_p = pow(((T)1. - p), gamma) *
               ((T)1. - p - (gamma * p * log(max(p, (T)FLT_MIN))));
    // p**gamma * (gamma * (1 - p) * log(1 - p) - p)
    T term_n = pow(p, gamma) *
               (gamma * ((T)1. - p) * log(max((T)1. - p, (T)FLT_MIN)) - p);

    grad_input[index] = (T)0.;
    grad_input[index] += -flag_p * alpha * term_p;
    grad_input[index] += -flag_n * ((T)1. - alpha) * term_n;
    if (weight != NULL) {
      grad_input[index] *= weight[t];
    }
  }
}

#endif  // SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/softmax_focal_loss_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH
#define SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Forward kernel of the softmax focal loss.
//
// Every loop index is one sample. For a non-negative label the loss is
// -alpha * (1 - pred)**gamma * log(pred), with pred the softmax probability
// of the labelled class, optionally scaled by weight[label]. Samples with a
// negative label produce 0.
template <typename T>
__global__ void softmax_focal_loss_forward_cuda_kernel(
    const int nthreads, const T* softmax, const int64_t* target,
    const T* weight, T* output, const T gamma, const T alpha,
    const int num_classes) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int64_t label = target[index];

    // Only index softmax/weight with a valid (non-negative) label; a
    // negative label would read out of bounds.
    if (label >= 0) {
      T pred = softmax[index * num_classes + label];
      T loss =
          -alpha * pow((T)1. - pred, gamma) * log(max(pred, (T)FLT_MIN));
      if (weight != NULL) {
        loss *= weight[label];
      }
      output[index] = loss;
    } else {
      output[index] = 0;
    }
  }
}

// First backward stage of the softmax focal loss: computes, per sample, the
// scalar factor that the second stage multiplies with (onehot - softmax).
// Samples with a negative label produce 0.
template <typename T>
__global__ void softmax_focal_loss_backward_cuda1_kernel(
    const int nthreads, const T* softmax, const int64_t* target,
    const T* weight, T* buff, const T gamma, const T alpha,
    const int num_classes) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int64_t label = target[index];

    // Only index softmax/weight with a valid (non-negative) label; a
    // negative label would read out of bounds.
    if (label >= 0) {
      T pred = softmax[index * num_classes + label];
      T factor = alpha * (-pow((T)1. - pred, gamma) +
                          gamma * pow((T)1. - pred, gamma - 1) * pred *
                              log(max(pred, (T)FLT_MIN)));
      if (weight != NULL) {
        factor *= weight[label];
      }
      buff[index] = factor;
    } else {
      buff[index] = 0;
    }
  }
}

// Second backward stage of the softmax focal loss: every loop index is one
// (sample n, class c) entry; the gradient is buff[n] * (onehot - softmax)
// for a non-negative label and 0 otherwise.
template <typename T>
__global__ void softmax_focal_loss_backward_cuda2_kernel(
    const int nthreads, const T* softmax, const int64_t* target, const T* buff,
    T* grad_input, const int num_classes) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int n = index / num_classes;
    int c = index % num_classes;
    int64_t label = target[n];

    if (label >= 0) {
      T flag = (label == c ? (T)1. : (T)0.);
      grad_input[index] = buff[n] * (flag - softmax[index]);
    } else {
      grad_input[index] = 0;
    }
  }
}

#endif  // SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/sync_bn_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef SYNCBN_CUDA_KERNEL_CUH
#define SYNCBN_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Computes the per-channel mean of `input`, indexed as (num, channels,
// spatial). The channel is taken from blockIdx.x; the threads of the block
// accumulate partial sums in shared memory and combine them with a halving
// tree reduction before thread 0 writes mean[c].
template <typename T>
__global__ void sync_bn_forward_mean_cuda_kernel(const T *input, float *mean,
                                                 int num, int channels,
                                                 int spatial) {
  __shared__ float buffer[THREADS_PER_BLOCK];
  int tid = threadIdx.x;
  int c = blockIdx.x;
  buffer[tid] = 0;
  for (int i = tid; i < num * spatial; i += blockDim.x) {
    int index = (i / spatial) * channels * spatial + c * spatial + i % spatial;
    buffer[tid] += input[index];
  }
  __syncthreads();

  for (int s = blockDim.x / 2; s > 0; s >>= 1) {
    if (tid < s) {
      buffer[tid] += buffer[tid + s];
    }
    __syncthreads();
  }
  int total = num * spatial;
  if (tid == 0) {
    mean[c] = buffer[0] / total;
  }
}

// Specialization for phalf input: identical to the generic kernel except
// that every element is converted to float before it is accumulated.
template <>
__global__ void sync_bn_forward_mean_cuda_kernel(const phalf *input,
                                                 float *mean, int num,
                                                 int channels, int spatial) {
  __shared__ float buffer[THREADS_PER_BLOCK];
  int tid = threadIdx.x;
  int c = blockIdx.x;
  buffer[tid] = 0;
  for (int i = tid; i < num * spatial; i += blockDim.x) {
    int index = (i / spatial) * channels * spatial + c * spatial + i % spatial;
    buffer[tid] += static_cast<float>(input[index]);
  }
  __syncthreads();

  for (int s = blockDim.x / 2; s > 0; s >>= 1) {
    if (tid < s) {
      buffer[tid] += buffer[tid + s];
    }
    __syncthreads();
  }
  int total = num * spatial;
  if (tid == 0) {
    mean[c] = buffer[0] / total;
  }
}

template __global__ void sync_bn_forward_var_cuda_kernel(const T *input, const float *mean, float *var, int num, int channels, int spatial) { __shared__ float buffer[THREADS_PER_BLOCK]; int tid = threadIdx.x; int c = blockIdx.x; 
buffer[tid] = 0; for (int i = tid; i < num * spatial; i += blockDim.x) { int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; float td = input[index] - mean[c]; buffer[tid] += td * td; } __syncthreads(); for (int s = blockDim.x / 2; s > 0; s >>= 1) { if (tid < s) { buffer[tid] += buffer[tid + s]; } __syncthreads(); } int total = num * spatial; if (tid == 0) { var[c] = buffer[0] / total; } } template <> __global__ void sync_bn_forward_var_cuda_kernel(const phalf *input, const float *mean, float *var, int num, int channels, int spatial) { __shared__ float buffer[THREADS_PER_BLOCK]; int tid = threadIdx.x; int c = blockIdx.x; buffer[tid] = 0; for (int i = tid; i < num * spatial; i += blockDim.x) { int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; float td = static_cast(input[index]) - mean[c]; buffer[tid] += td * td; } __syncthreads(); for (int s = blockDim.x / 2; s > 0; s >>= 1) { if (tid < s) { buffer[tid] += buffer[tid + s]; } __syncthreads(); } int total = num * spatial; if (tid == 0) { var[c] = buffer[0] / total; } } template __global__ void sync_bn_forward_output_cuda_kernel( const T *input, const float *mean, const float *var, float *running_mean, float *running_var, const float *weight, const float *bias, float *norm, float *std, T *output, int num, int channels, int spatial, float eps, float momentum, int group_size) { int tid = threadIdx.x; int c = blockIdx.x; float mean_value = mean[c]; float std_value = sqrt(var[c] + eps); if (weight != nullptr) { float weight_value = weight[c]; float bias_value = bias[c]; if (norm != nullptr) { for (int i = tid; i < num * spatial; i += blockDim.x) { int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; norm[index] = (input[index] - mean_value) / std_value; output[index] = norm[index] * weight_value + bias_value; } } else { for (int i = tid; i < num * spatial; i += blockDim.x) { int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; 
output[index] = (input[index] - mean_value) / std_value * weight_value + bias_value; } } } else { if (norm != nullptr) { for (int i = tid; i < num * spatial; i += blockDim.x) { int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; output[index] = norm[index] = (input[index] - mean_value) / std_value; } } else { for (int i = tid; i < num * spatial; i += blockDim.x) { int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; output[index] = (input[index] - mean_value) / std_value; } } } if (tid == 0) { if (std != nullptr) std[c] = std_value; if (running_mean != nullptr) { running_mean[c] = momentum * mean_value + (1 - momentum) * running_mean[c]; int count = num * spatial * group_size; float var_unbias = count > 1 ? var[c] * count / (count - 1) : var[c]; running_var[c] = momentum * var_unbias + (1 - momentum) * running_var[c]; } } } template <> __global__ void sync_bn_forward_output_cuda_kernel( const phalf *input, const float *mean, const float *var, float *running_mean, float *running_var, const float *weight, const float *bias, float *norm, float *std, phalf *output, int num, int channels, int spatial, float eps, float momentum, int group_size) { int tid = threadIdx.x; int c = blockIdx.x; float mean_value = mean[c]; float std_value = sqrt(var[c] + eps); if (weight != nullptr) { float weight_value = weight[c]; float bias_value = bias[c]; if (norm != nullptr) { for (int i = tid; i < num * spatial; i += blockDim.x) { int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; norm[index] = (static_cast(input[index]) - mean_value) / std_value; output[index] = static_cast(norm[index] * weight_value + bias_value); } } else { for (int i = tid; i < num * spatial; i += blockDim.x) { int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; output[index] = static_cast((static_cast(input[index]) - mean_value) / std_value * weight_value + bias_value); } } } else { if (norm != nullptr) { for (int i = 
tid; i < num * spatial; i += blockDim.x) { int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; norm[index] = (static_cast(input[index]) - mean_value) / std_value; output[index] = static_cast(norm[index]); } } else { for (int i = tid; i < num * spatial; i += blockDim.x) { int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; output[index] = static_cast( (static_cast(input[index]) - mean_value) / std_value); } } } if (tid == 0) { if (std != nullptr) std[c] = std_value; if (running_mean != nullptr) { running_mean[c] = momentum * mean_value + (1 - momentum) * running_mean[c]; int count = num * spatial * group_size; float var_unbias = count > 1 ? var[c] * count / (count - 1) : var[c]; running_var[c] = momentum * var_unbias + (1 - momentum) * running_var[c]; } } } template __global__ void sync_bn_backward_param_cuda_kernel(const T *grad_output, const float *norm, float *grad_weight, float *grad_bias, int num, int channels, int spatial) { __shared__ float buffer1[THREADS_PER_BLOCK]; __shared__ float buffer2[THREADS_PER_BLOCK]; int tid = threadIdx.x; int c = blockIdx.x; buffer1[tid] = buffer2[tid] = 0; for (int i = tid; i < num * spatial; i += blockDim.x) { int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; buffer1[tid] += grad_output[index] * norm[index]; buffer2[tid] += grad_output[index]; } __syncthreads(); for (int s = blockDim.x / 2; s > 0; s >>= 1) { if (tid < s) { buffer1[tid] += buffer1[tid + s]; buffer2[tid] += buffer2[tid + s]; } __syncthreads(); } if (tid == 0) { grad_weight[c] = buffer1[0]; grad_bias[c] = buffer2[0]; } } template <> __global__ void sync_bn_backward_param_cuda_kernel(const phalf *grad_output, const float *norm, float *grad_weight, float *grad_bias, int num, int channels, int spatial) { __shared__ float buffer1[THREADS_PER_BLOCK]; __shared__ float buffer2[THREADS_PER_BLOCK]; int tid = threadIdx.x; int c = blockIdx.x; buffer1[tid] = buffer2[tid] = 0; for (int i = tid; i < num * 
spatial; i += blockDim.x) { int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; buffer1[tid] += static_cast(grad_output[index]) * norm[index]; buffer2[tid] += static_cast(grad_output[index]); } __syncthreads(); for (int s = blockDim.x / 2; s > 0; s >>= 1) { if (tid < s) { buffer1[tid] += buffer1[tid + s]; buffer2[tid] += buffer2[tid + s]; } __syncthreads(); } if (tid == 0) { grad_weight[c] = buffer1[0]; grad_bias[c] = buffer2[0]; } } template __global__ void sync_bn_backward_data_cuda_kernel( int output_size, const T *grad_output, const float *weight, const float *grad_weight, const float *grad_bias, const float *norm, const float *std, T *grad_input, int num, int channels, int spatial) { int factor = num * spatial; CUDA_1D_KERNEL_LOOP(index, output_size) { int c = (index / spatial) % channels; grad_input[index] = weight[c] * (grad_output[index] - (grad_weight[c] * norm[index] + grad_bias[c]) / factor) / std[c]; } } template <> __global__ void sync_bn_backward_data_cuda_kernel( int output_size, const phalf *grad_output, const float *weight, const float *grad_weight, const float *grad_bias, const float *norm, const float *std, phalf *grad_input, int num, int channels, int spatial) { int factor = num * spatial; CUDA_1D_KERNEL_LOOP(index, output_size) { int c = (index / spatial) % channels; grad_input[index] = static_cast( weight[c] * (static_cast(grad_output[index]) - (grad_weight[c] * norm[index] + grad_bias[c]) / factor) / std[c]); } } #endif // SYNCBN_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/three_interpolate_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef THREE_INTERPOLATE_CUDA_KERNEL_CUH #define THREE_INTERPOLATE_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif template __global__ void three_interpolate_forward_cuda_kernel( int b, int c, int m, int n, const T *points, const int *__restrict__ idx, const T *weight, T *out) { // points: (B, C, M) // idx: (B, N, 3) // weight: (B, N, 3) // output: // out: (B, C, N) int bs_idx = blockIdx.z; int c_idx = blockIdx.y; CUDA_1D_KERNEL_LOOP(pt_idx, n) { if (bs_idx >= b || c_idx >= c) return; weight += bs_idx * n * 3 + pt_idx * 3; points += bs_idx * c * m + c_idx * m; idx += bs_idx * n * 3 + pt_idx * 3; out += bs_idx * c * n + c_idx * n; out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]]; } } template __global__ void three_interpolate_backward_cuda_kernel( int b, int c, int n, int m, const T *grad_out, const int *__restrict__ idx, const T *weight, T *grad_points) { // grad_out: (B, C, N) // weight: (B, N, 3) // output: // grad_points: (B, C, M) int bs_idx = blockIdx.z; int c_idx = blockIdx.y; CUDA_1D_KERNEL_LOOP(pt_idx, n) { if (bs_idx >= b || c_idx >= c) return; grad_out += bs_idx * c * n + c_idx * n + pt_idx; weight += bs_idx * n * 3 + pt_idx * 3; grad_points += bs_idx * c * m + c_idx * m; idx += bs_idx * n * 3 + pt_idx * 3; atomicAdd(grad_points + idx[0], grad_out[0] * weight[0]); atomicAdd(grad_points + idx[1], grad_out[0] * weight[1]); atomicAdd(grad_points + idx[2], grad_out[0] * weight[2]); } } #endif // THREE_INTERPOLATE_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/three_nn_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef THREE_NN_CUDA_KERNEL_CUH #define THREE_NN_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif template __global__ void three_nn_forward_cuda_kernel(int b, int n, int m, const T *unknown, const T *known, T *dist2, int *__restrict__ idx) { // unknown: (B, N, 3) // known: (B, M, 3) // output: // dist2: (B, N, 3) // idx: (B, N, 3) int bs_idx = blockIdx.y; CUDA_1D_KERNEL_LOOP(pt_idx, n) { if (bs_idx >= b) return; unknown += bs_idx * n * 3 + pt_idx * 3; known += bs_idx * m * 3; dist2 += bs_idx * n * 3 + pt_idx * 3; idx += bs_idx * n * 3 + pt_idx * 3; T ux = unknown[0]; T uy = unknown[1]; T uz = unknown[2]; double best1 = 1e40, best2 = 1e40, best3 = 1e40; int besti1 = 0, besti2 = 0, besti3 = 0; for (int k = 0; k < m; ++k) { T x = known[k * 3 + 0]; T y = known[k * 3 + 1]; T z = known[k * 3 + 2]; T d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); if (d < best1) { best3 = best2; besti3 = besti2; best2 = best1; besti2 = besti1; best1 = d; besti1 = k; } else if (d < best2) { best3 = best2; besti3 = besti2; best2 = d; besti2 = k; } else if (d < best3) { best3 = d; besti3 = k; } } dist2[0] = best1; dist2[1] = best2; dist2[2] = best3; idx[0] = besti1; idx[1] = besti2; idx[2] = besti3; } } #endif // THREE_NN_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/tin_shift_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef TIN_SHIFT_CUDA_KERNEL_CUH #define TIN_SHIFT_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif template __global__ void tin_shift_forward_cuda_kernel( const int nthreads, const T* input, const int* shift, T* output, const int batch_size, const int channels, const int t_size, const int hw_size, const int group_size, const int group_channel) { CUDA_1D_KERNEL_LOOP(index, nthreads) { const int hw_index = index % hw_size; const int j = (index / hw_size) % channels; const int n_index = (index / hw_size / channels) % batch_size; int group_id = j / group_channel; int t_shift = shift[n_index * group_size + group_id]; int offset = n_index * t_size * hw_size * channels + hw_size * j + hw_index; for (int i = 0; i < t_size; i++) { int now_t = i + t_shift; int data_id = i * hw_size * channels + offset; if (now_t < 0 || now_t >= t_size) { continue; } int out_id = now_t * hw_size * channels + offset; output[out_id] = input[data_id]; } } } template __global__ void tin_shift_backward_cuda_kernel( const int nthreads, const T* input, const int* shift, T* output, const int batch_size, const int channels, const int t_size, const int hw_size, const int group_size, const int group_channel) { CUDA_1D_KERNEL_LOOP(index, nthreads) { const int hw_index = index % hw_size; const int j = (index / hw_size) % channels; const int n_index = (index / hw_size / channels) % batch_size; int group_id = j / group_channel; int t_shift = shift[n_index * group_size + group_id]; int offset = n_index * t_size * hw_size * channels + hw_size * j + hw_index; for (int i = 0; i < t_size; i++) { int now_t = i + t_shift; int data_id = i * hw_size * channels + offset; if (now_t < 0 || now_t >= t_size) { continue; } int out_id = now_t * hw_size * channels + offset; output[out_id] = input[data_id]; } } } #endif // TIN_SHIFT_CUDA_KERNEL_CUH ================================================ FILE: 
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/voxelization_cuda_kernel.cuh ================================================ // Copyright (c) OpenMMLab. All rights reserved. #ifndef VOXELIZATION_CUDA_KERNEL_CUH #define VOXELIZATION_CUDA_KERNEL_CUH #ifdef MMCV_USE_PARROTS #include "parrots_cuda_helper.hpp" #else #include "pytorch_cuda_helper.hpp" #endif typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t; template __global__ void dynamic_voxelize_kernel( const T* points, T_int* coors, const float voxel_x, const float voxel_y, const float voxel_z, const float coors_x_min, const float coors_y_min, const float coors_z_min, const float coors_x_max, const float coors_y_max, const float coors_z_max, const int grid_x, const int grid_y, const int grid_z, const int num_points, const int num_features, const int NDim) { // const int index = blockIdx.x * threadsPerBlock + threadIdx.x; CUDA_1D_KERNEL_LOOP(index, num_points) { // To save some computation auto points_offset = points + index * num_features; auto coors_offset = coors + index * NDim; int c_x = floorf((points_offset[0] - coors_x_min) / voxel_x); if (c_x < 0 || c_x >= grid_x) { coors_offset[0] = -1; continue; } int c_y = floorf((points_offset[1] - coors_y_min) / voxel_y); if (c_y < 0 || c_y >= grid_y) { coors_offset[0] = -1; coors_offset[1] = -1; continue; } int c_z = floorf((points_offset[2] - coors_z_min) / voxel_z); if (c_z < 0 || c_z >= grid_z) { coors_offset[0] = -1; coors_offset[1] = -1; coors_offset[2] = -1; } else { coors_offset[0] = c_z; coors_offset[1] = c_y; coors_offset[2] = c_x; } } } template __global__ void assign_point_to_voxel(const int nthreads, const T* points, T_int* point_to_voxelidx, T_int* coor_to_voxelidx, T* voxels, const int max_points, const int num_features, const int num_points, const int NDim) { CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) { // const int index = blockIdx.x * threadsPerBlock + threadIdx.x; int index = thread_idx / num_features; int num = point_to_voxelidx[index]; 
int voxelidx = coor_to_voxelidx[index]; if (num > -1 && voxelidx > -1) { auto voxels_offset = voxels + voxelidx * max_points * num_features + num * num_features; int k = thread_idx % num_features; voxels_offset[k] = points[thread_idx]; } } } template __global__ void assign_voxel_coors(const int nthreads, T_int* coor, T_int* point_to_voxelidx, T_int* coor_to_voxelidx, T_int* voxel_coors, const int num_points, const int NDim) { CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) { // const int index = blockIdx.x * threadsPerBlock + threadIdx.x; // if (index >= num_points) return; int index = thread_idx / NDim; int num = point_to_voxelidx[index]; int voxelidx = coor_to_voxelidx[index]; if (num == 0 && voxelidx > -1) { auto coors_offset = voxel_coors + voxelidx * NDim; int k = thread_idx % NDim; coors_offset[k] = coor[thread_idx]; } } } template __global__ void point_to_voxelidx_kernel(const T_int* coor, T_int* point_to_voxelidx, T_int* point_to_pointidx, const int max_points, const int max_voxels, const int num_points, const int NDim) { CUDA_1D_KERNEL_LOOP(index, num_points) { auto coor_offset = coor + index * NDim; // skip invalid points if (coor_offset[0] == -1) return; int num = 0; int coor_x = coor_offset[0]; int coor_y = coor_offset[1]; int coor_z = coor_offset[2]; // only calculate the coors before this coor[index] for (int i = 0; i < index; ++i) { auto prev_coor = coor + i * NDim; if (prev_coor[0] == -1) continue; // Find all previous points that have the same coors // if find the same coor, record it if ((prev_coor[0] == coor_x) && (prev_coor[1] == coor_y) && (prev_coor[2] == coor_z)) { num++; if (num == 1) { // point to the same coor that first show up point_to_pointidx[index] = i; } else if (num >= max_points) { // out of boundary return; } } } if (num == 0) { point_to_pointidx[index] = index; } if (num < max_points) { point_to_voxelidx[index] = num; } } } template __global__ void determin_voxel_num( // const T_int* coor, T_int* num_points_per_voxel, T_int* 
point_to_voxelidx, T_int* point_to_pointidx, T_int* coor_to_voxelidx, T_int* voxel_num, const int max_points, const int max_voxels, const int num_points) { // only calculate the coors before this coor[index] for (int i = 0; i < num_points; ++i) { int point_pos_in_voxel = point_to_voxelidx[i]; // record voxel if (point_pos_in_voxel == -1) { // out of max_points or invalid point continue; } else if (point_pos_in_voxel == 0) { // record new voxel int voxelidx = voxel_num[0]; if (voxel_num[0] >= max_voxels) continue; voxel_num[0] += 1; coor_to_voxelidx[i] = voxelidx; num_points_per_voxel[voxelidx] = 1; } else { int point_idx = point_to_pointidx[i]; int voxelidx = coor_to_voxelidx[point_idx]; if (voxelidx != -1) { coor_to_voxelidx[i] = voxelidx; num_points_per_voxel[voxelidx] += 1; } } } } #endif // VOXELIZATION_CUDA_KERNEL_CUH ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/parrots_cpp_helper.hpp ================================================ #ifndef PARROTS_CPP_HELPER #define PARROTS_CPP_HELPER #include #include #include #include #include using namespace parrots; #define PARROTS_PRIVATE_CASE_TYPE(prim_type, type, ...) \ case prim_type: { \ using scalar_t = type; \ return __VA_ARGS__(); \ } #define PARROTS_DISPATCH_FLOATING_TYPES(TYPE, ...) \ [&] { \ const auto& the_type = TYPE; \ switch (the_type) { \ PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \ PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__) \ default: \ PARROTS_NOTSUPPORTED; \ } \ }() #define PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF(TYPE, ...) 
\ [&] { \ const auto& the_type = TYPE; \ switch (the_type) { \ PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \ PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__) \ PARROTS_PRIVATE_CASE_TYPE(Prim::Float16, float16, __VA_ARGS__) \ default: \ PARROTS_NOTSUPPORTED; \ } \ }() #endif // PARROTS_CPP_HELPER ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/parrots_cuda_helper.hpp ================================================ #ifndef PARROTS_CUDA_HELPER #define PARROTS_CUDA_HELPER #include #include #include #include #include #include #include #include #include #include "common_cuda_helper.hpp" #include "parrots_cudawarpfunction.cuh" using namespace parrots; using phalf = float16; #define __PHALF(x) (x.y) #define PARROTS_CUDA_CHECK(exp) \ do { \ cudaError_t err = exp; \ if (err != cudaSuccess) { \ fprintf(stderr, "cudaCheckError() failed : %s\n", \ cudaGetErrorString(err)); \ exit(-1); \ } \ } while (0) #define PARROTS_PRIVATE_CASE_TYPE(prim_type, type, ...) \ case prim_type: { \ using scalar_t = type; \ return __VA_ARGS__(); \ } #define PARROTS_DISPATCH_FLOATING_TYPES(TYPE, ...) \ [&] { \ const auto& the_type = TYPE; \ switch (the_type) { \ PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \ PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__) \ default: \ PARROTS_NOTSUPPORTED; \ } \ }() #define PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF(TYPE, ...) 
\ [&] { \ const auto& the_type = TYPE; \ switch (the_type) { \ PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \ PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__) \ PARROTS_PRIVATE_CASE_TYPE(Prim::Float16, float16, __VA_ARGS__) \ default: \ PARROTS_NOTSUPPORTED; \ } \ }() /** atomicAdd **/ #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600 static __inline__ __device__ double atomicAdd(double* address, double val) { unsigned long long int* address_as_ull = (unsigned long long int*)address; unsigned long long int old = *address_as_ull, assumed; if (val == 0.0) return __longlong_as_double(old); do { assumed = old; old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); } while (assumed != old); return __longlong_as_double(old); } #endif static __inline__ __device__ float16 atomicAdd(float16* address, float16 val) { unsigned int* aligned = (unsigned int*)((size_t)address - ((size_t)address & 2)); unsigned int old = *aligned; unsigned int assumed; unsigned short old_as_us; do { assumed = old; old_as_us = (unsigned short)((size_t)address & 2 ? old >> 16 : old & 0xffff); #if __CUDACC_VER_MAJOR__ >= 9 float16 tmp; tmp.x = old_as_us; float16 sum = tmp + val; unsigned short sum_as_us = sum.x; // half sum = __float2half_rn(__half2float(__ushort_as_half(old_as_us)) // + (float)(val)); unsigned short sum_as_us = __half_as_ushort(sum); #else unsigned short sum_as_us = __float2half_rn(__half2float(old_as_us) + (float)(val)); #endif unsigned int sum_as_ui = (size_t)address & 2 ? 
(sum_as_us << 16) | (old & 0xffff) : (old & 0xffff0000) | sum_as_us; old = atomicCAS(aligned, assumed, sum_as_ui); } while (assumed != old); //__half_raw raw = {old_as_us}; // return float16(raw); return *reinterpret_cast(&old_as_us); } #endif // PARROTS_CUDA_HELPER ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/pytorch_cpp_helper.hpp ================================================ #ifndef PYTORCH_CPP_HELPER #define PYTORCH_CPP_HELPER #include #include using namespace at; #define CHECK_CUDA(x) \ TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor") #define CHECK_CPU(x) \ TORCH_CHECK(!x.device().is_cuda(), #x " must be a CPU tensor") #define CHECK_CONTIGUOUS(x) \ TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") #define CHECK_CUDA_INPUT(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) #define CHECK_CPU_INPUT(x) \ CHECK_CPU(x); \ CHECK_CONTIGUOUS(x) #endif // PYTORCH_CPP_HELPER ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/pytorch_cuda_helper.hpp ================================================ #ifndef PYTORCH_CUDA_HELPER #define PYTORCH_CUDA_HELPER #include #include #include #include #include #include "common_cuda_helper.hpp" using at::Half; using at::Tensor; using phalf = at::Half; #define __PHALF(x) (x) #endif // PYTORCH_CUDA_HELPER ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/pytorch_device_registry.hpp ================================================ #ifndef PYTORCH_DEVICE_REGISTRY_H #define PYTORCH_DEVICE_REGISTRY_H // Using is recommended in the official documentation in // https://pytorch.org/tutorials/advanced/cpp_extension.html#writing-the-c-op. // However, we use for compatibility with CUDA 9.0 // Read https://github.com/pytorch/extension-cpp/issues/35 for more details. 
#include
#include
#include
#include
#include

// Formats a device as its type name, followed by ":<index>" when the device
// carries an index.
inline std::string GetDeviceStr(const at::Device& device) {
  std::string str = DeviceTypeName(device.type(), true);
  if (device.has_index()) {
    str.push_back(':');
    str.append(std::to_string(device.index()));
  }
  return str;
}

// Registry
// Per-signature table mapping a device type to one function pointer.
// NOTE(review): the template parameter lists are missing in this copy of the
// file; from the body, Ret/Args are the return and argument types of the
// registered function.
template  class DeviceRegistry;

template  class DeviceRegistry {
 public:
  using FunctionType = Ret (*)(Args...);
  static const int MAX_DEVICE_TYPES =
      int8_t(at::DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES);

  // Stores `function` in the slot for `device`, replacing any previous entry.
  void Register(at::DeviceType device, FunctionType function) {
    funcs_[int8_t(device)] = function;
  }

  // Returns the function stored for `device`, or nullptr when none was set.
  FunctionType Find(at::DeviceType device) const {
    return funcs_[int8_t(device)];
  }

  // Single shared instance, created on first use (function-local static).
  static DeviceRegistry& instance() {
    static DeviceRegistry inst;
    return inst;
  }

 private:
  // Private so instances are only created through instance(); every slot
  // starts out as nullptr.
  DeviceRegistry() {
    for (size_t i = 0; i < MAX_DEVICE_TYPES; ++i) {
      funcs_[i] = nullptr;
    }
  };
  FunctionType funcs_[MAX_DEVICE_TYPES];
};

// get device of first tensor param
// This overload returns the device of its first argument directly.
// NOTE(review): the enable_if condition is stripped; presumably it selects
// the case where T is at::Tensor - confirm against the original header.
template , at::Tensor>::value,
                           bool> = true>
at::Device GetFirstTensorDevice(T&& t, Args&&... args) {
  return std::forward(t).device();
}
// This overload skips its first argument and recurses on the remaining ones.
template , at::Tensor>::value,
                           bool> = true>
at::Device GetFirstTensorDevice(T&& t, Args&&... args) {
  return GetFirstTensorDevice(std::forward(args)...);
}

// check device consistency
// Recursion terminator: no arguments left, report the running index together
// with the reference device.
inline std::pair CheckDeviceConsistency(
    const at::Device& device, int index) {
  return {index, device};
}

// Forward declaration of the overload defined after the next one, so the
// two overloads can call each other.
template , at::Tensor>::value,
                           bool> = true>
std::pair CheckDeviceConsistency(const at::Device& device,
                                                  int index, T&& t,
                                                  Args&&... args);

// Compares the first argument's device (type and index) with `device`.
// On a mismatch returns {index, mismatching device}; otherwise continues with
// the remaining arguments at index + 1.
template , at::Tensor>::value,
                           bool> = true>
std::pair CheckDeviceConsistency(const at::Device& device,
                                                  int index, T&& t,
                                                  Args&&... args) {
  auto new_device = std::forward(t).device();
  if (new_device.type() != device.type() ||
      new_device.index() != device.index()) {
    return {index, new_device};
  }
  return CheckDeviceConsistency(device, index + 1, std::forward(args)...);
}

// Definition of the forward-declared overload: the first argument is not
// inspected, only counted.
template <
    typename T, typename... Args,
    std::enable_if_t, at::Tensor>::value, bool>>
std::pair CheckDeviceConsistency(const at::Device& device,
                                                  int index, T&& t,
                                                  Args&&... args) {
  return CheckDeviceConsistency(device, index + 1, std::forward(args)...);
}

// dispatch
// Picks the implementation registered for the device of the first tensor
// argument and calls it with all arguments. Fails via TORCH_CHECK when the
// consistency walk stopped before the end of the argument list, or when no
// implementation is registered for that device type.
template
auto Dispatch(const R& registry, const char* name, Args&&... args) {
  auto device = GetFirstTensorDevice(std::forward(args)...);
  auto inconsist = CheckDeviceConsistency(device, 0, std::forward(args)...);
  // A full walk ends with first == sizeof...(Args); a smaller value is the
  // position of the first argument on a different device.
  TORCH_CHECK(inconsist.first >= int(sizeof...(Args)), name, ": at param ",
              inconsist.first,
              ", inconsistent device: ", GetDeviceStr(inconsist.second).c_str(),
              " vs ", GetDeviceStr(device).c_str(), "\n")
  auto f_ptr = registry.Find(device.type());
  TORCH_CHECK(f_ptr != nullptr, name, ": implementation for device ",
              GetDeviceStr(device).c_str(), " not found.\n")
  return f_ptr(std::forward(args)...);
}

// helper macro
#define DEVICE_REGISTRY(key) DeviceRegistry::instance()

// Defines a registerer struct plus one static instance of it; the struct's
// constructor registers `value` for at::k<device>.
#define REGISTER_DEVICE_IMPL(key, device, value)           \
  struct key##_##device##_registerer {                     \
    key##_##device##_registerer() {                        \
      DEVICE_REGISTRY(key).Register(at::k##device, value); \
    }                                                      \
  };                                                       \
  static key##_##device##_registerer _##key##_##device##_registerer;

#define DISPATCH_DEVICE_IMPL(key, ...) \
  Dispatch(DEVICE_REGISTRY(key), #key, __VA_ARGS__)

#endif  // PYTORCH_DEVICE_REGISTRY_H
================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/corner_pool.h ================================================
// Copyright (c) OpenMMLab.
All rights reserved
#ifndef ONNXRUNTIME_CORNER_POOL_H
#define ONNXRUNTIME_CORNER_POOL_H

#include
#include

// Kernel state for the MMCVCornerPool custom op: the ORT API handle and the
// integer "mode" attribute read once at construction.
struct MMCVCornerPoolKernel {
 public:
  MMCVCornerPoolKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info)
      : ort_(ort) {
    mode_ = ort_.KernelInfoGetAttribute(info, "mode");
  }

  // Defined out of line (not in this header).
  void Compute(OrtKernelContext* context);

 private:
  Ort::CustomOpApi ort_;
  int64_t mode_;
};

// Custom-op descriptor: one float tensor in, one float tensor out, pinned to
// the CPU execution provider.
struct MMCVCornerPoolCustomOp
    : Ort::CustomOpBase {
  // Returns a heap-allocated kernel; this struct never deletes it.
  void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const {
    return new MMCVCornerPoolKernel(api, info);
  }

  const char* GetName() const { return "MMCVCornerPool"; }

  size_t GetInputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  size_t GetOutputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetOutputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char* GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif  // ONNXRUNTIME_CORNER_POOL_H
================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/corner_pool.cpp ================================================
// Copyright (c) OpenMMLab.
All rights reserved #include "corner_pool.h" #include "../ort_mmcv_utils.h" void TopPoolForwardCPU(const float *input, float *output, const int batch_size, const int channels, const int height, const int width) { for (int n = 0; n < batch_size; n++) { int index_n = n * channels * width * height; for (int c = 0; c < channels; c++) { int index_n_c = index_n + c * width * height; for (int w = 0; w < width; w++) { // directly copy the most bottom value from input to output output[index_n_c + (height - 1) * width + w] = input[index_n_c + (height - 1) * width + w]; // do top_pool for (int h = height - 2; h >= 0; h--) { output[index_n_c + h * width + w] = std::max(output[index_n_c + (h + 1) * width + w], input[index_n_c + h * width + w]); } // for h } // for w } // for c } // for n } void BottomPoolForwardCPU(const float *input, float *output, const int batch_size, const int channels, const int height, const int width) { for (int n = 0; n < batch_size; n++) { int index_n = n * channels * width * height; for (int c = 0; c < channels; c++) { int index_n_c = index_n + c * width * height; for (int w = 0; w < width; w++) { // directly copy the most top value from input to output output[index_n_c + w] = input[index_n_c + w]; // do top_pool for (int h = 1; h < height; h++) { output[index_n_c + h * width + w] = std::max(output[index_n_c + (h - 1) * width + w], input[index_n_c + h * width + w]); } // for h } // for w } // for c } // for n } void LeftPoolForwardCPU(const float *input, float *output, const int batch_size, const int channels, const int height, const int width) { for (int n = 0; n < batch_size; n++) { int index_n = n * channels * width * height; for (int c = 0; c < channels; c++) { int index_n_c = index_n + c * width * height; for (int h = 0; h < height; h++) { // directly copy the most right value from input to output output[index_n_c + h * width + width - 1] = input[index_n_c + h * width + width - 1]; // do left_pool for (int w = width - 2; w >= 0; w--) { 
output[index_n_c + h * width + w] = std::max(output[index_n_c + h * width + w + 1], input[index_n_c + h * width + w]); } // for w } // for h } // for c } // for n } void RightPoolForwardCPU(const float *input, float *output, const int batch_size, const int channels, const int height, const int width) { for (int n = 0; n < batch_size; n++) { int index_n = n * channels * width * height; for (int c = 0; c < channels; c++) { int index_n_c = index_n + c * width * height; for (int h = 0; h < height; h++) { // directly copy the most left value from input to output output[index_n_c + h * width] = input[index_n_c + h * width]; // do right_pool for (int w = 1; w < width; w++) { output[index_n_c + h * width + w] = std::max(output[index_n_c + h * width + w - 1], input[index_n_c + h * width + w]); } // for w } // for h } // for c } // for n } void MMCVCornerPoolKernel::Compute(OrtKernelContext *context) { const int mode = int(mode_); typedef float T; const OrtValue *input = ort_.KernelContext_GetInput(context, 0); const T *input_data = reinterpret_cast(ort_.GetTensorData(input)); // get output memory OrtTensorDimensions out_dimensions(ort_, input); OrtValue *output = ort_.KernelContext_GetOutput( context, 0, out_dimensions.data(), out_dimensions.size()); T *output_data = ort_.GetTensorMutableData(output); // 'top': 0, 'bottom': 1, 'left': 2, 'right':3 assert(mode == 0 || mode == 1 || mode == 2 || mode == 3); // do corner_pool int batch_size = out_dimensions.data()[0]; int input_channels = out_dimensions.data()[1]; int input_height = out_dimensions.data()[2]; int input_width = out_dimensions.data()[3]; if (mode == 0) TopPoolForwardCPU(input_data, output_data, batch_size, input_channels, input_height, input_width); else if (mode == 1) BottomPoolForwardCPU(input_data, output_data, batch_size, input_channels, input_height, input_width); else if (mode == 2) LeftPoolForwardCPU(input_data, output_data, batch_size, input_channels, input_height, input_width); else 
RightPoolForwardCPU(input_data, output_data, batch_size, input_channels, input_height, input_width); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/deform_conv.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "deform_conv.h" #include #include #include "../ort_mmcv_utils.h" void gemm_ref_fp32_deform(const float *A, const float *B, const float *V, const float *H, const int32_t trans_A, const int32_t trans_B, const int32_t M, const int32_t N, const int32_t K, const float alpha, const float beta, float *Y) { if (!trans_A && !trans_B) { // MK, KN; NN for (int64_t m = 0; m < M; ++m) { for (int64_t n = 0; n < N; ++n) { float y = 0.0f; for (int64_t k = 0; k < K; ++k) { y += A[m * K + k] * B[k * N + n]; } y *= alpha; if (V) y += beta * V[n]; if (H) y += beta * H[m * N + n]; Y[m * N + n] = y; } } } if (trans_A && !trans_B) { // KM, KN; TN for (int64_t m = 0; m < M; ++m) { for (int64_t n = 0; n < N; ++n) { float y = 0.0f; for (int64_t k = 0; k < K; ++k) { y += A[k * M + m] * B[k * N + n]; } y *= alpha; if (V) y += beta * V[n]; if (H) y += beta * H[m * N + n]; Y[m * N + n] = y; } } } if (trans_A && trans_B) { // KM, NK; TT for (int64_t m = 0; m < M; ++m) { for (int64_t n = 0; n < N; ++n) { float y = 0.0f; for (int64_t k = 0; k < K; ++k) { y += A[k * M + m] * B[n * K + k]; } y *= alpha; if (V) y += beta * V[n]; if (H) y += beta * H[m * N + n]; Y[m * N + n] = y; } } } if (!trans_A && trans_B) { // MK, NK; NT for (int64_t m = 0; m < M; ++m) { for (int64_t n = 0; n < N; ++n) { float y = 0.0f; for (int64_t k = 0; k < K; ++k) { y += A[m * K + k] * B[n * K + k]; } y *= alpha; if (V) y += beta * V[n]; if (H) y += beta * H[m * N + n]; Y[m * N + n] = y; } } } } float bilinear_interpolate(const float *src, const int64_t src_h, const int64_t src_w, const float h, const float w) { if (h <= -1 || src_h <= h || w <= -1 || src_w <= w) { return 0; } int64_t h_low 
= floor(h); int64_t w_low = floor(w); int64_t h_high = h_low + 1; int64_t w_high = w_low + 1; float lh = h - h_low; float lw = w - w_low; float hh = 1 - lh; float hw = 1 - lw; float v1 = 0; if (h_low >= 0 && w_low >= 0) v1 = src[h_low * src_w + w_low]; float v2 = 0; if (h_low >= 0 && w_high <= src_w - 1) v2 = src[h_low * src_w + w_high]; float v3 = 0; if (h_high <= src_h - 1 && w_low >= 0) v3 = src[h_high * src_w + w_low]; float v4 = 0; if (h_high <= src_h - 1 && w_high <= src_w - 1) v4 = src[h_high * src_w + w_high]; float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); return val; } void deformable_im2col(const float *input, const float *offset, const int64_t src_h, const int64_t src_w, const int64_t kernel_h, const int64_t kernel_w, const int64_t pad_h, const int64_t pad_w, const int64_t stride_h, const int64_t stride_w, const int64_t dilation_h, const int64_t dilation_w, const int64_t channels, const int64_t offset_groups, const int64_t dst_h, const int64_t dst_w, float *columns) { const int64_t indices = channels * dst_h * dst_w; for (int64_t index = 0; index != indices; ++index) { const int64_t w_col = index % dst_w; const int64_t h_col = (index / dst_w) % dst_h; const int64_t c_im = index / (dst_w * dst_h); const int64_t c_col = c_im * kernel_h * kernel_w; int64_t c_per_offset_grp = channels / offset_groups; const int64_t grp_idx = c_im / c_per_offset_grp; auto columns_ptr = columns + (c_col * (dst_h * dst_w) + h_col * dst_w + w_col); auto input_ptr = input + c_im * (src_h * src_w); auto offset_ptr = offset + grp_idx * 2 * kernel_h * kernel_w * dst_h * dst_w; for (int64_t kh = 0; kh < kernel_h; ++kh) { for (int64_t kw = 0; kw < kernel_w; ++kw) { const int data_offset_h_ptr = ((2 * (kh * kernel_w + kw)) * dst_h + h_col) * dst_w + w_col; const int data_offset_w_ptr = ((2 * (kh * kernel_w + kw) + 1) * dst_h + h_col) * dst_w + w_col; const float offset_h = offset_ptr[data_offset_h_ptr]; const float 
offset_w = offset_ptr[data_offset_w_ptr]; const float ih = (h_col * stride_h - pad_h) + kh * dilation_h + offset_h; const float iw = (w_col * stride_w - pad_w) + kw * dilation_w + offset_w; *columns_ptr = bilinear_interpolate(input_ptr, src_h, src_w, ih, iw); columns_ptr += dst_h * dst_w; } } } } void deformable_conv_forward( const float *src, const float *offset, const float *filter, const int64_t batch, const int64_t src_c, const int64_t src_h, const int64_t src_w, const int64_t dst_c, const int64_t dst_h, const int64_t dst_w, const int64_t group, const int64_t offset_group, const int64_t channels, const int64_t num_output, const int64_t kernel_h, const int64_t kernel_w, const int64_t stride_h, const int64_t stride_w, const int64_t pad_h, const int64_t pad_w, const int64_t dilation_h, const int64_t dilation_w, float *columns, float *dst) { const int64_t ic_per_gp = channels / group; const int64_t oc_per_gp = num_output / group; for (int64_t b = 0; b < batch; ++b) { for (int64_t g = 0; g < group; ++g) { deformable_im2col( src + b * src_c * src_h * src_w + g * ic_per_gp * src_h * src_w, offset + b * offset_group * 2 * kernel_h * kernel_w * dst_h * dst_w, src_h, src_w, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, ic_per_gp, offset_group, dst_h, dst_w, columns); float *dst_ptr = dst + b * dst_c * dst_h * dst_w + g * oc_per_gp * dst_h * dst_w; memset(dst_ptr, 0.0f, sizeof(float) * oc_per_gp * dst_h * dst_w); gemm_ref_fp32_deform( filter + g * oc_per_gp * ic_per_gp * kernel_h * kernel_w, columns, nullptr, dst_ptr, 0, 0, oc_per_gp, dst_h * dst_w, ic_per_gp * kernel_h * kernel_w, 1.0f, 1.0f, dst_ptr); } } } MMCVDeformConvKernel::MMCVDeformConvKernel(OrtApi api, const OrtKernelInfo *info) : api_(api), ort_(api_), info_(info) { std::vector stride = ort_.KernelInfoGetAttribute>(info, "stride"); stride_height_ = stride[0]; stride_width_ = stride[1]; std::vector padding = ort_.KernelInfoGetAttribute>(info, "padding"); padding_height_ = 
padding[0]; padding_width_ = padding[1]; std::vector dilation = ort_.KernelInfoGetAttribute>(info, "dilation"); dilation_height_ = dilation[0]; dilation_width_ = dilation[1]; deformable_group_ = ort_.KernelInfoGetAttribute(info, "deform_groups"); group_ = ort_.KernelInfoGetAttribute(info, "groups"); // create allocator allocator_ = Ort::AllocatorWithDefaultOptions(); } void MMCVDeformConvKernel::Compute(OrtKernelContext *context) { const int64_t stride_height = stride_height_; const int64_t stride_width = stride_width_; const int64_t padding_height = padding_height_; const int64_t padding_width = padding_width_; const int64_t dilation_height = dilation_height_; const int64_t dilation_width = dilation_width_; const int64_t deformable_group = deformable_group_; const int64_t group = group_; const OrtValue *input = ort_.KernelContext_GetInput(context, 0); const float *input_data = reinterpret_cast(ort_.GetTensorData(input)); const OrtValue *offset = ort_.KernelContext_GetInput(context, 1); const float *offset_data = reinterpret_cast(ort_.GetTensorData(offset)); const OrtValue *filter = ort_.KernelContext_GetInput(context, 2); const float *filter_data = reinterpret_cast(ort_.GetTensorData(filter)); OrtTensorDimensions input_dims(ort_, input); OrtTensorDimensions filter_dims(ort_, filter); int64_t batch_size = input_dims[0]; int64_t in_channels = input_dims[1]; int64_t in_height = input_dims[2]; int64_t in_width = input_dims[3]; int64_t out_channels = filter_dims[0]; int64_t kernel_height = filter_dims[2]; int64_t kernel_width = filter_dims[3]; // get output memory int64_t out_height = floor((in_height + 2 * padding_height - dilation_height * (kernel_height - 1) - 1) / stride_height + 1); int64_t out_width = floor( (in_width + 2 * padding_width - dilation_width * (kernel_width - 1) - 1) / stride_width + 1); std::vector output_dims = {batch_size, out_channels, out_height, out_width}; OrtValue *output = ort_.KernelContext_GetOutput( context, 0, output_dims.data(), 
output_dims.size()); float *out_ptr = ort_.GetTensorMutableData(output); // allocate tmp memory int64_t column_len = (in_channels / group) * kernel_height * kernel_width * out_height * out_width; float *columns = (float *)allocator_.Alloc(sizeof(float) * column_len); deformable_conv_forward( input_data, offset_data, filter_data, batch_size, in_channels, in_height, in_width, out_channels, out_height, out_width, group, deformable_group, in_channels, out_channels, kernel_height, kernel_width, stride_height, stride_width, padding_height, padding_width, dilation_height, dilation_width, columns, out_ptr); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include "../ort_mmcv_utils.h" #include "grid_sample.h" #define MIN(a, b) (((a) < (b)) ? (a) : (b)) #define MAX(a, b) (((a) < (b)) ? (b) : (a)) #define CLIP_COORDINATES(in, out, clip_limit) \ out = MIN((clip_limit - 1), MAX(in, 0)) // modified from // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/GridSampler.cpp GridSampleKernel::GridSampleKernel(OrtApi api, const OrtKernelInfo *info) : api_(api), ort_(api_), info_(info) { align_corners_ = ort_.KernelInfoGetAttribute(info, "align_corners"); interpolation_mode_ = ort_.KernelInfoGetAttribute(info, "interpolation_mode"); padding_mode_ = ort_.KernelInfoGetAttribute(info, "padding_mode"); allocator_ = Ort::AllocatorWithDefaultOptions(); } enum GridSamplerInterpolation { Bilinear = 0, Nearest = 1, Bicubic = 2 }; enum GridSamplerPadding { Zeros = 0, Border = 1, Reflection = 2 }; template static inline scalar_t grid_sampler_unnormalize(scalar_t coord, int64_t size, bool align_corners) { if (align_corners) { return ((coord + 1) / 2) * (size - 1); } else { return ((coord + 1) * size - 1) / 2; } } // Clips coordinates to between 0 and clip_limit - 1 template static inline 
scalar_t clip_coordinates(scalar_t in, int64_t clip_limit) {
  return std::min(static_cast(clip_limit - 1),
                  std::max(in, static_cast(0)));
}

// Reflects coordinates until they fall between low and high (inclusive).
// The bounds are passed as twice their value so that half-integer values
// can be represented as ints.
// When both bounds coincide the result is always 0.
template
static inline scalar_t reflect_coordinates(scalar_t in, int64_t twice_low,
                                           int64_t twice_high) {
  if (twice_low == twice_high) {
    return static_cast(0);
  }
  scalar_t min = static_cast(twice_low) / 2;
  scalar_t span = static_cast(twice_high - twice_low) / 2;
  // Work with the distance from the lower bound.
  in = std::fabs(in - min);
  // `fmod` returns same sign as `in`, which is positive after the `fabs` above.
  scalar_t extra = std::fmod(in, span);
  // Number of whole spans crossed: an even count keeps the direction, an odd
  // count mirrors it.
  int flips = static_cast(std::floor(in / span));
  if (flips % 2 == 0) {
    return extra + min;
  } else {
    return span - extra + min;
  }
}

// Applies the padding mode to an already unnormalized coordinate:
//   Border     -> clipped to [0, size - 1]
//   Reflection -> reflected (bounds depend on align_corners), then clipped
//   any other  -> returned unchanged
template
static inline scalar_t compute_coordinates(scalar_t coord, int64_t size,
                                           int64_t padding_mode,
                                           bool align_corners) {
  if (padding_mode == GridSamplerPadding::Border) {
    coord = clip_coordinates(coord, size);
  } else if (padding_mode == GridSamplerPadding::Reflection) {
    if (align_corners) {
      coord = reflect_coordinates(coord, 0, 2 * (size - 1));
    } else {
      coord = reflect_coordinates(coord, -1, 2 * size - 1);
    }
    coord = clip_coordinates(coord, size);
  }
  return coord;
}

// Computes the pixel source index value for a grid coordinate
// (unnormalize first, then apply the padding mode).
template
static inline scalar_t grid_sampler_compute_source_index(scalar_t coord,
                                                         int64_t size,
                                                         int64_t padding_mode,
                                                         bool align_corners) {
  coord = grid_sampler_unnormalize(coord, size, align_corners);
  coord = compute_coordinates(coord, size, padding_mode, align_corners);
  return coord;
}

// True when (h, w) lies inside an H x W plane.
static inline bool within_bounds_2d(int64_t h, int64_t w, int64_t H,
                                    int64_t W) {
  return h >= 0 && h < H && w >= 0 && w < W;
}

template
static inline scalar_t get_value_bounded(const scalar_t *data, scalar_t x,
                                         scalar_t y, int64_t W, int64_t H,
                                         int64_t sW, int64_t sH,
                                         int64_t padding_mode,
                                         bool align_corners) {
  x =
compute_coordinates(x, W, padding_mode, align_corners);
  y = compute_coordinates(y, H, padding_mode, align_corners);

  int64_t ix = static_cast(x);
  int64_t iy = static_cast(y);

  if (within_bounds_2d(iy, ix, H, W)) {
    return data[iy * sH + ix * sW];
  }
  return static_cast(0);
}

// Cubic convolution polynomial used for the two inner taps:
// ((A + 2) x - (A + 3)) x^2 + 1.
template
static inline scalar_t cubic_convolution1(scalar_t x, scalar_t A) {
  return ((A + 2) * x - (A + 3)) * x * x + 1;
}

// Cubic convolution polynomial used for the two outer taps:
// ((A x - 5A) x + 8A) x - 4A.
template
static inline scalar_t cubic_convolution2(scalar_t x, scalar_t A) {
  return ((A * x - 5 * A) * x + 8 * A) * x - 4 * A;
}

// Fills coeffs[0..3] with the four tap weights for fractional position t,
// using A = -0.75.
template
static inline void get_cubic_upsample_coefficients(scalar_t coeffs[4],
                                                   scalar_t t) {
  scalar_t A = -0.75;

  scalar_t x1 = t;
  coeffs[0] = cubic_convolution2(x1 + 1.0, A);
  coeffs[1] = cubic_convolution1(x1, A);

  // opposite coefficients
  scalar_t x2 = 1.0 - t;
  coeffs[2] = cubic_convolution1(x2, A);
  coeffs[3] = cubic_convolution2(x2 + 1.0, A);
}

// Weighted sum of four consecutive samples with the cubic weights for t.
template
static inline scalar_t cubic_interp1d(scalar_t x0, scalar_t x1, scalar_t x2,
                                      scalar_t x3, scalar_t t) {
  scalar_t coeffs[4];
  get_cubic_upsample_coefficients(coeffs, t);

  return x0 * coeffs[0] + x1 * coeffs[1] + x2 * coeffs[2] + x3 * coeffs[3];
}

// Grid-sample forward pass on the CPU.
// Inputs (by index): 0 = input, indexed as (N, C, inp_H, inp_W);
// 1 = grid, indexed as (N, out_H, out_W, 2) with x stored before y.
// Output is allocated as (N, C, out_H, out_W). All strides below are derived
// from the dimension lists, i.e. the buffers are addressed as dense arrays.
void GridSampleKernel::Compute(OrtKernelContext *context) {
  const bool align_corners = align_corners_;
  const int64_t padding_mode = padding_mode_;
  const int64_t interpolation_mode = interpolation_mode_;

  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
  const float *input_data =
      reinterpret_cast(ort_.GetTensorData(input));

  const OrtValue *grid = ort_.KernelContext_GetInput(context, 1);
  const float *grid_data =
      reinterpret_cast(ort_.GetTensorData(grid));

  OrtTensorDimensions input_dims(ort_, input);
  OrtTensorDimensions grid_dims(ort_, grid);
  int64_t N = input_dims[0];
  int64_t C = input_dims[1];
  int64_t inp_H = input_dims[2];
  int64_t inp_W = input_dims[3];
  int64_t out_H = grid_dims[1];
  int64_t out_W = grid_dims[2];

  std::vector output_dims = {N, C, out_H, out_W};
  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, output_dims.data(), output_dims.size());
  float *out_ptr = ort_.GetTensorMutableData(output);

  // Element strides of input, grid and output.
  int64_t inp_sN = input_dims[1] * input_dims[2] * input_dims[3];
  int64_t inp_sC = input_dims[2] * input_dims[3];
  int64_t inp_sH = input_dims[3];
  int64_t inp_sW = 1;
  int64_t grid_sN = grid_dims[1] * grid_dims[2] * grid_dims[3];
  int64_t grid_sH = grid_dims[2] * grid_dims[3];
  int64_t grid_sW = grid_dims[3];
  int64_t grid_sCoor = 1;
  int64_t out_sN = output_dims[1] * output_dims[2] * output_dims[3];
  int64_t out_sC = output_dims[2] * output_dims[3];
  int64_t out_sH = output_dims[3];
  int64_t out_sW = 1;

  // loop over each output pixel
  for (int64_t n = 0; n < N; ++n) {
    const float *grid_ptr_N = grid_data + n * grid_sN;
    const float *inp_ptr_N = input_data + n * inp_sN;
    for (int64_t h = 0; h < out_H; ++h) {
      for (int64_t w = 0; w < out_W; ++w) {
        const float *grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW;
        float x = *grid_ptr_NHW;
        float y = grid_ptr_NHW[grid_sCoor];

        float ix = grid_sampler_compute_source_index(x, inp_W, padding_mode,
                                                     align_corners);
        float iy = grid_sampler_compute_source_index(y, inp_H, padding_mode,
                                                     align_corners);

        if (interpolation_mode == GridSamplerInterpolation::Bilinear) {
          // get corner pixel values from (x, y)
          // for 4d, we use north-east-south-west
          int64_t ix_nw = static_cast(std::floor(ix));
          int64_t iy_nw = static_cast(std::floor(iy));

          int64_t ix_ne = ix_nw + 1;
          int64_t iy_ne = iy_nw;

          int64_t ix_sw = ix_nw;
          int64_t iy_sw = iy_nw + 1;

          int64_t ix_se = ix_nw + 1;
          int64_t iy_se = iy_nw + 1;

          // get surfaces to each neighbor:
          // (each weight is the area of the rectangle spanned with the
          // diagonally opposite corner)
          float nw = (ix_se - ix) * (iy_se - iy);
          float ne = (ix - ix_sw) * (iy_sw - iy);
          float sw = (ix_ne - ix) * (iy - iy_ne);
          float se = (ix - ix_nw) * (iy - iy_nw);

          // calculate bilinear weighted pixel value and set output pixel
          const float *inp_ptr_NC = inp_ptr_N;
          float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW;
          for (int64_t c = 0; c < C;
               ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) {
            // Corners outside the input contribute nothing.
            auto res = static_cast(0);
            if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw;
            }
            if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne;
            }
            if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw;
            }
            if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se;
            }
            *out_ptr_NCHW = res;
          }
        } else if (interpolation_mode == GridSamplerInterpolation::Nearest) {
          int64_t ix_nearest = static_cast(std::nearbyint(ix));
          int64_t iy_nearest = static_cast(std::nearbyint(iy));

          // assign nearest neighbor pixel value to output pixel
          float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW;
          const float *inp_ptr_NC = inp_ptr_N;
          for (int64_t c = 0; c < C;
               ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) {
            if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) {
              *out_ptr_NCHW =
                  inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW];
            } else {
              *out_ptr_NCHW = static_cast(0);
            }
          }
        } else if (interpolation_mode == GridSamplerInterpolation::Bicubic) {
          // grid_sampler_compute_source_index will "clip the value" of idx
          // depends on the padding,
          // which would cause calculation to be wrong,
          // for example x = -0.1 -> ix = 0 for zero padding, but in bicubic ix
          // = floor(x) = -1
          // There would be more problem in reflection padding, since the -1 and
          // +1 direction is not fixed in boundary condition
          ix = grid_sampler_unnormalize(x, inp_W, align_corners);
          iy = grid_sampler_unnormalize(y, inp_H, align_corners);

          float ix_nw = std::floor(ix);
          float iy_nw = std::floor(iy);

          const float tx = ix - ix_nw;
          const float ty = iy - iy_nw;

          const float *inp_ptr_NC = inp_ptr_N;
          float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW;
          for (int64_t c = 0; c < C;
               ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) {
            float coefficients[4];

            // Interpolate 4 values in the x direction
            // (one per row iy_nw - 1 .. iy_nw + 2; padding is applied per
            // sample inside get_value_bounded)
            for (int64_t i = 0; i < 4; ++i) {
              coefficients[i] = cubic_interp1d(
                  get_value_bounded(inp_ptr_NC, ix_nw - 1, iy_nw - 1 + i, inp_W,
                                    inp_H, inp_sW, inp_sH, padding_mode,
                                    align_corners),
                  get_value_bounded(inp_ptr_NC, ix_nw + 0, iy_nw - 1 + i, inp_W,
                                    inp_H, inp_sW, inp_sH, padding_mode,
                                    align_corners),
                  get_value_bounded(inp_ptr_NC, ix_nw + 1, iy_nw - 1 + i, inp_W,
                                    inp_H, inp_sW, inp_sH, padding_mode,
                                    align_corners),
                  get_value_bounded(inp_ptr_NC, ix_nw + 2, iy_nw - 1 + i, inp_W,
                                    inp_H, inp_sW, inp_sH, padding_mode,
                                    align_corners),
                  tx);
            }

            // Interpolate in the y direction
            *out_ptr_NCHW =
                cubic_interp1d(coefficients[0], coefficients[1],
                               coefficients[2], coefficients[3], ty);
          }
        }
      }
    }
  }
}
================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/modulated_deform_conv.cpp ================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "modulated_deform_conv.h"

#include
#include

#include "../ort_mmcv_utils.h"

// Bilinearly samples a single (src_h, src_w) plane at fractional position
// (h, w). Positions at or beyond one pixel outside the plane yield 0, and any
// of the four neighbours that falls outside the plane contributes 0.
float bilinear_interpolate_2d(const float *src, const int64_t src_h,
                              const int64_t src_w, const float h,
                              const float w) {
  if (h <= -1 || src_h <= h || w <= -1 || src_w <= w) {
    return 0;
  }

  int64_t h_low = floor(h);
  int64_t w_low = floor(w);
  int64_t h_high = h_low + 1;
  int64_t w_high = w_low + 1;

  // Fractional distances to the low corner and their complements.
  float lh = h - h_low;
  float lw = w - w_low;
  float hh = 1 - lh;
  float hw = 1 - lw;

  float v1 = 0;
  if (h_low >= 0 && w_low >= 0) v1 = src[h_low * src_w + w_low];
  float v2 = 0;
  if (h_low >= 0 && w_high <= src_w - 1) v2 = src[h_low * src_w + w_high];
  float v3 = 0;
  if (h_high <= src_h - 1 && w_low >= 0) v3 = src[h_high * src_w + w_low];
  float v4 = 0;
  if (h_high <= src_h - 1 && w_high <= src_w - 1)
    v4 = src[h_high * src_w + w_high];

  float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;

  float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  return val;
}

// output: (channels * kernel_h * kernel_w, dst_h * dst_w)
void deformable_im2col_2d(const float *input, const float *offset,
                          const float *mask, const int64_t src_h,
                          const int64_t
src_w, const int64_t kernel_h, const int64_t kernel_w, const int64_t pad_h, const int64_t pad_w, const int64_t stride_h, const int64_t stride_w, const int64_t dilation_h, const int64_t dilation_w, const int64_t channels, const int64_t offset_groups, const int64_t dst_h, const int64_t dst_w, const bool use_mask, float *columns) { const int64_t workload = channels * dst_h * dst_w; for (int64_t index = 0; index != workload; ++index) { const int64_t ow = index % dst_w; const int64_t oh = (index / dst_w) % dst_h; const int64_t ic = index / (dst_w * dst_h); const int64_t oc = ic * kernel_h * kernel_w; int64_t c_per_offset_grp = channels / offset_groups; const int64_t grp_idx = ic / c_per_offset_grp; auto columns_ptr = columns + (oc * (dst_h * dst_w) + oh * dst_w + ow); auto input_ptr = input + ic * (src_h * src_w); auto offset_ptr = offset + grp_idx * 2 * kernel_h * kernel_w * dst_h * dst_w; auto mask_ptr = mask; if (use_mask) { mask_ptr += grp_idx * kernel_h * kernel_w * dst_h * dst_w; } for (int64_t kh = 0; kh < kernel_h; ++kh) { for (int64_t kw = 0; kw < kernel_w; ++kw) { const int64_t mask_idx = kh * kernel_w + kw; const int64_t offset_idx = 2 * mask_idx; float mask_value = 1; if (use_mask) { mask_value = mask_ptr[mask_idx * (dst_h * dst_w) + oh * dst_w + ow]; } const float offset_h = offset_ptr[offset_idx * (dst_h * dst_w) + oh * dst_w + ow]; const float offset_w = offset_ptr[(offset_idx + 1) * (dst_h * dst_w) + oh * dst_w + ow]; const float ih = (oh * stride_h - pad_h) + kh * dilation_h + offset_h; const float iw = (ow * stride_w - pad_w) + kw * dilation_w + offset_w; *columns_ptr = mask_value * bilinear_interpolate_2d(input_ptr, src_h, src_w, ih, iw); columns_ptr += dst_h * dst_w; } } } } void gemm_ref_fp32(const float *A, const float *B, const float *V, const float *H, const int32_t trans_A, const int32_t trans_B, const int32_t M, const int32_t N, const int32_t K, const float alpha, const float beta, float *Y) { if (!trans_A && !trans_B) { // MK, KN; NN for 
(int64_t m = 0; m < M; ++m) { for (int64_t n = 0; n < N; ++n) { float y = 0.0f; for (int64_t k = 0; k < K; ++k) { y += A[m * K + k] * B[k * N + n]; } y *= alpha; if (V) y += beta * V[n]; if (H) y += beta * H[m * N + n]; Y[m * N + n] = y; } } } if (trans_A && !trans_B) { // KM, KN; TN for (int64_t m = 0; m < M; ++m) { for (int64_t n = 0; n < N; ++n) { float y = 0.0f; for (int64_t k = 0; k < K; ++k) { y += A[k * M + m] * B[k * N + n]; } y *= alpha; if (V) y += beta * V[n]; if (H) y += beta * H[m * N + n]; Y[m * N + n] = y; } } } if (trans_A && trans_B) { // KM, NK; TT for (int64_t m = 0; m < M; ++m) { for (int64_t n = 0; n < N; ++n) { float y = 0.0f; for (int64_t k = 0; k < K; ++k) { y += A[k * M + m] * B[n * K + k]; } y *= alpha; if (V) y += beta * V[n]; if (H) y += beta * H[m * N + n]; Y[m * N + n] = y; } } } if (!trans_A && trans_B) { // MK, NK; NT for (int64_t m = 0; m < M; ++m) { for (int64_t n = 0; n < N; ++n) { float y = 0.0f; for (int64_t k = 0; k < K; ++k) { y += A[m * K + k] * B[n * K + k]; } y *= alpha; if (V) y += beta * V[n]; if (H) y += beta * H[m * N + n]; Y[m * N + n] = y; } } } } void deformable_conv2d_ref_fp32( const float *src, const float *offset, const float *mask, const float *filter, const float *bias, const int64_t batch, const int64_t src_c, const int64_t src_h, const int64_t src_w, const int64_t dst_c, const int64_t dst_h, const int64_t dst_w, const int64_t group, const int64_t offset_group, const int64_t channels, const int64_t num_output, const int64_t kernel_h, const int64_t kernel_w, const int64_t stride_h, const int64_t stride_w, const int64_t pad_h, const int64_t pad_w, const int64_t dilation_h, const int64_t dilation_w, float *columns, float *dst) { const int64_t ic_per_gp = channels / group; const int64_t oc_per_gp = num_output / group; for (int64_t b = 0; b < batch; ++b) { for (int64_t g = 0; g < group; ++g) { deformable_im2col_2d( src + b * src_c * src_h * src_w + g * ic_per_gp * src_h * src_w, offset + b * offset_group * 2 * 
kernel_h * kernel_w * dst_h * dst_w, mask + b * offset_group * kernel_h * kernel_w * dst_h * dst_w, src_h, src_w, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, ic_per_gp, offset_group, dst_h, dst_w, mask != nullptr, columns); float *dst_ptr = dst + b * dst_c * dst_h * dst_w + g * oc_per_gp * dst_h * dst_w; if (bias != nullptr) { const float *bias_ptr = bias + g * oc_per_gp; for (int64_t oc = 0; oc < oc_per_gp; ++oc) { for (int64_t hw = 0; hw < dst_h * dst_w; ++hw) { dst_ptr[oc * dst_h * dst_w + hw] = bias_ptr[oc]; } } } else { memset(dst_ptr, 0.0f, sizeof(float) * oc_per_gp * dst_h * dst_w); } gemm_ref_fp32(filter + g * oc_per_gp * ic_per_gp * kernel_h * kernel_w, columns, nullptr, dst_ptr, 0, 0, oc_per_gp, dst_h * dst_w, ic_per_gp * kernel_h * kernel_w, 1.0f, 1.0f, dst_ptr); } } } MMCVModulatedDeformConvKernel::MMCVModulatedDeformConvKernel( OrtApi api, const OrtKernelInfo *info) : api_(api), ort_(api_), info_(info) { std::vector stride = ort_.KernelInfoGetAttribute>(info, "stride"); stride_height_ = stride[0]; stride_width_ = stride[1]; std::vector padding = ort_.KernelInfoGetAttribute>(info, "padding"); padding_height_ = padding[0]; padding_width_ = padding[1]; std::vector dilation = ort_.KernelInfoGetAttribute>(info, "dilation"); dilation_height_ = dilation[0]; dilation_width_ = dilation[1]; deformable_group_ = ort_.KernelInfoGetAttribute(info, "deform_groups"); group_ = ort_.KernelInfoGetAttribute(info, "groups"); // create allocator allocator_ = Ort::AllocatorWithDefaultOptions(); } void MMCVModulatedDeformConvKernel::Compute(OrtKernelContext *context) { const int64_t stride_height = stride_height_; const int64_t stride_width = stride_width_; const int64_t padding_height = padding_height_; const int64_t padding_width = padding_width_; const int64_t dilation_height = dilation_height_; const int64_t dilation_width = dilation_width_; const int64_t deformable_group = deformable_group_; const int64_t group = group_; const OrtValue 
*input = ort_.KernelContext_GetInput(context, 0); const float *input_data = reinterpret_cast(ort_.GetTensorData(input)); const OrtValue *offset = ort_.KernelContext_GetInput(context, 1); const float *offset_data = reinterpret_cast(ort_.GetTensorData(offset)); const OrtValue *mask = ort_.KernelContext_GetInput(context, 2); const float *mask_data = reinterpret_cast(ort_.GetTensorData(mask)); const OrtValue *filter = ort_.KernelContext_GetInput(context, 3); const float *filter_data = reinterpret_cast(ort_.GetTensorData(filter)); const OrtValue *bias = ort_.KernelContext_GetInput(context, 4); const float *bias_data = (bias != nullptr) ? reinterpret_cast(ort_.GetTensorData(bias)) : nullptr; // const float *bias_data = nullptr; OrtTensorDimensions input_dims(ort_, input); OrtTensorDimensions filter_dims(ort_, filter); int64_t batch = input_dims[0]; int64_t channels = input_dims[1]; int64_t in_height = input_dims[2]; int64_t in_width = input_dims[3]; int64_t num_output = filter_dims[0]; int64_t kernel_height = filter_dims[2]; int64_t kernel_width = filter_dims[3]; // get output memory int64_t out_height = floor((in_height + 2 * padding_height - dilation_height * (kernel_height - 1) - 1) / stride_height + 1); int64_t out_width = floor( (in_width + 2 * padding_width - dilation_width * (kernel_width - 1) - 1) / stride_width + 1); std::vector output_dims = {batch, num_output, out_height, out_width}; OrtValue *output = ort_.KernelContext_GetOutput( context, 0, output_dims.data(), output_dims.size()); float *out_ptr = ort_.GetTensorMutableData(output); // allocate tmp memory int64_t column_len = (channels / group) * kernel_height * kernel_width * out_height * out_width; float *columns = (float *)allocator_.Alloc(sizeof(float) * column_len); deformable_conv2d_ref_fp32( input_data, offset_data, mask_data, filter_data, bias_data, batch, channels, in_height, in_width, num_output, out_height, out_width, group, deformable_group, channels, num_output, kernel_height, kernel_width, 
stride_height, stride_width, padding_height, padding_width, dilation_height, dilation_width, columns, out_ptr); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/nms.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "nms.h" #include #include #include #include #include #include // std::iota #include #include "../ort_mmcv_utils.h" NmsKernel::NmsKernel(OrtApi api, const OrtKernelInfo *info) : api_(api), ort_(api_), info_(info) { iou_threshold_ = ort_.KernelInfoGetAttribute(info, "iou_threshold"); offset_ = ort_.KernelInfoGetAttribute(info, "offset"); // create allocator allocator_ = Ort::AllocatorWithDefaultOptions(); } void NmsKernel::Compute(OrtKernelContext *context) { const float iou_threshold = iou_threshold_; const int64_t offset = offset_; const OrtValue *boxes = ort_.KernelContext_GetInput(context, 0); const float *boxes_data = reinterpret_cast(ort_.GetTensorData(boxes)); const OrtValue *scores = ort_.KernelContext_GetInput(context, 1); const float *scores_data = reinterpret_cast(ort_.GetTensorData(scores)); OrtTensorDimensions boxes_dim(ort_, boxes); OrtTensorDimensions scores_dim(ort_, scores); int64_t nboxes = boxes_dim[0]; assert(boxes_dim[1] == 4); // allocate tmp memory float *tmp_boxes = (float *)allocator_.Alloc(sizeof(float) * nboxes * 4); float *sc = (float *)allocator_.Alloc(sizeof(float) * nboxes); float *areas = (float *)allocator_.Alloc(sizeof(float) * nboxes); bool *select = (bool *)allocator_.Alloc(sizeof(bool) * nboxes); for (int64_t i = 0; i < nboxes; i++) { select[i] = true; } memcpy(tmp_boxes, boxes_data, sizeof(float) * nboxes * 4); memcpy(sc, scores_data, sizeof(float) * nboxes); // sort scores std::vector tmp_sc; for (int i = 0; i < nboxes; i++) { tmp_sc.push_back(sc[i]); } std::vector order(tmp_sc.size()); std::iota(order.begin(), order.end(), 0); std::sort(order.begin(), order.end(), [&tmp_sc](int64_t id1, 
int64_t id2) { return tmp_sc[id1] > tmp_sc[id2]; }); // area = (x2 - x1 + offset) * (y2 - y1 + offset) for (int64_t i = 0; i < nboxes; i++) { areas[i] = (tmp_boxes[i * 4 + 2] - tmp_boxes[i * 4 + 0] + offset) * (tmp_boxes[i * 4 + 3] - tmp_boxes[i * 4 + 1] + offset); } for (int64_t _i = 0; _i < nboxes; _i++) { if (select[_i] == false) continue; auto i = order[_i]; auto ix1 = tmp_boxes[i * 4 + 0]; auto iy1 = tmp_boxes[i * 4 + 1]; auto ix2 = tmp_boxes[i * 4 + 2]; auto iy2 = tmp_boxes[i * 4 + 3]; auto iarea = areas[i]; for (int64_t _j = _i + 1; _j < nboxes; _j++) { if (select[_j] == false) continue; auto j = order[_j]; auto xx1 = std::max(ix1, tmp_boxes[j * 4 + 0]); auto yy1 = std::max(iy1, tmp_boxes[j * 4 + 1]); auto xx2 = std::min(ix2, tmp_boxes[j * 4 + 2]); auto yy2 = std::min(iy2, tmp_boxes[j * 4 + 3]); auto w = std::max(0.f, xx2 - xx1 + offset); auto h = std::max(0.f, yy2 - yy1 + offset); auto inter = w * h; auto ovr = inter / (iarea + areas[j] - inter); if (ovr > iou_threshold) select[_j] = false; } } std::vector res_order; for (int i = 0; i < nboxes; i++) { if (select[i]) { res_order.push_back(order[i]); } } std::vector inds_dims({res_order.size()}); OrtValue *res = ort_.KernelContext_GetOutput(context, 0, inds_dims.data(), inds_dims.size()); int64_t *res_data = ort_.GetTensorMutableData(res); memcpy(res_data, res_order.data(), sizeof(int64_t) * res_order.size()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "onnxruntime_register.h" #include "corner_pool.h" #include "deform_conv.h" #include "grid_sample.h" #include "modulated_deform_conv.h" #include "nms.h" #include "ort_mmcv_utils.h" #include "reduce_ops.h" #include "roi_align.h" #include "roi_align_rotated.h" #include "soft_nms.h" const char *c_MMCVOpDomain = "mmcv"; SoftNmsOp c_SoftNmsOp; NmsOp c_NmsOp; MMCVRoiAlignCustomOp c_MMCVRoiAlignCustomOp; MMCVRoIAlignRotatedCustomOp c_MMCVRoIAlignRotatedCustomOp; GridSampleOp c_GridSampleOp; MMCVCumMaxCustomOp c_MMCVCumMaxCustomOp; MMCVCumMinCustomOp c_MMCVCumMinCustomOp; MMCVCornerPoolCustomOp c_MMCVCornerPoolCustomOp; MMCVModulatedDeformConvOp c_MMCVModulatedDeformConvOp; MMCVDeformConvOp c_MMCVDeformConvOp; OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options, const OrtApiBase *api) { OrtCustomOpDomain *domain = nullptr; const OrtApi *ortApi = api->GetApi(ORT_API_VERSION); if (auto status = ortApi->CreateCustomOpDomain(c_MMCVOpDomain, &domain)) { return status; } if (auto status = ortApi->CustomOpDomain_Add(domain, &c_SoftNmsOp)) { return status; } if (auto status = ortApi->CustomOpDomain_Add(domain, &c_NmsOp)) { return status; } if (auto status = ortApi->CustomOpDomain_Add(domain, &c_MMCVRoiAlignCustomOp)) { return status; } if (auto status = ortApi->CustomOpDomain_Add(domain, &c_MMCVRoIAlignRotatedCustomOp)) { return status; } if (auto status = ortApi->CustomOpDomain_Add(domain, &c_GridSampleOp)) { return status; } if (auto status = ortApi->CustomOpDomain_Add(domain, &c_MMCVCornerPoolCustomOp)) { return status; } if (auto status = ortApi->CustomOpDomain_Add(domain, &c_MMCVCumMaxCustomOp)) { return status; } if (auto status = ortApi->CustomOpDomain_Add(domain, &c_MMCVCumMinCustomOp)) { return status; } if (auto status = ortApi->CustomOpDomain_Add(domain, &c_MMCVModulatedDeformConvOp)) { return status; } if (auto status = ortApi->CustomOpDomain_Add(domain, &c_MMCVDeformConvOp)) { return status; } return 
ortApi->AddCustomOpDomain(options, domain); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/reduce_ops.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "reduce_ops.h" #include #include #include "../ort_mmcv_utils.h" // modified from // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/ReduceOps.cpp static inline int64_t maybe_wrap_dim(int64_t dim, int64_t ndims) { int64_t min = -ndims; int64_t max = ndims - 1; assert(dim >= min && dim <= max); if (dim < 0) dim += ndims; return dim; } static inline int64_t get_dim_stride(const int64_t dim, const int64_t ndims, const int64_t *reversed_dim_cumprod) { return dim == ndims - 1 ? 1 : reversed_dim_cumprod[dim + 1]; } static inline int64_t get_dim_size(const int64_t dim, const int64_t ndims, const int64_t *reversed_dim_cumprod) { return dim == ndims - 1 ? reversed_dim_cumprod[dim] : reversed_dim_cumprod[dim] / reversed_dim_cumprod[dim + 1]; } template void cummax_cummin_helper(const T1 *input, T1 *output, T2 *indices, const int64_t input_dim_size, const int64_t stride) { Operation op; T1 out = input[0]; int64_t idx = 0; for (int64_t i = 0; i < input_dim_size; i++) { T1 curr_elem = input[i * stride]; if (op(curr_elem, out)) { out = curr_elem; idx = i; } output[i * stride] = out; indices[i * stride] = idx; } } // modified `tensor_dim_apply3` from // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/TensorDimApply.h. // the difference is that: (1) use `reversed_dim_cumprod` for fast computing of // tensor `size` and `stride`. (2) the same `stride` is used for input, output, // and indices, since it's unnecessary to use separate values. currently // `tensor_dim_apply3` is only used for `cummax` and `cummin`, according to the // official pytorch projects: https://github.com/pytorch/pytorch. 
template void tensor_dim_apply3(const T1 *input, T1 *output, T2 *indices, const int64_t dim, const int64_t ndims, const int64_t *reversed_dim_cumprod, Function func) { int dim_apply_finished = 0; int64_t input_dim_size = get_dim_size(dim, ndims, reversed_dim_cumprod); // the same stride is used for input, output and indices int64_t stride = get_dim_stride(dim, ndims, reversed_dim_cumprod); std::vector counter(ndims, 0); while (!dim_apply_finished) { // call `func` once to update output and indices func(input, output, indices, input_dim_size, stride); if (ndims == 1) break; for (int64_t dim_i = 0; dim_i < ndims; dim_i++) { if (dim_i == dim) { if (dim_i == (ndims - 1)) { dim_apply_finished = 1; break; } continue; } counter[dim_i]++; // the same stride is used for input, output, and indices int64_t stride_dim_i = get_dim_stride(dim_i, ndims, reversed_dim_cumprod); input += stride_dim_i; output += stride_dim_i; indices += stride_dim_i; if (counter[dim_i] == get_dim_size(dim_i, ndims, reversed_dim_cumprod)) { if (dim_i == ndims - 1) { dim_apply_finished = 1; break; } else { input -= counter[dim_i] * stride_dim_i; output -= counter[dim_i] * stride_dim_i; indices -= counter[dim_i] * stride_dim_i; counter[dim_i] = 0; } } else { break; } // if } // for } // while } template void CumMax_CumMin_CPU(const T1 *input, T1 *output, T2 *indices, int64_t *reversed_dim_cumprod, const int64_t dim, const OrtTensorDimensions &out_dimensions) { // calculate numel const int64_t ndims = out_dimensions.size(); int64_t numel = 1; for (int64_t dim_i = 0; dim_i < ndims; dim_i++) { numel *= out_dimensions.data()[dim_i]; } // cummax is only applied to input which is non-zero dim and non-empty if (numel) { // compute the cumulative production on dimension size, // which is then used for computing the stride or size of a specific `dim`. 
reversed_dim_cumprod[ndims - 1] = out_dimensions.data()[ndims - 1]; for (int64_t dim_i = ndims - 2; dim_i >= 0; dim_i--) { reversed_dim_cumprod[dim_i] = reversed_dim_cumprod[dim_i + 1] * out_dimensions.data()[dim_i]; } // do cummax or cummin based on `Operation` type tensor_dim_apply3( input, output, indices, dim, ndims, reversed_dim_cumprod, cummax_cummin_helper); } } void MMCVCumMaxKernel::Compute(OrtKernelContext *context) { // get input const OrtValue *input = ort_.KernelContext_GetInput(context, 0); const float *input_data = reinterpret_cast(ort_.GetTensorData(input)); // get output OrtTensorDimensions out_dimensions(ort_, input); OrtValue *output = ort_.KernelContext_GetOutput( context, 0, out_dimensions.data(), out_dimensions.size()); float *output_data = ort_.GetTensorMutableData(output); OrtValue *indices = ort_.KernelContext_GetOutput( context, 1, out_dimensions.data(), out_dimensions.size()); int64_t *indices_data = ort_.GetTensorMutableData(indices); // allocate tmp memory for computing the cumulative production on dimension // size const int64_t ndims = out_dimensions.size(); assert(ndims > 0); int64_t *reversed_dim_cumprod = (int64_t *)allocator_.Alloc(sizeof(int64_t) * ndims); // dim should be wrapped if it's negative (e.g. 
-1) const int64_t dim = maybe_wrap_dim(dim_, ndims); CumMax_CumMin_CPU>( input_data, output_data, indices_data, reversed_dim_cumprod, dim, out_dimensions); } void MMCVCumMinKernel::Compute(OrtKernelContext *context) { // get input const OrtValue *input = ort_.KernelContext_GetInput(context, 0); const float *input_data = reinterpret_cast(ort_.GetTensorData(input)); // get output OrtTensorDimensions out_dimensions(ort_, input); OrtValue *output = ort_.KernelContext_GetOutput( context, 0, out_dimensions.data(), out_dimensions.size()); float *output_data = ort_.GetTensorMutableData(output); OrtValue *indices = ort_.KernelContext_GetOutput( context, 1, out_dimensions.data(), out_dimensions.size()); int64_t *indices_data = ort_.GetTensorMutableData(indices); // allocate tmp memory for computing the cumulative production on dimension // size const int64_t ndims = out_dimensions.size(); assert(ndims > 0); int64_t *reversed_dim_cumprod = (int64_t *)allocator_.Alloc(sizeof(int64_t) * ndims); // dim should be wrapped if it's negative (e.g. -1) const int64_t dim = maybe_wrap_dim(dim_, ndims); CumMax_CumMin_CPU>( input_data, output_data, indices_data, reversed_dim_cumprod, dim, out_dimensions); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/roi_align.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "roi_align.h" #include "../ort_mmcv_utils.h" // implementation taken from Caffe2 struct PreCalc { int pos1; int pos2; int pos3; int pos4; float w1; float w2; float w3; float w4; }; void pre_calc_for_bilinear_interpolate( const int height, const int width, const int pooled_height, const int pooled_width, const int iy_upper, const int ix_upper, float roi_start_h, float roi_start_w, float bin_size_h, float bin_size_w, int roi_bin_grid_h, int roi_bin_grid_w, std::vector &pre_calc) { int pre_calc_index = 0; for (int ph = 0; ph < pooled_height; ph++) { for (int pw = 0; pw < pooled_width; pw++) { for (int iy = 0; iy < iy_upper; iy++) { const float yy = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 for (int ix = 0; ix < ix_upper; ix++) { const float xx = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); float x = xx; float y = yy; // deal with: inverse elements are out of feature map boundary if (y < -1.0 || y > height || x < -1.0 || x > width) { // empty PreCalc pc; pc.pos1 = 0; pc.pos2 = 0; pc.pos3 = 0; pc.pos4 = 0; pc.w1 = 0; pc.w2 = 0; pc.w3 = 0; pc.w4 = 0; pre_calc[pre_calc_index] = pc; pre_calc_index += 1; continue; } if (y <= 0) { y = 0; } if (x <= 0) { x = 0; } int y_low = (int)y; int x_low = (int)x; int y_high; int x_high; if (y_low >= height - 1) { y_high = y_low = height - 1; y = (float)y_low; } else { y_high = y_low + 1; } if (x_low >= width - 1) { x_high = x_low = width - 1; x = (float)x_low; } else { x_high = x_low + 1; } float ly = y - y_low; float lx = x - x_low; float hy = 1. - ly, hx = 1. 
- lx; float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; // save weights and indices PreCalc pc; pc.pos1 = y_low * width + x_low; pc.pos2 = y_low * width + x_high; pc.pos3 = y_high * width + x_low; pc.pos4 = y_high * width + x_high; pc.w1 = w1; pc.w2 = w2; pc.w3 = w3; pc.w4 = w4; pre_calc[pre_calc_index] = pc; pre_calc_index += 1; } } } } } void ROIAlignForwardCPU(const int nthreads, const float *input, const float *rois, float *output, float *argmax_y, float *argmax_x, const int pooled_height, const int pooled_width, const float spatial_scale, const int sampling_ratio, const int pool_mode, // 0 - max pool, 1 - avg pool const bool aligned, const int channels, const int height, const int width) { int n_rois = nthreads / channels / pooled_width / pooled_height; // (n, c, ph, pw) is an element in the pooled output // can be parallelized using omp // #pragma omp parallel for num_threads(32) for (int n = 0; n < n_rois; n++) { int index_n = n * channels * pooled_width * pooled_height; const float *offset_rois = rois + n * 5; int roi_batch_ind = offset_rois[0]; // Do not use rounding; this implementation detail is critical float offset = aligned ? 
(float)0.5 : (float)0.0; float roi_start_w = offset_rois[1] * spatial_scale - offset; float roi_start_h = offset_rois[2] * spatial_scale - offset; float roi_end_w = offset_rois[3] * spatial_scale - offset; float roi_end_h = offset_rois[4] * spatial_scale - offset; float roi_width = roi_end_w - roi_start_w; float roi_height = roi_end_h - roi_start_h; if (aligned) { /*AT_ASSERTM(roi_width >= 0 && roi_height >= 0, "ROIs in ROIAlign cannot have non-negative size!");*/ assert(roi_width >= 0 && roi_height >= 0); } else { // for backward-compatibility only roi_width = std::max(roi_width, (float)1.); roi_height = std::max(roi_height, (float)1.); } float bin_size_h = static_cast(roi_height) / static_cast(pooled_height); float bin_size_w = static_cast(roi_width) / static_cast(pooled_width); // We use roi_bin_grid to sample the grid and mimic integral int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2 int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); // When the grid is empty, output zeros == 0/1, instead of NaN. const float count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 // we want to precalculate indices and weights shared by all channels, // this is the key point of optimization std::vector pre_calc(roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); pre_calc_for_bilinear_interpolate( height, width, pooled_height, pooled_width, roi_bin_grid_h, roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w, roi_bin_grid_h, roi_bin_grid_w, pre_calc); for (int c = 0; c < channels; c++) { int index_n_c = index_n + c * pooled_width * pooled_height; const float *offset_input = input + (roi_batch_ind * channels + c) * height * width; int pre_calc_index = 0; for (int ph = 0; ph < pooled_height; ph++) { for (int pw = 0; pw < pooled_width; pw++) { int index = index_n_c + ph * pooled_width + pw; float output_val = 0.; float maxval = -10000; float maxidx_y = -1.f, maxidx_x = -1.f; for (int iy = 0; iy < roi_bin_grid_h; iy++) { const float y = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); for (int ix = 0; ix < roi_bin_grid_w; ix++) { const float x = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); PreCalc pc = pre_calc[pre_calc_index]; float val = pc.w1 * offset_input[pc.pos1] + pc.w2 * offset_input[pc.pos2] + pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; if (val > maxval) { maxval = val; maxidx_y = y; maxidx_x = x; } output_val += val; pre_calc_index += 1; } } if (pool_mode == 0) { // We do max pooling inside a bin output[index] = maxval; argmax_y[index] = maxidx_y; argmax_x[index] = maxidx_x; } else if (pool_mode == 1) { // We do average (integral) pooling inside a bin output[index] = output_val / count; } // if } // for pw } // for ph } // for c } // for n } void MMCVRoiAlignKernel::Compute(OrtKernelContext *context) { // Setup inputs const OrtValue *input_X = ort_.KernelContext_GetInput(context, 0); const float *X_data = reinterpret_cast(ort_.GetTensorData(input_X)); const OrtValue *input_rois = 
ort_.KernelContext_GetInput(context, 1); const float *rois = reinterpret_cast( ort_.GetTensorData(input_rois)); // Setup output OrtTensorDimensions out_dimensions(ort_, input_X); OrtTensorDimensions roi_dimensions(ort_, input_rois); int batch_size = out_dimensions.data()[0]; int input_channels = out_dimensions.data()[1]; int input_height = out_dimensions.data()[2]; int input_width = out_dimensions.data()[3]; out_dimensions.data()[0] = roi_dimensions.data()[0]; out_dimensions.data()[2] = aligned_height_; out_dimensions.data()[3] = aligned_width_; OrtValue *output = ort_.KernelContext_GetOutput( context, 0, out_dimensions.data(), out_dimensions.size()); float *out = ort_.GetTensorMutableData(output); OrtTensorTypeAndShapeInfo *output_info = ort_.GetTensorTypeAndShape(output); ort_.ReleaseTensorTypeAndShapeInfo(output_info); // TODO: forward here int output_size = out_dimensions.data()[0]; for (auto i = 1; i < out_dimensions.size(); ++i) { output_size *= out_dimensions.data()[i]; } int poolMod = 1; if (pool_mode_ == "max") poolMod = 0; float *argmax_x = nullptr, *argmax_y = nullptr; if (poolMod == 0) { argmax_y = new float[output_size]; argmax_x = new float[output_size]; } ROIAlignForwardCPU(output_size, X_data, rois, out, argmax_y, argmax_x, aligned_height_, aligned_width_, spatial_scale_, sampling_ratio_, poolMod, aligned_, input_channels, input_height, input_width); if (argmax_x) delete argmax_x; if (argmax_y) delete argmax_y; } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/roi_align_rotated.cpp ================================================ // Modified from // https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlignRotated // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved #include "roi_align_rotated.h" #include "../ort_mmcv_utils.h" struct PreCalc { int pos1; int pos2; int pos3; int pos4; float w1; float w2; float w3; float w4; }; void pre_calc_for_bilinear_interpolate( const int height, const int width, const int pooled_height, const int pooled_width, const int iy_upper, const int ix_upper, float roi_start_h, float roi_start_w, float bin_size_h, float bin_size_w, int roi_bin_grid_h, int roi_bin_grid_w, float roi_center_h, float roi_center_w, float cos_theta, float sin_theta, std::vector &pre_calc) { int pre_calc_index = 0; for (int ph = 0; ph < pooled_height; ph++) { for (int pw = 0; pw < pooled_width; pw++) { for (int iy = 0; iy < iy_upper; iy++) { const float yy = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 for (int ix = 0; ix < ix_upper; ix++) { const float xx = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); // Rotate by theta around the center and translate // In image space, (y, x) is the order for Right Handed System, // and this is essentially multiplying the point by a rotation matrix // to rotate it counterclockwise through angle theta. 
float y = yy * cos_theta - xx * sin_theta + roi_center_h; float x = yy * sin_theta + xx * cos_theta + roi_center_w; // deal with: inverse elements are out of feature map boundary if (y < -1.0 || y > height || x < -1.0 || x > width) { // empty PreCalc pc; pc.pos1 = 0; pc.pos2 = 0; pc.pos3 = 0; pc.pos4 = 0; pc.w1 = 0; pc.w2 = 0; pc.w3 = 0; pc.w4 = 0; pre_calc[pre_calc_index] = pc; pre_calc_index += 1; continue; } if (y < 0) { y = 0; } if (x < 0) { x = 0; } int y_low = (int)y; int x_low = (int)x; int y_high; int x_high; if (y_low >= height - 1) { y_high = y_low = height - 1; y = (float)y_low; } else { y_high = y_low + 1; } if (x_low >= width - 1) { x_high = x_low = width - 1; x = (float)x_low; } else { x_high = x_low + 1; } float ly = y - y_low; float lx = x - x_low; float hy = 1. - ly, hx = 1. - lx; float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; // save weights and indices PreCalc pc; pc.pos1 = y_low * width + x_low; pc.pos2 = y_low * width + x_high; pc.pos3 = y_high * width + x_low; pc.pos4 = y_high * width + x_high; pc.w1 = w1; pc.w2 = w2; pc.w3 = w3; pc.w4 = w4; pre_calc[pre_calc_index] = pc; pre_calc_index += 1; } } } } } void ROIAlignRotatedForwardCPU(const int nthreads, const float *input, const float *rois, float *output, const float &spatial_scale, const int aligned, const int clockwise, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int sampling_ratio) { int n_rois = nthreads / channels / pooled_width / pooled_height; // (n, c, ph, pw) is an element in the pooled output // can be parallelized using omp // #pragma omp parallel for num_threads(32) for (int n = 0; n < n_rois; n++) { int index_n = n * channels * pooled_width * pooled_height; const float *current_roi = rois + n * 6; int roi_batch_ind = current_roi[0]; // Do not use rounding; this implementation detail is critical float offset = aligned ? 
(float)0.5 : (float)0.0; float roi_center_w = current_roi[1] * spatial_scale - offset; float roi_center_h = current_roi[2] * spatial_scale - offset; float roi_width = current_roi[3] * spatial_scale; float roi_height = current_roi[4] * spatial_scale; // float theta = current_roi[5] * M_PI / 180.0; float theta = current_roi[5]; // Radian angle by default if (clockwise) { theta = -theta; } float cos_theta = cos(theta); float sin_theta = sin(theta); if (!aligned) { // for backward-compatibility only roi_width = std::max(roi_width, (float)1.); roi_height = std::max(roi_height, (float)1.); } float bin_size_h = static_cast(roi_height) / static_cast(pooled_height); float bin_size_w = static_cast(roi_width) / static_cast(pooled_width); // We use roi_bin_grid to sample the grid and mimic integral int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2 int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); // We do average (integral) pooling inside a bin const float count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 // we want to precalculate indices and weights shared by all channels, // this is the key point of optimization std::vector pre_calc(roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). // Appropriate translation needs to be applied after. 
float roi_start_h = -roi_height / 2.0; float roi_start_w = -roi_width / 2.0; pre_calc_for_bilinear_interpolate( height, width, pooled_height, pooled_width, roi_bin_grid_h, roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w, roi_bin_grid_h, roi_bin_grid_w, roi_center_h, roi_center_w, cos_theta, sin_theta, pre_calc); for (int c = 0; c < channels; c++) { int index_n_c = index_n + c * pooled_width * pooled_height; const float *offset_input = input + (roi_batch_ind * channels + c) * height * width; int pre_calc_index = 0; for (int ph = 0; ph < pooled_height; ph++) { for (int pw = 0; pw < pooled_width; pw++) { int index = index_n_c + ph * pooled_width + pw; float output_val = 0.; for (int iy = 0; iy < roi_bin_grid_h; iy++) { for (int ix = 0; ix < roi_bin_grid_w; ix++) { PreCalc pc = pre_calc[pre_calc_index]; output_val += pc.w1 * offset_input[pc.pos1] + pc.w2 * offset_input[pc.pos2] + pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; pre_calc_index += 1; } } output_val /= count; output[index] = output_val; } // for pw } // for ph } // for c } // for n } void MMCVRoIAlignRotatedKernel::Compute(OrtKernelContext *context) { // Setup inputs const OrtValue *input_X = ort_.KernelContext_GetInput(context, 0); const float *X_data = reinterpret_cast(ort_.GetTensorData(input_X)); const OrtValue *input_rois = ort_.KernelContext_GetInput(context, 1); const float *rois = reinterpret_cast( ort_.GetTensorData(input_rois)); // Setup output OrtTensorDimensions out_dimensions(ort_, input_X); OrtTensorDimensions roi_dimensions(ort_, input_rois); int batch_size = out_dimensions.data()[0]; int input_channels = out_dimensions.data()[1]; int input_height = out_dimensions.data()[2]; int input_width = out_dimensions.data()[3]; out_dimensions.data()[0] = roi_dimensions.data()[0]; out_dimensions.data()[2] = aligned_height_; out_dimensions.data()[3] = aligned_width_; OrtValue *output = ort_.KernelContext_GetOutput( context, 0, out_dimensions.data(), 
out_dimensions.size()); float *out = ort_.GetTensorMutableData(output); OrtTensorTypeAndShapeInfo *output_info = ort_.GetTensorTypeAndShape(output); ort_.ReleaseTensorTypeAndShapeInfo(output_info); // TODO: forward here int output_size = out_dimensions.data()[0]; for (auto i = 1; i < out_dimensions.size(); ++i) { output_size *= out_dimensions.data()[i]; } ROIAlignRotatedForwardCPU(output_size, X_data, rois, out, spatial_scale_, aligned_, clockwise_, input_channels, input_height, input_width, aligned_height_, aligned_width_, sampling_ratio_); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/soft_nms.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "soft_nms.h" #include #include #include #include "../ort_mmcv_utils.h" SoftNmsKernel::SoftNmsKernel(OrtApi api, const OrtKernelInfo *info) : api_(api), ort_(api_), info_(info) { iou_threshold_ = ort_.KernelInfoGetAttribute(info, "iou_threshold"); sigma_ = ort_.KernelInfoGetAttribute(info, "sigma"); min_score_ = ort_.KernelInfoGetAttribute(info, "min_score"); method_ = ort_.KernelInfoGetAttribute(info, "method"); offset_ = ort_.KernelInfoGetAttribute(info, "offset"); // create allocator allocator_ = Ort::AllocatorWithDefaultOptions(); } void SoftNmsKernel::Compute(OrtKernelContext *context) { typedef float T; const T iou_threshold = T(iou_threshold_); const T sigma = T(sigma_); const T min_score = T(min_score_); const int method = int(method_); const T offset = T(offset_); const OrtValue *boxes = ort_.KernelContext_GetInput(context, 0); const T *boxes_data = reinterpret_cast(ort_.GetTensorData(boxes)); const OrtValue *scores = ort_.KernelContext_GetInput(context, 1); const T *scores_data = reinterpret_cast(ort_.GetTensorData(scores)); OrtTensorDimensions boxes_dim(ort_, boxes); OrtTensorDimensions scores_dim(ort_, scores); int64_t nboxes = boxes_dim[0]; assert(boxes_dim[1] == 4); // allocate tmp 
memory T *tmp_boxes = (T *)allocator_.Alloc(sizeof(T) * nboxes * 4); T *x1 = tmp_boxes; T *y1 = tmp_boxes + 1; T *x2 = tmp_boxes + 2; T *y2 = tmp_boxes + 3; T *sc = (T *)allocator_.Alloc(sizeof(T) * nboxes); T *areas = (T *)allocator_.Alloc(sizeof(T) * nboxes); T *de = (T *)allocator_.Alloc(sizeof(T) * nboxes * 5); int64_t *inds = (int64_t *)allocator_.Alloc(sizeof(int64_t) * nboxes); memcpy(tmp_boxes, boxes_data, sizeof(T) * nboxes * 4); memcpy(sc, scores_data, sizeof(T) * nboxes); // init inds as arange(nboxes) std::generate(inds, inds + nboxes, [n = 0]() mutable { return n++; }); // area = (x2-x1+offset)*(y2-y1+offset) for (int64_t i = 0; i < nboxes; i++) { areas[i] = (x2[i * 4] - x1[i * 4] + offset) * (y2[i * 4] - y1[i * 4] + offset); } int64_t pos = 0; for (int64_t i = 0; i < nboxes; i++) { auto max_score = sc[i]; auto max_pos = i; pos = i + 1; // get max box while (pos < nboxes) { if (max_score < sc[pos]) { max_score = sc[pos]; max_pos = pos; } pos = pos + 1; } // swap auto ix1 = de[i * 5 + 0] = x1[max_pos * 4]; auto iy1 = de[i * 5 + 1] = y1[max_pos * 4]; auto ix2 = de[i * 5 + 2] = x2[max_pos * 4]; auto iy2 = de[i * 5 + 3] = y2[max_pos * 4]; auto iscore = de[i * 5 + 4] = sc[max_pos]; auto iarea = areas[max_pos]; auto iind = inds[max_pos]; x1[max_pos * 4] = x1[i * 4]; y1[max_pos * 4] = y1[i * 4]; x2[max_pos * 4] = x2[i * 4]; y2[max_pos * 4] = y2[i * 4]; sc[max_pos] = sc[i]; areas[max_pos] = areas[i]; inds[max_pos] = inds[i]; x1[i * 4] = ix1; y1[i * 4] = iy1; x2[i * 4] = ix2; y2[i * 4] = iy2; sc[i] = iscore; areas[i] = iarea; inds[i] = iind; pos = i + 1; while (pos < nboxes) { auto xx1 = std::max(ix1, x1[pos * 4]); auto yy1 = std::max(iy1, y1[pos * 4]); auto xx2 = std::min(ix2, x2[pos * 4]); auto yy2 = std::min(iy2, y2[pos * 4]); auto w = std::max(0.f, xx2 - xx1 + offset); auto h = std::max(0.f, yy2 - yy1 + offset); auto inter = w * h; auto ovr = inter / (iarea + areas[pos] - inter); float weight = 1.; if (method == 0) { if (ovr >= iou_threshold) weight = 0; } 
else if (method == 1) { if (ovr >= iou_threshold) weight = 1 - ovr; } else if (method == 2) { weight = std::exp(-(ovr * ovr) / sigma); } sc[pos] *= weight; // if box score falls below threshold, discard the box by // swapping with last box update N if (sc[pos] < min_score) { x1[pos * 4] = x1[(nboxes - 1) * 4]; y1[pos * 4] = y1[(nboxes - 1) * 4]; x2[pos * 4] = x2[(nboxes - 1) * 4]; y2[pos * 4] = y2[(nboxes - 1) * 4]; sc[pos] = sc[nboxes - 1]; areas[pos] = areas[nboxes - 1]; inds[pos] = inds[nboxes - 1]; nboxes = nboxes - 1; pos = pos - 1; } pos = pos + 1; } } std::vector dets_dim({nboxes, 5}); OrtValue *dets = ort_.KernelContext_GetOutput(context, 0, dets_dim.data(), dets_dim.size()); T *dets_data = ort_.GetTensorMutableData(dets); std::vector inds_dim({nboxes}); OrtValue *inds_ov = ort_.KernelContext_GetOutput(context, 1, inds_dim.data(), inds_dim.size()); int64_t *inds_data = ort_.GetTensorMutableData(inds_ov); memcpy(dets_data, de, sizeof(T) * nboxes * 5); memcpy(inds_data, inds, sizeof(int64_t) * nboxes); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/deform_conv.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef ONNXRUNTIME_DEFORM_CONV_H #define ONNXRUNTIME_DEFORM_CONV_H #include struct MMCVDeformConvKernel { MMCVDeformConvKernel(OrtApi api, const OrtKernelInfo *info); void Compute(OrtKernelContext *context); protected: OrtApi api_; Ort::CustomOpApi ort_; const OrtKernelInfo *info_; Ort::AllocatorWithDefaultOptions allocator_; int64_t stride_height_; int64_t stride_width_; int64_t padding_height_; int64_t padding_width_; int64_t dilation_height_; int64_t dilation_width_; int64_t deformable_group_; int64_t group_; int64_t im2col_step_; }; struct MMCVDeformConvOp : Ort::CustomOpBase { void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const { return new MMCVDeformConvKernel(api, info); } const char *GetName() const { return "MMCVDeformConv2d"; }; size_t GetInputTypeCount() const { return 3; }; ONNXTensorElementDataType GetInputType(size_t /*index*/) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; OrtCustomOpInputOutputCharacteristic GetInputCharacteristic( size_t index) const { return OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_REQUIRED; } size_t GetOutputTypeCount() const { return 1; }; ONNXTensorElementDataType GetOutputType(size_t /*index*/) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; // force cpu const char *GetExecutionProviderType() const { return "CPUExecutionProvider"; }; }; #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/grid_sample.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef ONNXRUNTIME_GRIDSAMPLE_H #define ONNXRUNTIME_GRIDSAMPLE_H #include struct GridSampleKernel { GridSampleKernel(OrtApi api, const OrtKernelInfo *info); void Compute(OrtKernelContext *context); protected: OrtApi api_; Ort::CustomOpApi ort_; const OrtKernelInfo *info_; Ort::AllocatorWithDefaultOptions allocator_; int64_t align_corners_; int64_t interpolation_mode_; int64_t padding_mode_; }; struct GridSampleOp : Ort::CustomOpBase { void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const { return new GridSampleKernel(api, info); }; const char *GetName() const { return "grid_sampler"; }; size_t GetInputTypeCount() const { return 2; }; ONNXTensorElementDataType GetInputType(size_t /*index*/) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; size_t GetOutputTypeCount() const { return 1; }; ONNXTensorElementDataType GetOutputType(size_t /*index*/) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; const char *GetExecutionProviderType() const { return "CPUExecutionProvider"; }; }; #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/modulated_deform_conv.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef ONNXRUNTIME_MODULATED_DEFORM_CONV_H #define ONNXRUNTIME_MODULATED_DEFORM_CONV_H #include struct MMCVModulatedDeformConvKernel { MMCVModulatedDeformConvKernel(OrtApi api, const OrtKernelInfo *info); void Compute(OrtKernelContext *context); protected: OrtApi api_; Ort::CustomOpApi ort_; const OrtKernelInfo *info_; Ort::AllocatorWithDefaultOptions allocator_; int64_t stride_height_; int64_t stride_width_; int64_t padding_height_; int64_t padding_width_; int64_t dilation_height_; int64_t dilation_width_; int64_t deformable_group_; int64_t group_; }; struct MMCVModulatedDeformConvOp : Ort::CustomOpBase { void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const { return new MMCVModulatedDeformConvKernel(api, info); } const char *GetName() const { return "MMCVModulatedDeformConv2d"; }; size_t GetInputTypeCount() const { return 5; }; ONNXTensorElementDataType GetInputType(size_t /*index*/) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; OrtCustomOpInputOutputCharacteristic GetInputCharacteristic( size_t index) const { // The last input (index == 4) is optional, which is bias if (index == 4) return OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_OPTIONAL; return OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_REQUIRED; } size_t GetOutputTypeCount() const { return 1; }; ONNXTensorElementDataType GetOutputType(size_t /*index*/) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; // force cpu const char *GetExecutionProviderType() const { return "CPUExecutionProvider"; }; }; #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/nms.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef ONNXRUNTIME_NMS_H #define ONNXRUNTIME_NMS_H #include struct NmsKernel { NmsKernel(OrtApi api, const OrtKernelInfo *info); void Compute(OrtKernelContext *context); protected: OrtApi api_; Ort::CustomOpApi ort_; const OrtKernelInfo *info_; Ort::AllocatorWithDefaultOptions allocator_; float iou_threshold_; int64_t offset_; }; struct NmsOp : Ort::CustomOpBase { void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const { return new NmsKernel(api, info); }; const char *GetName() const { return "NonMaxSuppression"; }; size_t GetInputTypeCount() const { return 2; }; ONNXTensorElementDataType GetInputType(size_t /*index*/) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; size_t GetOutputTypeCount() const { return 1; }; ONNXTensorElementDataType GetOutputType(size_t index) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; } // force cpu const char *GetExecutionProviderType() const { return "CPUExecutionProvider"; } }; #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/onnxruntime_register.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef ONNXRUNTIME_REGISTER_H #define ONNXRUNTIME_REGISTER_H #include #ifdef __cplusplus extern "C" { #endif OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options, const OrtApiBase *api); #ifdef __cplusplus } #endif #endif // ONNXRUNTIME_REGISTER_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/onnxruntime_session_options_config_keys.h ================================================ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #ifndef ONNXRUNTIME_SESSION_OPTIONS_CONFIG_KEYS_H #define ONNXRUNTIME_SESSION_OPTIONS_CONFIG_KEYS_H /* * This file defines SessionOptions Config Keys and format of the Config Values. 
* * The Naming Convention for a SessionOptions Config Key, * "[Area][.[SubArea1].[SubArea2]...].[Keyname]" * Such as "ep.cuda.use_arena" * The Config Key cannot be empty * The maximum length of the Config Key is 128 * * The string format of a SessionOptions Config Value is defined individually * for each Config. The maximum length of the Config Value is 1024 */ // Key for disable PrePacking, // If the config value is set to "1" then the prepacking is disabled, otherwise // prepacking is enabled (default value) static const char* const kOrtSessionOptionsConfigDisablePrepacking = "session.disable_prepacking"; // A value of "1" means allocators registered in the env will be used. "0" means // the allocators created in the session will be used. Use this to override the // usage of env allocators on a per session level. static const char* const kOrtSessionOptionsConfigUseEnvAllocators = "session.use_env_allocators"; // Set to 'ORT' (case sensitive) to load an ORT format model. // If unset, model type will default to ONNX unless inferred from filename // ('.ort' == ORT format) or bytes to be ORT static const char* const kOrtSessionOptionsConfigLoadModelFormat = "session.load_model_format"; // Set to 'ORT' (case sensitive) to save optimized model in ORT format when // SessionOptions.optimized_model_path is set. If unset, format will default to // ONNX unless optimized_model_filepath ends in '.ort'. static const char* const kOrtSessionOptionsConfigSaveModelFormat = "session.save_model_format"; #endif // ONNXRUNTIME_SESSION_OPTIONS_CONFIG_KEYS_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/ort_mmcv_utils.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef ORT_MMCV_UTILS_H #define ORT_MMCV_UTILS_H #include #include struct OrtTensorDimensions : std::vector { OrtTensorDimensions(Ort::CustomOpApi ort, const OrtValue* value) { OrtTensorTypeAndShapeInfo* info = ort.GetTensorTypeAndShape(value); std::vector::operator=(ort.GetTensorShape(info)); ort.ReleaseTensorTypeAndShapeInfo(info); } }; #endif // ORT_MMCV_UTILS_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/reduce_ops.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef ONNXRUNTIME_REDUCE_OPS_H #define ONNXRUNTIME_REDUCE_OPS_H #include struct MMCVCumMaxKernel { public: MMCVCumMaxKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info) : ort_(ort) { dim_ = ort_.KernelInfoGetAttribute(info, "dim"); // create allocator allocator_ = Ort::AllocatorWithDefaultOptions(); } void Compute(OrtKernelContext* context); private: Ort::CustomOpApi ort_; Ort::AllocatorWithDefaultOptions allocator_; int64_t dim_; }; struct MMCVCumMinKernel { public: MMCVCumMinKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info) : ort_(ort) { dim_ = ort_.KernelInfoGetAttribute(info, "dim"); // create allocator allocator_ = Ort::AllocatorWithDefaultOptions(); } void Compute(OrtKernelContext* context); private: Ort::CustomOpApi ort_; Ort::AllocatorWithDefaultOptions allocator_; int64_t dim_; }; struct MMCVCumMaxCustomOp : Ort::CustomOpBase { void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const { return new MMCVCumMaxKernel(api, info); } const char* GetName() const { return "cummax"; } size_t GetInputTypeCount() const { return 1; } ONNXTensorElementDataType GetInputType(size_t) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; size_t GetOutputTypeCount() const { return 2; } ONNXTensorElementDataType GetOutputType(size_t index) const { if (index == 1) return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; return 
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; // force cpu const char* GetExecutionProviderType() const { return "CPUExecutionProvider"; }; }; struct MMCVCumMinCustomOp : Ort::CustomOpBase { void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const { return new MMCVCumMinKernel(api, info); } const char* GetName() const { return "cummin"; } size_t GetInputTypeCount() const { return 1; } ONNXTensorElementDataType GetInputType(size_t) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; size_t GetOutputTypeCount() const { return 2; } ONNXTensorElementDataType GetOutputType(size_t index) const { if (index == 1) return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; // force cpu const char* GetExecutionProviderType() const { return "CPUExecutionProvider"; }; }; #endif // ONNXRUNTIME_REDUCE_OPS_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/roi_align.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef ONNXRUNTIME_ROI_ALIGN_H #define ONNXRUNTIME_ROI_ALIGN_H #include #include #include #include #include #include struct MMCVRoiAlignKernel { public: MMCVRoiAlignKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info) : ort_(ort) { aligned_ = ort_.KernelInfoGetAttribute(info, "aligned"); aligned_height_ = ort_.KernelInfoGetAttribute(info, "output_height"); aligned_width_ = ort_.KernelInfoGetAttribute(info, "output_width"); pool_mode_ = ort_.KernelInfoGetAttribute(info, "mode"); sampling_ratio_ = ort_.KernelInfoGetAttribute(info, "sampling_ratio"); spatial_scale_ = ort_.KernelInfoGetAttribute(info, "spatial_scale"); } void Compute(OrtKernelContext* context); private: Ort::CustomOpApi ort_; int aligned_height_; int aligned_width_; float spatial_scale_; int sampling_ratio_; std::string pool_mode_; int aligned_; }; struct MMCVRoiAlignCustomOp : Ort::CustomOpBase { void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const { return new MMCVRoiAlignKernel(api, info); } const char* GetName() const { return "MMCVRoiAlign"; } size_t GetInputTypeCount() const { return 2; } ONNXTensorElementDataType GetInputType(size_t) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; } size_t GetOutputTypeCount() const { return 1; } ONNXTensorElementDataType GetOutputType(size_t) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; } // force cpu const char* GetExecutionProviderType() const { return "CPUExecutionProvider"; } }; #endif // ONNXRUNTIME_ROI_ALIGN_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/roi_align_rotated.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef ONNXRUNTIME_ROI_ALIGN_ROTATED_H #define ONNXRUNTIME_ROI_ALIGN_ROTATED_H #include #include #include #include #include #include struct MMCVRoIAlignRotatedKernel { public: MMCVRoIAlignRotatedKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info) : ort_(ort) { aligned_height_ = ort_.KernelInfoGetAttribute(info, "output_height"); aligned_width_ = ort_.KernelInfoGetAttribute(info, "output_width"); sampling_ratio_ = ort_.KernelInfoGetAttribute(info, "sampling_ratio"); spatial_scale_ = ort_.KernelInfoGetAttribute(info, "spatial_scale"); aligned_ = ort_.KernelInfoGetAttribute(info, "aligned"); clockwise_ = ort_.KernelInfoGetAttribute(info, "clockwise"); } void Compute(OrtKernelContext* context); private: Ort::CustomOpApi ort_; int aligned_height_; int aligned_width_; float spatial_scale_; int sampling_ratio_; int aligned_; int clockwise_; }; struct MMCVRoIAlignRotatedCustomOp : Ort::CustomOpBase { void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const { return new MMCVRoIAlignRotatedKernel(api, info); } const char* GetName() const { return "MMCVRoIAlignRotated"; } size_t GetInputTypeCount() const { return 2; } ONNXTensorElementDataType GetInputType(size_t) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; } size_t GetOutputTypeCount() const { return 1; } ONNXTensorElementDataType GetOutputType(size_t) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; } // force cpu const char* GetExecutionProviderType() const { return "CPUExecutionProvider"; } }; #endif // ONNXRUNTIME_ROI_ALIGN_ROTATED_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/soft_nms.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef ONNXRUNTIME_SOFT_NMS_H #define ONNXRUNTIME_SOFT_NMS_H #include struct SoftNmsKernel { SoftNmsKernel(OrtApi api, const OrtKernelInfo *info); void Compute(OrtKernelContext *context); protected: OrtApi api_; Ort::CustomOpApi ort_; const OrtKernelInfo *info_; Ort::AllocatorWithDefaultOptions allocator_; float iou_threshold_; float sigma_; float min_score_; int64_t method_; int64_t offset_; }; struct SoftNmsOp : Ort::CustomOpBase { void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const { return new SoftNmsKernel(api, info); }; const char *GetName() const { return "SoftNonMaxSuppression"; }; size_t GetInputTypeCount() const { return 2; }; ONNXTensorElementDataType GetInputType(size_t /*index*/) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; size_t GetOutputTypeCount() const { return 2; }; ONNXTensorElementDataType GetOutputType(size_t index) const { if (index == 1) { return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; } return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; // force cpu const char *GetExecutionProviderType() const { return "CPUExecutionProvider"; }; }; #endif // ONNXRUNTIME_SOFT_NMS_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/active_rotated_filter.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved. 
// Modified from // https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/ActiveRotatingFilter.h #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void active_rotated_filter_forward_impl(const Tensor input, const Tensor indices, Tensor output) { DISPATCH_DEVICE_IMPL(active_rotated_filter_forward_impl, input, indices, output); } void active_rotated_filter_backward_impl(const Tensor grad_out, const Tensor indices, Tensor grad_in) { DISPATCH_DEVICE_IMPL(active_rotated_filter_backward_impl, grad_out, indices, grad_in); } void active_rotated_filter_forward(const Tensor input, const Tensor indices, Tensor output) { active_rotated_filter_forward_impl(input, indices, output); } void active_rotated_filter_backward(const Tensor grad_out, const Tensor indices, Tensor grad_in) { active_rotated_filter_backward_impl(grad_out, indices, grad_in); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/active_rotated_filter_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "active_rotated_filter_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void active_rotated_filter_forward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { auto input = buildATensor(ctx, ins[0]); auto indices = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); active_rotated_filter_forward(input, indices, output); } void active_rotated_filter_backward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { auto grad_out = buildATensor(ctx, ins[0]); auto indices = buildATensor(ctx, ins[1]); auto grad_in = buildATensor(ctx, outs[0]); active_rotated_filter_backward(grad_out, indices, grad_in); } #endif void active_rotated_filter_forward_cpu_parrots( HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { auto input = buildATensor(ctx, ins[0]); auto indices = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); active_rotated_filter_forward(input, indices, output); } void active_rotated_filter_backward_cpu_parrots( HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { auto grad_out = buildATensor(ctx, ins[0]); auto indices = buildATensor(ctx, ins[1]); auto grad_in = buildATensor(ctx, outs[0]); active_rotated_filter_backward(grad_out, indices, grad_in); } PARROTS_EXTENSION_REGISTER(active_rotated_filter_forward) .input(2) .output(1) .apply(active_rotated_filter_forward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(active_rotated_filter_forward_cuda_parrots) #endif .done(); PARROTS_EXTENSION_REGISTER(active_rotated_filter_backward) .input(2) .output(1) .apply(active_rotated_filter_backward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(active_rotated_filter_backward_cuda_parrots) #endif .done(); 
================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/active_rotated_filter_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef ACTIVE_ROTATED_FILTER_PYTORCH_H #define ACTIVE_ROTATED_FILTER_PYTORCH_H #include using namespace at; void active_rotated_filter_forward(const Tensor input, const Tensor indices, Tensor output); void active_rotated_filter_backward(const Tensor grad_out, const Tensor indices, Tensor grad_in); #endif // ACTIVE_ROTATED_FILTER_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/assign_score_withk.cpp ================================================ // Modified from // https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/paconv_lib/src/gpu #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O, int aggregate, const Tensor& points, const Tensor& centers, const Tensor& scores, const Tensor& knn_idx, Tensor& output) { DISPATCH_DEVICE_IMPL(assign_score_withk_forward_impl, B, N0, N1, M, K, O, aggregate, points, centers, scores, knn_idx, output); } void assign_score_withk_backward_impl( int B, int N0, int N1, int M, int K, int O, int aggregate, const Tensor& grad_out, const Tensor& points, const Tensor& centers, const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points, Tensor& grad_centers, Tensor& grad_scores) { DISPATCH_DEVICE_IMPL(assign_score_withk_backward_impl, B, N0, N1, M, K, O, aggregate, grad_out, points, centers, scores, knn_idx, grad_points, grad_centers, grad_scores); } void assign_score_withk_forward(const Tensor& points, const Tensor& centers, const Tensor& scores, const Tensor& knn_idx, Tensor& output, int B, int N0, int N1, int M, int K, int O, int aggregate) { assign_score_withk_forward_impl(B, N0, N1, M, K, O, aggregate, points, centers, 
scores, knn_idx, output); } void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points, const Tensor& centers, const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points, Tensor& grad_centers, Tensor& grad_scores, int B, int N0, int N1, int M, int K, int O, int aggregate) { assign_score_withk_backward_impl(B, N0, N1, M, K, O, aggregate, grad_out, points, centers, scores, knn_idx, grad_points, grad_centers, grad_scores); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/assign_score_withk_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "assign_score_withk_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void assign_score_withk_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int B, N0, N1, M, K, O, aggregate; SSAttrs(attr) .get("B", B) .get("N0", N0) .get("N1", N1) .get("M", M) .get("K", K) .get("O", O) .get("aggregate", aggregate) .done(); const auto& points = buildATensor(ctx, ins[0]); const auto& centers = buildATensor(ctx, ins[1]); const auto& scores = buildATensor(ctx, ins[2]); const auto& knn_idx = buildATensor(ctx, ins[3]); auto output = buildATensor(ctx, outs[0]); assign_score_withk_forward(points, centers, scores, knn_idx, output, B, N0, N1, M, K, O, aggregate); } void assign_score_withk_backward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int B, N0, N1, M, K, O, aggregate; SSAttrs(attr) .get("B", B) .get("N0", N0) .get("N1", N1) .get("M", M) .get("K", K) .get("O", O) .get("aggregate", aggregate) .done(); const auto& grad_out = buildATensor(ctx, ins[0]); const auto& points = buildATensor(ctx, ins[1]); const auto& centers = buildATensor(ctx, ins[2]); const auto& scores = buildATensor(ctx, ins[3]); const auto& 
knn_idx = buildATensor(ctx, ins[4]); auto grad_points = buildATensor(ctx, outs[0]); auto grad_centers = buildATensor(ctx, outs[1]); auto grad_scores = buildATensor(ctx, outs[2]); assign_score_withk_backward(grad_out, points, centers, scores, knn_idx, grad_points, grad_centers, grad_scores, B, N0, N1, M, K, O, aggregate); } PARROTS_EXTENSION_REGISTER(assign_score_withk_forward) .attr("B") .attr("N0") .attr("N1") .attr("M") .attr("K") .attr("O") .attr("aggregate") .input(4) .output(1) .apply(assign_score_withk_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(assign_score_withk_backward) .attr("B") .attr("N0") .attr("N1") .attr("M") .attr("K") .attr("O") .attr("aggregate") .input(5) .output(3) .apply(assign_score_withk_backward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/assign_score_withk_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef ASSIGN_SCORE_WITHK_PYTORCH_H #define ASSIGN_SCORE_WITHK_PYTORCH_H #include using namespace at; void assign_score_withk_forward(const Tensor& points, const Tensor& centers, const Tensor& scores, const Tensor& knn_idx, Tensor& output, int B, int N0, int N1, int M, int K, int O, int aggregate); void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points, const Tensor& centers, const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points, Tensor& grad_centers, Tensor& grad_scores, int B, int N0, int N1, int M, int K, int O, int aggregate); #endif // ASSIGN_SCORE_WITHK_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/ball_query._parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "ball_query_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void ball_query_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int b, n, m, nsample; float min_radius, max_radius; SSAttrs(attr) .get("b", b) .get("n", n) .get("m", m) .get("nsample", nsample) .get("min_radius", min_radius) .get("max_radius", max_radius) .done(); const auto& center_xyz = buildATensor(ctx, ins[0]); const auto& xyz = buildATensor(ctx, ins[1]); auto idx = buildATensor(ctx, outs[0]); ball_query_forward(center_xyz, xyz, idx, b, n, m, min_radius, max_radius, nsample); } PARROTS_EXTENSION_REGISTER(ball_query_forward) .attr("b") .attr("n") .attr("m") .attr("nsample") .attr("min_radius") .attr("max_radius") .input(2) .output(1) .apply(ball_query_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/ball_query.cpp ================================================ // Modified from // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query.cpp #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void ball_query_forward_impl(int b, int n, int m, float min_radius, float max_radius, int nsample, const Tensor new_xyz, const Tensor xyz, Tensor idx) { DISPATCH_DEVICE_IMPL(ball_query_forward_impl, b, n, m, min_radius, max_radius, nsample, new_xyz, xyz, idx); } void ball_query_forward(Tensor new_xyz_tensor, Tensor xyz_tensor, Tensor idx_tensor, int b, int n, int m, float min_radius, float max_radius, int nsample) { ball_query_forward_impl(b, n, m, min_radius, max_radius, nsample, new_xyz_tensor, xyz_tensor, idx_tensor); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/ball_query_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef BALL_QUERY_PYTORCH_H #define BALL_QUERY_PYTORCH_H #include using namespace at; void ball_query_forward(const Tensor new_xyz, const Tensor xyz, Tensor idx, int b, int n, int m, float min_radius, float max_radius, int nsample); #endif // BALL_QUERY_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/bbox_overlaps.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, const int mode, const bool aligned, const int offset) { DISPATCH_DEVICE_IMPL(bbox_overlaps_impl, bboxes1, bboxes2, ious, mode, aligned, offset); } void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, const int mode, const bool aligned, const int offset) { bbox_overlaps_impl(bboxes1, bboxes2, ious, mode, aligned, offset); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/bbox_overlaps_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "bbox_overlaps_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA /* * void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor * ious, const int mode, const bool aligned, const int offset); */ void bbox_overlaps_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int mode, offset; bool aligned; SSAttrs(attr) .get("mode", mode) .get("aligned", aligned) .get("offset", offset) .done(); const auto& bboxes1 = buildATensor(ctx, ins[0]); const auto& bboxes2 = buildATensor(ctx, ins[1]); auto ious = buildATensor(ctx, outs[0]); bbox_overlaps_cuda(bboxes1, bboxes2, ious, mode, aligned, offset); } PARROTS_EXTENSION_REGISTER(bbox_overlaps) .attr("mode") .attr("aligned") .attr("offset") .input(2) .output(1) .apply(bbox_overlaps_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/bbox_overlaps_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef BBOX_OVERLAPS_PYTORCH_H #define BBOX_OVERLAPS_PYTORCH_H #include using namespace at; void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, const int mode, const bool aligned, const int offset); #endif // BBOX_OVERLAPS_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/border_align.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forward dispatch point: hands (input, boxes, output, argmax_idx, pool_size)
// to DISPATCH_DEVICE_IMPL under the key border_align_forward_impl.
// NOTE(review): the macro is defined outside this file (presumably in
// pytorch_device_registry.hpp) and presumably selects a device-specific
// implementation -- confirm there.
void border_align_forward_impl(const Tensor &input, const Tensor &boxes,
                               Tensor output, Tensor argmax_idx,
                               const int pool_size) {
  DISPATCH_DEVICE_IMPL(border_align_forward_impl, input, boxes, output,
                       argmax_idx, pool_size);
}

// Backward dispatch point: hands (grad_output, boxes, argmax_idx, grad_input,
// pool_size) to DISPATCH_DEVICE_IMPL under the key
// border_align_backward_impl.
void border_align_backward_impl(const Tensor &grad_output, const Tensor &boxes,
                                const Tensor &argmax_idx, Tensor grad_input,
                                const int pool_size) {
  DISPATCH_DEVICE_IMPL(border_align_backward_impl, grad_output, boxes,
                       argmax_idx, grad_input, pool_size);
}

// Public forward entry point; passes its arguments unchanged to
// border_align_forward_impl.
void border_align_forward(const Tensor &input, const Tensor &boxes,
                          Tensor output, Tensor argmax_idx,
                          const int pool_size) {
  border_align_forward_impl(input, boxes, output, argmax_idx, pool_size);
}

// Public backward entry point; passes its arguments unchanged to
// border_align_backward_impl.
void border_align_backward(const Tensor &grad_output, const Tensor &boxes,
                           const Tensor &argmax_idx, Tensor grad_input,
                           const int pool_size) {
  border_align_backward_impl(grad_output, boxes, argmax_idx, grad_input,
                             pool_size);
}
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/border_align_parrots.cpp
================================================
// Copyright (c) OpenMMLab.
All rights reserved #include #include #include #include "border_align_pytorch.h" using namespace parrots; void border_align_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pool_size; SSAttrs(attr).get("pool_size", pool_size).done(); const auto& input = buildATensor(ctx, ins[0]); const auto& boxes = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); auto argmax_idx = buildATensor(ctx, outs[1]); border_align_forward_cuda(input, boxes, output, argmax_idx, pool_size); } void border_align_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pool_size; SSAttrs(attr).get("pool_size", pool_size).done(); const auto& top_grad = buildATensor(ctx, ins[0]); const auto& boxes = buildATensor(ctx, ins[1]); const auto& argmax_idx = buildATensor(ctx, ins[2]); auto bottom_grad = buildATensor(ctx, outs[0]); border_align_backward_cuda(top_grad, boxes, argmax_idx, bottom_grad, pool_size); } PARROTS_EXTENSION_REGISTER(border_align_forward) .attr("pool_size") .input(2) .output(2) .apply(border_align_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(border_align_backward) .attr("pool_size") .input(3) .output(1) .apply(border_align_backward_cuda_parrots) .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/border_align_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef BORDER_ALIGN_PYTORCH_H #define BORDER_ALIGN_PYTORCH_H #include using namespace at; #ifdef MMCV_WITH_CUDA void border_align_forward_cuda(const Tensor &input, const Tensor &boxes, Tensor output, Tensor argmax_idx, const int pool_size); void border_align_backward_cuda(const Tensor &grad_output, const Tensor &boxes, const Tensor &argmax_idx, Tensor grad_input, const int pool_size); #endif #endif // BORDER_ALIGN_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/box_iou_rotated.cpp ================================================ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved // modified from // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious, const int mode_flag, const bool aligned) { DISPATCH_DEVICE_IMPL(box_iou_rotated_impl, boxes1, boxes2, ious, mode_flag, aligned); } // Interface for Python // inline is needed to prevent multiple function definitions when this header is // included by different cpps void box_iou_rotated(const Tensor boxes1, const Tensor boxes2, Tensor ious, const int mode_flag, const bool aligned) { box_iou_rotated_impl(boxes1, boxes2, ious, mode_flag, aligned); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/box_iou_rotated_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "box_iou_rotated_pytorch.h" using namespace parrots; /* * void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2, Tensor * ious, const int mode_flag, const bool aligned); */ void box_iou_rotated_cpu_parrots(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { bool aligned; int mode_flag; SSAttrs(attr) .get("aligned", aligned) .get("mode_flag", mode_flag) .done(); const auto& boxes1 = buildATensor(ctx, ins[0]); const auto& boxes2 = buildATensor(ctx, ins[1]); auto ious = buildATensor(ctx, outs[0]); box_iou_rotated_cpu(boxes1, boxes2, ious, mode_flag, aligned); } #ifdef MMCV_WITH_CUDA /* * void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor * ious, const int mode_flag, const bool aligned); */ void box_iou_rotated_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { bool aligned; int mode_flag; SSAttrs(attr) .get("aligned", aligned) .get("mode_flag", mode_flag) .done(); const auto& boxes1 = buildATensor(ctx, ins[0]); const auto& boxes2 = buildATensor(ctx, ins[1]); auto ious = buildATensor(ctx, outs[0]); box_iou_rotated_cuda(boxes1, boxes2, ious, mode_flag, aligned); } #endif PARROTS_EXTENSION_REGISTER(box_iou_rotated) .attr("aligned") .attr("mode_flag") .input(2) .output(1) .apply(box_iou_rotated_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(box_iou_rotated_cuda_parrots) #endif .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/box_iou_rotated_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef BOX_IOU_ROTATED_PYTORCH_H #define BOX_IOU_ROTATED_PYTORCH_H #include using namespace at; void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2, Tensor ious, const int mode_flag, const bool aligned); #ifdef MMCV_WITH_CUDA void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor ious, const int mode_flag, const bool aligned); #endif #endif // BOX_IOU_ROTATED_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/carafe.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void carafe_forward_impl(Tensor features, Tensor masks, Tensor rfeatures, Tensor routput, Tensor rmasks, Tensor output, int kernel_size, int group_size, int scale_factor) { DISPATCH_DEVICE_IMPL(carafe_forward_impl, features, masks, rfeatures, routput, rmasks, output, kernel_size, group_size, scale_factor); } void carafe_backward_impl(Tensor top_grad, Tensor rfeatures, Tensor masks, Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor) { DISPATCH_DEVICE_IMPL(carafe_backward_impl, top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs, rbottom_grad, rmask_grad, bottom_grad, mask_grad, kernel_size, group_size, scale_factor); } void carafe_forward(Tensor features, Tensor masks, Tensor rfeatures, Tensor routput, Tensor rmasks, Tensor output, int kernel_size, int group_size, int scale_factor) { carafe_forward_impl(features, masks, rfeatures, routput, rmasks, output, kernel_size, group_size, scale_factor); } void carafe_backward(Tensor top_grad, Tensor rfeatures, Tensor masks, Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor) { 
carafe_backward_impl(top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs, rbottom_grad, rmask_grad, bottom_grad, mask_grad, kernel_size, group_size, scale_factor); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/carafe_naive.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void carafe_naive_forward_impl(Tensor features, Tensor masks, Tensor output, int kernel_size, int group_size, int scale_factor) { DISPATCH_DEVICE_IMPL(carafe_naive_forward_impl, features, masks, output, kernel_size, group_size, scale_factor); } void carafe_naive_backward_impl(Tensor top_grad, Tensor features, Tensor masks, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor) { DISPATCH_DEVICE_IMPL(carafe_naive_backward_impl, top_grad, features, masks, bottom_grad, mask_grad, kernel_size, group_size, scale_factor); } void carafe_naive_forward(Tensor features, Tensor masks, Tensor output, int kernel_size, int group_size, int scale_factor) { carafe_naive_forward_impl(features, masks, output, kernel_size, group_size, scale_factor); } void carafe_naive_backward(Tensor top_grad, Tensor features, Tensor masks, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor) { carafe_naive_backward_impl(top_grad, features, masks, bottom_grad, mask_grad, kernel_size, group_size, scale_factor); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/carafe_naive_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "carafe_naive_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA /*void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output, * int kernel_size, int group_size, * int scale_factor) */ void carafe_naive_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kernel_size, group_size, scale_factor; SSAttrs(attr) .get("kernel_size", kernel_size) .get("group_size", group_size) .get("scale_factor", scale_factor) .done(); const auto& features = buildATensor(ctx, ins[0]); const auto& masks = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); carafe_naive_forward_cuda(features, masks, output, kernel_size, group_size, scale_factor); } /*void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor * masks, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, * int scale_factor); */ void carafe_naive_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kernel_size, group_size, scale_factor; SSAttrs(attr) .get("kernel_size", kernel_size) .get("group_size", group_size) .get("scale_factor", scale_factor) .done(); const auto& top_grad = buildATensor(ctx, ins[0]); const auto& features = buildATensor(ctx, ins[1]); const auto& masks = buildATensor(ctx, ins[2]); auto bottom_grad = buildATensor(ctx, outs[0]); auto mask_grad = buildATensor(ctx, outs[1]); carafe_naive_backward_cuda(top_grad, features, masks, bottom_grad, mask_grad, kernel_size, group_size, scale_factor); } PARROTS_EXTENSION_REGISTER(carafe_naive_forward) .attr("kernel_size") .attr("group_size") .attr("scale_factor") .input(2) .output(1) .apply(carafe_naive_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(carafe_naive_backward) .attr("kernel_size") .attr("group_size") .attr("scale_factor") .input(3) .output(2) 
.apply(carafe_naive_backward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/carafe_naive_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef CARAFE_NAIVE_PYTORCH_H #define CARAFE_NAIVE_PYTORCH_H #include using namespace at; void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output, int kernel_size, int group_size, int scale_factor); void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor masks, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor); #endif // CARAFE_NAIVE_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/carafe_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "carafe_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA /* * void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures, * Tensor routput, Tensor rmasks, Tensor output, * int kernel_size, int group_size, int scale_factor); */ void carafe_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kernel_size, group_size, scale_factor; SSAttrs(attr) .get("kernel_size", kernel_size) .get("group_size", group_size) .get("scale_factor", scale_factor) .done(); const auto& features = buildATensor(ctx, ins[0]); const auto& masks = buildATensor(ctx, ins[1]); auto rfeatures = buildATensor(ctx, outs[0]); auto routput = buildATensor(ctx, outs[1]); auto rmasks = buildATensor(ctx, outs[2]); auto output = buildATensor(ctx, outs[3]); carafe_forward_cuda(features, masks, rfeatures, routput, rmasks, output, kernel_size, group_size, scale_factor); } /* * void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks, * 
Tensor rtop_grad, Tensor rbottom_grad_hs, * Tensor rbottom_grad, Tensor rmask_grad, * Tensor bottom_grad, Tensor mask_grad, int * kernel_size, int group_size, int scale_factor); */ void carafe_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kernel_size, group_size, scale_factor; SSAttrs(attr) .get("kernel_size", kernel_size) .get("group_size", group_size) .get("scale_factor", scale_factor) .done(); const auto& top_grad = buildATensor(ctx, ins[0]); const auto& rfeatures = buildATensor(ctx, ins[1]); const auto& masks = buildATensor(ctx, ins[2]); auto rtop_grad = buildATensor(ctx, outs[0]); auto rbottom_grad_hs = buildATensor(ctx, outs[1]); auto rbottom_grad = buildATensor(ctx, outs[2]); auto rmask_grad = buildATensor(ctx, outs[3]); auto bottom_grad = buildATensor(ctx, outs[4]); auto mask_grad = buildATensor(ctx, outs[5]); carafe_backward_cuda(top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs, rbottom_grad, rmask_grad, bottom_grad, mask_grad, kernel_size, group_size, scale_factor); } PARROTS_EXTENSION_REGISTER(carafe_forward) .attr("kernel_size") .attr("group_size") .attr("scale_factor") .input(2) .output(4) .apply(carafe_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(carafe_backward) .attr("kernel_size") .attr("group_size") .attr("scale_factor") .input(3) .output(6) .apply(carafe_backward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/carafe_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef CARAFE_PYTORCH_H #define CARAFE_PYTORCH_H #include using namespace at; void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures, Tensor routput, Tensor rmasks, Tensor output, int kernel_size, int group_size, int scale_factor); void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks, Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor); #endif // CARAFE_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/contour_expand.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved // It is modified from https://github.com/whai362/PSENet #include #include #include "pytorch_cpp_helper.hpp" using namespace std; class Point2d { public: int x; int y; Point2d() : x(0), y(0) {} Point2d(int _x, int _y) : x(_x), y(_y) {} }; void kernel_dilate(const uint8_t *data, IntArrayRef data_shape, const int *label_map, int &label_num, int &min_area, vector> &text_line) { std::vector area(label_num + 1); int kernel_num = data_shape[0]; int height = data_shape[1]; int width = data_shape[2]; for (int x = 0; x < height; ++x) { for (int y = 0; y < width; ++y) { int label = label_map[x * width + y]; if (label == 0) continue; area[label] += 1; } } queue queue, next_queue; for (int x = 0; x < height; ++x) { vector row(width); for (int y = 0; y < width; ++y) { int label = label_map[x * width + y]; if (label == 0) continue; if (area[label] < min_area) continue; Point2d point(x, y); queue.push(point); row[y] = label; } text_line.emplace_back(row); } int dx[] = {-1, 1, 0, 0}; int dy[] = {0, 0, -1, 1}; vector kernel_step(kernel_num); std::for_each(kernel_step.begin(), kernel_step.end(), [=](int &k) { return k * height * width; }); for (int kernel_id = kernel_num - 2; kernel_id >= 0; --kernel_id) { while 
(!queue.empty()) { Point2d point = queue.front(); queue.pop(); int x = point.x; int y = point.y; int label = text_line[x][y]; bool is_edge = true; for (int d = 0; d < 4; ++d) { int tmp_x = x + dx[d]; int tmp_y = y + dy[d]; if (tmp_x < 0 || tmp_x >= height) continue; if (tmp_y < 0 || tmp_y >= width) continue; int kernel_value = data[kernel_step[kernel_id] + tmp_x * width + tmp_y]; if (kernel_value == 0) continue; if (text_line[tmp_x][tmp_y] > 0) continue; Point2d point(tmp_x, tmp_y); queue.push(point); text_line[tmp_x][tmp_y] = label; is_edge = false; } if (is_edge) { next_queue.push(point); } } swap(queue, next_queue); } } std::vector> contour_expand(Tensor kernel_mask, Tensor internal_kernel_label, int min_kernel_area, int kernel_num) { kernel_mask = kernel_mask.contiguous(); internal_kernel_label = internal_kernel_label.contiguous(); assert(kernel_mask.dim() == 3); assert(internal_kernel_label.dim() == 2); assert(kernel_mask.size(1) == internal_kernel_label.size(0)); assert(kernel_mask.size(2) == internal_kernel_label.size(1)); CHECK_CPU_INPUT(kernel_mask); CHECK_CPU_INPUT(internal_kernel_label); auto ptr_data = kernel_mask.data_ptr(); IntArrayRef data_shape = kernel_mask.sizes(); auto data_label_map = internal_kernel_label.data_ptr(); IntArrayRef label_map_shape = internal_kernel_label.sizes(); vector> text_line; kernel_dilate(ptr_data, data_shape, data_label_map, kernel_num, min_kernel_area, text_line); return text_line; } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/contour_expand_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved
#include
#include
#include

#include "contour_expand_pytorch.h"

using namespace parrots;
using namespace std;

// Parrots binding for contour_expand.
//
// Reads the integer attributes "min_kernel_area" and "kernel_num", converts
// ins[0] / ins[1] to ATen tensors, runs contour_expand and packs the returned
// rows into an int32 tensor of shape {rows, longest row} that is written to
// outs[0].
//
// NOTE(review): T is the context type passed as `ctx`; the template
// parameter list after `template` is missing from this copy of the file and
// looks like it was lost in extraction - confirm against upstream.
template void contour_expand_parrots(T& ctx, const SSElement& attr,
                                     const OperatorBase::in_list_t& ins,
                                     OperatorBase::out_list_t& outs) {
  int min_kernel_area, kernel_num;
  SSAttrs(attr)
      .get("min_kernel_area", min_kernel_area)
      .get("kernel_num", kernel_num)
      .done();
  at::Tensor kernel_mask;
  at::Tensor internal_kernel_label;
  kernel_mask = buildATensor(ctx, ins[0]);
  internal_kernel_label = buildATensor(ctx, ins[1]);
  auto out = contour_expand(kernel_mask, internal_kernel_label,
                            min_kernel_area, kernel_num);
  // n = number of rows, m = length of the longest row.
  int n = out.size(), m = 0;
  for (int i = 0; i < n; ++i)
    if (m < out[i].size()) m = out[i].size();
  auto options = torch::TensorOptions().dtype(at::kInt);
  auto tensor = torch::zeros({n, m}, options);
  // Copy each row into the zero-initialised output. from_blob only wraps the
  // vector's storage; the assignment to the slice is what copies the values.
  for (int i = 0; i < n; i++)
    tensor.slice(0, i, i + 1) =
        torch::from_blob(out[i].data(), {out[i].size()}, options);
  updateDArray(ctx, tensor, outs[0]);
}

// Operator registration: two attributes, two inputs, one output.
PARROTS_EXTENSION_REGISTER(contour_expand)
    .attr("min_kernel_area")
    .attr("kernel_num")
    .input(2)
    .output(1)
    .apply(contour_expand_parrots)
    .done();

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/contour_expand_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef CONTOUR_EXPAND_PYTORCH_H
#define CONTOUR_EXPAND_PYTORCH_H
#include
using namespace at;

// Declaration of the contour-expansion entry point defined in
// contour_expand.cpp.
std::vector> contour_expand(Tensor kernel_mask, Tensor internal_kernel_label,
                            int min_kernel_area, int kernel_num);

#endif  // CONTOUR_EXPAND_PYTORCH_H

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/convex_iou.cpp
================================================
// Copyright (c) OpenMMLab.
All rights reserved
// modified from
// https://github.com/SDL-GuoZonghao/BeyondBoundingBox/tree/main/mmdet/ops/iou/src
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch point for the convex IoU kernel. All work is delegated through
// DISPATCH_DEVICE_IMPL; presumably this selects the implementation
// registered for the tensors' device - confirm in
// pytorch_device_registry.hpp.
void convex_iou_impl(const Tensor pointsets, const Tensor polygons,
                     Tensor ious) {
  DISPATCH_DEVICE_IMPL(convex_iou_impl, pointsets, polygons, ious);
}

// Public entry point: forwards its arguments unchanged to convex_iou_impl.
// `ious` is the tensor handed to the implementation to receive the result.
void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious) {
  convex_iou_impl(pointsets, polygons, ious);
}

// Dispatch point for the convex GIoU kernel; same mechanism as
// convex_iou_impl.
void convex_giou_impl(const Tensor pointsets, const Tensor polygons,
                      Tensor output) {
  DISPATCH_DEVICE_IMPL(convex_giou_impl, pointsets, polygons, output);
}

// Public entry point: forwards its arguments unchanged to convex_giou_impl.
void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output) {
  convex_giou_impl(pointsets, polygons, output);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/convex_iou_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include
#include
#include

#include "convex_iou_pytorch.h"
using namespace parrots;

// Both bindings below exist only in CUDA builds; no HostContext variant is
// registered in this file.
#ifdef MMCV_WITH_CUDA
// Parrots binding for convex_iou: wraps ins[0], ins[1] and outs[0] as ATen
// tensors and calls convex_iou on them. `attr` is unused.
void convex_iou_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                     const OperatorBase::in_list_t& ins,
                                     OperatorBase::out_list_t& outs) {
  auto pointsets = buildATensor(ctx, ins[0]);
  auto polygons = buildATensor(ctx, ins[1]);
  auto ious = buildATensor(ctx, outs[0]);
  convex_iou(pointsets, polygons, ious);
}

// Parrots binding for convex_giou: same marshalling as above, calling
// convex_giou. `attr` is unused.
void convex_giou_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                      const OperatorBase::in_list_t& ins,
                                      OperatorBase::out_list_t& outs) {
  auto pointsets = buildATensor(ctx, ins[0]);
  auto polygons = buildATensor(ctx, ins[1]);
  auto output = buildATensor(ctx, outs[0]);
  convex_giou(pointsets, polygons, output);
}

// Operator registration: two inputs, one output each.
PARROTS_EXTENSION_REGISTER(convex_iou)
    .input(2)
    .output(1)
    .apply(convex_iou_forward_cuda_parrots)
    .done();

PARROTS_EXTENSION_REGISTER(convex_giou)
    .input(2)
    .output(1)
    .apply(convex_giou_forward_cuda_parrots)
    .done();

#endif
================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/convex_iou_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef CONVEX_IOU_PYTORCH_H #define CONVEX_IOU_PYTORCH_H #include using namespace at; void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious); void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output); #endif // RIROI_ALIGN_ROTATED_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/corner_pool.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved // Modified from // https://github.com/princeton-vl/CornerNet-Lite/tree/master/core/models/py_utils/_cpools/src #include "pytorch_cpp_helper.hpp" Tensor bottom_pool_forward(Tensor input) { // Initialize output Tensor output = at::zeros_like(input); // Get height int64_t height = input.size(2); output.copy_(input); for (int64_t ind = 1; ind < height; ind <<= 1) { Tensor max_temp = at::slice(output, 2, ind, height); Tensor cur_temp = at::slice(output, 2, ind, height).clone(); Tensor next_temp = at::slice(output, 2, 0, height - ind).clone(); at::max_out(max_temp, cur_temp, next_temp); } return output; } Tensor bottom_pool_backward(Tensor input, Tensor grad_output) { auto output = at::zeros_like(input); int32_t batch = input.size(0); int32_t channel = input.size(1); int32_t height = input.size(2); int32_t width = input.size(3); auto max_val = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); auto max_ind = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kLong)); auto input_temp = input.select(2, 0); max_val.copy_(input_temp); max_ind.fill_(0); auto output_temp = output.select(2, 0); auto grad_output_temp = grad_output.select(2, 0); output_temp.copy_(grad_output_temp); auto un_max_ind = 
max_ind.unsqueeze(2); auto gt_mask = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kBool)); auto max_temp = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); for (int32_t ind = 0; ind < height - 1; ++ind) { input_temp = input.select(2, ind + 1); at::gt_out(gt_mask, input_temp, max_val); at::masked_select_out(max_temp, input_temp, gt_mask); max_val.masked_scatter_(gt_mask, max_temp); max_ind.masked_fill_(gt_mask, ind + 1); grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2); output.scatter_add_(2, un_max_ind, grad_output_temp); } return output; } Tensor left_pool_forward(Tensor input) { // Initialize output Tensor output = at::zeros_like(input); // Get width int64_t width = input.size(3); output.copy_(input); for (int64_t ind = 1; ind < width; ind <<= 1) { Tensor max_temp = at::slice(output, 3, 0, width - ind); Tensor cur_temp = at::slice(output, 3, 0, width - ind).clone(); Tensor next_temp = at::slice(output, 3, ind, width).clone(); at::max_out(max_temp, cur_temp, next_temp); } return output; } Tensor left_pool_backward(Tensor input, Tensor grad_output) { auto output = at::zeros_like(input); int32_t batch = input.size(0); int32_t channel = input.size(1); int32_t height = input.size(2); int32_t width = input.size(3); auto max_val = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); auto max_ind = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kLong)); auto input_temp = input.select(3, width - 1); max_val.copy_(input_temp); max_ind.fill_(width - 1); auto output_temp = output.select(3, width - 1); auto grad_output_temp = grad_output.select(3, width - 1); output_temp.copy_(grad_output_temp); auto un_max_ind = max_ind.unsqueeze(3); auto gt_mask = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kBool)); auto max_temp = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); for (int32_t ind = 1; ind < 
width; ++ind) { input_temp = input.select(3, width - ind - 1); at::gt_out(gt_mask, input_temp, max_val); at::masked_select_out(max_temp, input_temp, gt_mask); max_val.masked_scatter_(gt_mask, max_temp); max_ind.masked_fill_(gt_mask, width - ind - 1); grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3); output.scatter_add_(3, un_max_ind, grad_output_temp); } return output; } Tensor right_pool_forward(Tensor input) { // Initialize output Tensor output = at::zeros_like(input); // Get width int64_t width = input.size(3); output.copy_(input); for (int64_t ind = 1; ind < width; ind <<= 1) { Tensor max_temp = at::slice(output, 3, ind, width); Tensor cur_temp = at::slice(output, 3, ind, width).clone(); Tensor next_temp = at::slice(output, 3, 0, width - ind).clone(); at::max_out(max_temp, cur_temp, next_temp); } return output; } Tensor right_pool_backward(Tensor input, Tensor grad_output) { Tensor output = at::zeros_like(input); int32_t batch = input.size(0); int32_t channel = input.size(1); int32_t height = input.size(2); int32_t width = input.size(3); auto max_val = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); auto max_ind = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kLong)); auto input_temp = input.select(3, 0); max_val.copy_(input_temp); max_ind.fill_(0); auto output_temp = output.select(3, 0); auto grad_output_temp = grad_output.select(3, 0); output_temp.copy_(grad_output_temp); auto un_max_ind = max_ind.unsqueeze(3); auto gt_mask = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kBool)); auto max_temp = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); for (int32_t ind = 0; ind < width - 1; ++ind) { input_temp = input.select(3, ind + 1); at::gt_out(gt_mask, input_temp, max_val); at::masked_select_out(max_temp, input_temp, gt_mask); max_val.masked_scatter_(gt_mask, max_temp); max_ind.masked_fill_(gt_mask, ind + 1); grad_output_temp = 
grad_output.select(3, ind + 1).unsqueeze(3); output.scatter_add_(3, un_max_ind, grad_output_temp); } return output; } Tensor top_pool_forward(Tensor input) { // Initialize output Tensor output = at::zeros_like(input); // Get height int64_t height = input.size(2); output.copy_(input); for (int64_t ind = 1; ind < height; ind <<= 1) { Tensor max_temp = at::slice(output, 2, 0, height - ind); Tensor cur_temp = at::slice(output, 2, 0, height - ind).clone(); Tensor next_temp = at::slice(output, 2, ind, height).clone(); at::max_out(max_temp, cur_temp, next_temp); } return output; } Tensor top_pool_backward(Tensor input, Tensor grad_output) { auto output = at::zeros_like(input); int32_t batch = input.size(0); int32_t channel = input.size(1); int32_t height = input.size(2); int32_t width = input.size(3); auto max_val = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); auto max_ind = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kLong)); auto input_temp = input.select(2, height - 1); max_val.copy_(input_temp); max_ind.fill_(height - 1); auto output_temp = output.select(2, height - 1); auto grad_output_temp = grad_output.select(2, height - 1); output_temp.copy_(grad_output_temp); auto un_max_ind = max_ind.unsqueeze(2); auto gt_mask = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kBool)); auto max_temp = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); for (int32_t ind = 1; ind < height; ++ind) { input_temp = input.select(2, height - ind - 1); at::gt_out(gt_mask, input_temp, max_val); at::masked_select_out(max_temp, input_temp, gt_mask); max_val.masked_scatter_(gt_mask, max_temp); max_ind.masked_fill_(gt_mask, height - ind - 1); grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2); output.scatter_add_(2, un_max_ind, grad_output_temp); } return output; } ================================================ FILE: 
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/corner_pool_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "corner_pool_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void bottom_pool_forward_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input; input = buildATensor(ctx, ins[0]); auto out = bottom_pool_forward(input); updateDArray(ctx, out, outs[0]); } void bottom_pool_backward_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input, grad_output; input = buildATensor(ctx, ins[0]); grad_output = buildATensor(ctx, ins[1]); auto out = bottom_pool_backward(input, grad_output); updateDArray(ctx, out, outs[0]); } void left_pool_forward_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input; input = buildATensor(ctx, ins[0]); auto out = left_pool_forward(input); updateDArray(ctx, out, outs[0]); } void left_pool_backward_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input, grad_output; input = buildATensor(ctx, ins[0]); grad_output = buildATensor(ctx, ins[1]); auto out = left_pool_backward(input, grad_output); updateDArray(ctx, out, outs[0]); } void right_pool_forward_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input; input = buildATensor(ctx, ins[0]); auto out = right_pool_forward(input); updateDArray(ctx, out, outs[0]); } void right_pool_backward_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input, grad_output; input = buildATensor(ctx, ins[0]); grad_output = 
buildATensor(ctx, ins[1]); auto out = right_pool_backward(input, grad_output); updateDArray(ctx, out, outs[0]); } void top_pool_forward_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input; input = buildATensor(ctx, ins[0]); auto out = top_pool_forward(input); updateDArray(ctx, out, outs[0]); } void top_pool_backward_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input, grad_output; input = buildATensor(ctx, ins[0]); grad_output = buildATensor(ctx, ins[1]); auto out = top_pool_backward(input, grad_output); updateDArray(ctx, out, outs[0]); } #endif void bottom_pool_forward_parrots_cpu(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input; input = buildATensor(ctx, ins[0]); auto out = bottom_pool_forward(input); updateDArray(ctx, out, outs[0]); } void bottom_pool_backward_parrots_cpu(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input, grad_output; input = buildATensor(ctx, ins[0]); grad_output = buildATensor(ctx, ins[1]); auto out = bottom_pool_backward(input, grad_output); updateDArray(ctx, out, outs[0]); } void left_pool_forward_parrots_cpu(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input; input = buildATensor(ctx, ins[0]); auto out = left_pool_forward(input); updateDArray(ctx, out, outs[0]); } void left_pool_backward_parrots_cpu(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input, grad_output; input = buildATensor(ctx, ins[0]); grad_output = buildATensor(ctx, ins[1]); auto out = left_pool_backward(input, grad_output); updateDArray(ctx, out, outs[0]); } void right_pool_forward_parrots_cpu(HostContext& 
ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input; input = buildATensor(ctx, ins[0]); auto out = right_pool_forward(input); updateDArray(ctx, out, outs[0]); } void right_pool_backward_parrots_cpu(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input, grad_output; input = buildATensor(ctx, ins[0]); grad_output = buildATensor(ctx, ins[1]); auto out = right_pool_backward(input, grad_output); updateDArray(ctx, out, outs[0]); } void top_pool_forward_parrots_cpu(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input; input = buildATensor(ctx, ins[0]); auto out = top_pool_forward(input); updateDArray(ctx, out, outs[0]); } void top_pool_backward_parrots_cpu(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { at::Tensor input, grad_output; input = buildATensor(ctx, ins[0]); grad_output = buildATensor(ctx, ins[1]); auto out = top_pool_backward(input, grad_output); updateDArray(ctx, out, outs[0]); } PARROTS_EXTENSION_REGISTER(bottom_pool_forward) .input(1) .output(1) #ifdef MMCV_WITH_CUDA .apply(bottom_pool_forward_parrots) #endif .apply(bottom_pool_forward_parrots_cpu) .done(); PARROTS_EXTENSION_REGISTER(bottom_pool_backward) .input(2) .output(1) #ifdef MMCV_WITH_CUDA .apply(bottom_pool_backward_parrots) #endif .apply(bottom_pool_backward_parrots_cpu) .done(); PARROTS_EXTENSION_REGISTER(top_pool_forward) .input(1) .output(1) #ifdef MMCV_WITH_CUDA .apply(top_pool_forward_parrots) #endif .apply(top_pool_forward_parrots_cpu) .done(); PARROTS_EXTENSION_REGISTER(top_pool_backward) .input(2) .output(1) #ifdef MMCV_WITH_CUDA .apply(top_pool_backward_parrots) #endif .apply(top_pool_backward_parrots_cpu) .done(); PARROTS_EXTENSION_REGISTER(left_pool_forward) .input(1) .output(1) #ifdef MMCV_WITH_CUDA 
.apply(left_pool_forward_parrots) #endif .apply(left_pool_forward_parrots_cpu) .done(); PARROTS_EXTENSION_REGISTER(left_pool_backward) .input(2) .output(1) #ifdef MMCV_WITH_CUDA .apply(left_pool_backward_parrots) #endif .apply(left_pool_backward_parrots_cpu) .done(); PARROTS_EXTENSION_REGISTER(right_pool_forward) .input(1) .output(1) #ifdef MMCV_WITH_CUDA .apply(right_pool_forward_parrots) #endif .apply(right_pool_forward_parrots_cpu) .done(); PARROTS_EXTENSION_REGISTER(right_pool_backward) .input(2) .output(1) #ifdef MMCV_WITH_CUDA .apply(right_pool_backward_parrots) #endif .apply(right_pool_backward_parrots_cpu) .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/corner_pool_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef CORNER_POOL_PYTORCH_H #define CORNER_POOL_PYTORCH_H #include at::Tensor bottom_pool_forward(at::Tensor input); at::Tensor bottom_pool_backward(at::Tensor input, at::Tensor grad_output); at::Tensor left_pool_forward(at::Tensor input); at::Tensor left_pool_backward(at::Tensor input, at::Tensor grad_output); at::Tensor right_pool_forward(at::Tensor input); at::Tensor right_pool_backward(at::Tensor input, at::Tensor grad_output); at::Tensor top_pool_forward(at::Tensor input); at::Tensor top_pool_backward(at::Tensor input, at::Tensor grad_output); #endif // CORNER_POOL_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/correlation.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved. 
#include

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch point for the correlation forward kernel. All arguments are
// handed unchanged to DISPATCH_DEVICE_IMPL; presumably this selects the
// implementation registered for the tensors' device - confirm in
// pytorch_device_registry.hpp.
void correlation_forward_impl(Tensor input1, Tensor input2, Tensor output,
                              int kH, int kW, int patchH, int patchW, int padH,
                              int padW, int dilationH, int dilationW,
                              int dilation_patchH, int dilation_patchW, int dH,
                              int dW) {
  DISPATCH_DEVICE_IMPL(correlation_forward_impl, input1, input2, output, kH, kW,
                       patchH, patchW, padH, padW, dilationH, dilationW,
                       dilation_patchH, dilation_patchW, dH, dW);
}

// Dispatch point for the correlation backward kernel; same mechanism as
// correlation_forward_impl. grad_input1 / grad_input2 are the tensors handed
// to the implementation to receive the gradients.
void correlation_backward_impl(Tensor grad_output, Tensor input1, Tensor input2,
                               Tensor grad_input1, Tensor grad_input2, int kH,
                               int kW, int patchH, int patchW, int padH,
                               int padW, int dilationH, int dilationW,
                               int dilation_patchH, int dilation_patchW, int dH,
                               int dW) {
  DISPATCH_DEVICE_IMPL(correlation_backward_impl, grad_output, input1, input2,
                       grad_input1, grad_input2, kH, kW, patchH, patchW, padH,
                       padW, dilationH, dilationW, dilation_patchH,
                       dilation_patchW, dH, dW);
}

// Public entry point: forwards every argument, in order, to
// correlation_forward_impl.
void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH,
                         int kW, int patchH, int patchW, int padH, int padW,
                         int dilationH, int dilationW, int dilation_patchH,
                         int dilation_patchW, int dH, int dW) {
  correlation_forward_impl(input1, input2, output, kH, kW, patchH, patchW, padH,
                           padW, dilationH, dilationW, dilation_patchH,
                           dilation_patchW, dH, dW);
}

// Public entry point: forwards every argument, in order, to
// correlation_backward_impl.
void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2,
                          Tensor grad_input1, Tensor grad_input2, int kH,
                          int kW, int patchH, int patchW, int padH, int padW,
                          int dilationH, int dilationW, int dilation_patchH,
                          int dilation_patchW, int dH, int dW) {
  correlation_backward_impl(grad_output, input1, input2, grad_input1,
                            grad_input2, kH, kW, patchH, patchW, padH, padW,
                            dilationH, dilationW, dilation_patchH,
                            dilation_patchW, dH, dW);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/correlation_parrots.cpp
================================================
// Copyright (c) OpenMMLab.
All rights reserved #include #include #include #include "correlation_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void correlation_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW; SSAttrs(attr) .get("kH", kH) .get("kW", kW) .get("patchH", patchH) .get("patchW", patchW) .get("padH", padH) .get("padW", padW) .get("dilationH", dilationH) .get("dilationW", dilationW) .get("dilation_patchH", dilation_patchH) .get("dilation_patchW", dilation_patchW) .get("dH", dH) .get("dW", dW) .done(); auto input1 = buildATensor(ctx, ins[0]); auto input2 = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); correlation_forward(input1, input2, output, kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW); } void correlation_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW; SSAttrs(attr) .get("kH", kH) .get("kW", kW) .get("patchH", patchH) .get("patchW", patchW) .get("padH", padH) .get("padW", padW) .get("dilationH", dilationH) .get("dilationW", dilationW) .get("dilation_patchH", dilation_patchH) .get("dilation_patchW", dilation_patchW) .get("dH", dH) .get("dW", dW) .done(); auto grad_output = buildATensor(ctx, ins[0]); auto input1 = buildATensor(ctx, ins[1]); auto input2 = buildATensor(ctx, ins[2]); auto grad_input1 = buildATensor(ctx, outs[0]); auto grad_input2 = buildATensor(ctx, outs[1]); correlation_backward(grad_output, input1, input2, grad_input1, grad_input2, kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW); } #endif void correlation_forward_cpu_parrots(HostContext& ctx, const SSElement& 
attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW; SSAttrs(attr) .get("kH", kH) .get("kW", kW) .get("patchH", patchH) .get("patchW", patchW) .get("padH", padH) .get("padW", padW) .get("dilationH", dilationH) .get("dilationW", dilationW) .get("dilation_patchH", dilation_patchH) .get("dilation_patchW", dilation_patchW) .get("dH", dH) .get("dW", dW) .done(); auto input1 = buildATensor(ctx, ins[0]); auto input2 = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); correlation_forward(input1, input2, output, kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW); } void correlation_backward_cpu_parrots(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW; SSAttrs(attr) .get("kH", kH) .get("kW", kW) .get("patchH", patchH) .get("patchW", patchW) .get("padH", padH) .get("padW", padW) .get("dilationH", dilationH) .get("dilationW", dilationW) .get("dilation_patchH", dilation_patchH) .get("dilation_patchW", dilation_patchW) .get("dH", dH) .get("dW", dW) .done(); auto grad_output = buildATensor(ctx, ins[0]); auto input1 = buildATensor(ctx, ins[1]); auto input2 = buildATensor(ctx, ins[2]); auto grad_input1 = buildATensor(ctx, outs[0]); auto grad_input2 = buildATensor(ctx, outs[1]); correlation_backward(grad_output, input1, input2, grad_input1, grad_input2, kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW); } PARROTS_EXTENSION_REGISTER(correlation_forward) .attr("kH") .attr("kW") .attr("patchH") .attr("patchW") .attr("padH") .attr("padW") .attr("dilationH") .attr("dilationW") .attr("dilation_patchH") .attr("dilation_patchW") .attr("dH") .attr("dW") .input(2) .output(1) 
.apply(correlation_forward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(correlation_forward_cuda_parrots) #endif .done(); PARROTS_EXTENSION_REGISTER(correlation_backward) .attr("kH") .attr("kW") .attr("patchH") .attr("patchW") .attr("padH") .attr("padW") .attr("dilationH") .attr("dilationW") .attr("dilation_patchH") .attr("dilation_patchW") .attr("dH") .attr("dW") .input(3) .output(2) .apply(correlation_backward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(correlation_backward_cuda_parrots) #endif .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/correlation_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef CORRELATION_PYTORCH_H #define CORRELATION_PYTORCH_H #include using namespace at; void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW); void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2, Tensor grad_input1, Tensor grad_input2, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW); #endif // CORRELATION_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/cudabind.cpp ================================================ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void AssignScoreWithKForwardCUDAKernelLauncher( int B, int N0, int N1, int M, int K, int O, int aggregate, const Tensor& points, const Tensor& centers, const Tensor& scores, const Tensor& knn_idx, Tensor& output); void AssignScoreWithKBackwardCUDAKernelLauncher( int B, int N0, int N1, int M, int K, int O, int aggregate, const Tensor& grad_out, const Tensor& points, const Tensor& centers, const Tensor& scores, 
const Tensor& knn_idx, Tensor& grad_points, Tensor& grad_centers, Tensor& grad_scores); void assign_score_withk_forward_cuda(int B, int N0, int N1, int M, int K, int O, int aggregate, const Tensor& points, const Tensor& centers, const Tensor& scores, const Tensor& knn_idx, Tensor& output) { AssignScoreWithKForwardCUDAKernelLauncher( B, N0, N1, M, K, O, aggregate, points, centers, scores, knn_idx, output); }; void assign_score_withk_backward_cuda( int B, int N0, int N1, int M, int K, int O, int aggregate, const Tensor& grad_out, const Tensor& points, const Tensor& centers, const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points, Tensor& grad_centers, Tensor& grad_scores) { AssignScoreWithKBackwardCUDAKernelLauncher( B, N0, N1, M, K, O, aggregate, grad_out, points, centers, scores, knn_idx, grad_points, grad_centers, grad_scores); }; void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O, int aggregate, const Tensor& points, const Tensor& centers, const Tensor& scores, const Tensor& knn_idx, Tensor& output); void assign_score_withk_backward_impl( int B, int N0, int N1, int M, int K, int O, int aggregate, const Tensor& grad_out, const Tensor& points, const Tensor& centers, const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points, Tensor& grad_centers, Tensor& grad_scores); REGISTER_DEVICE_IMPL(assign_score_withk_forward_impl, CUDA, assign_score_withk_forward_cuda); REGISTER_DEVICE_IMPL(assign_score_withk_backward_impl, CUDA, assign_score_withk_backward_cuda); void BallQueryForwardCUDAKernelLauncher(int b, int n, int m, float min_radius, float max_radius, int nsample, const Tensor new_xyz, const Tensor xyz, Tensor idx); void ball_query_forward_cuda(int b, int n, int m, float min_radius, float max_radius, int nsample, const Tensor new_xyz, const Tensor xyz, Tensor idx) { BallQueryForwardCUDAKernelLauncher(b, n, m, min_radius, max_radius, nsample, new_xyz, xyz, idx); }; void ball_query_forward_impl(int b, int n, int m, float 
min_radius, float max_radius, int nsample, const Tensor new_xyz, const Tensor xyz, Tensor idx); REGISTER_DEVICE_IMPL(ball_query_forward_impl, CUDA, ball_query_forward_cuda); void BBoxOverlapsCUDAKernelLauncher(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, const int mode, const bool aligned, const int offset); void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, const int mode, const bool aligned, const int offset) { BBoxOverlapsCUDAKernelLauncher(bboxes1, bboxes2, ious, mode, aligned, offset); } void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, const int mode, const bool aligned, const int offset); REGISTER_DEVICE_IMPL(bbox_overlaps_impl, CUDA, bbox_overlaps_cuda); void BorderAlignForwardCUDAKernelLauncher(const Tensor& input, const Tensor& boxes, Tensor output, Tensor argmax_idx, const int pool_size); void BorderAlignBackwardCUDAKernelLauncher(const Tensor& grad_output, const Tensor& boxes, const Tensor& argmax_idx, Tensor grad_input, const int pool_size); void border_align_forward_cuda(const Tensor& input, const Tensor& boxes, Tensor output, Tensor argmax_idx, const int pool_size) { BorderAlignForwardCUDAKernelLauncher(input, boxes, output, argmax_idx, pool_size); } void border_align_backward_cuda(const Tensor& grad_output, const Tensor& boxes, const Tensor& argmax_idx, Tensor grad_input, const int pool_size) { BorderAlignBackwardCUDAKernelLauncher(grad_output, boxes, argmax_idx, grad_input, pool_size); } void border_align_forward_impl(const Tensor& input, const Tensor& boxes, Tensor output, Tensor argmax_idx, const int pool_size); void border_align_backward_impl(const Tensor& grad_output, const Tensor& boxes, const Tensor& argmax_idx, Tensor grad_input, const int pool_size); REGISTER_DEVICE_IMPL(border_align_forward_impl, CUDA, border_align_forward_cuda); REGISTER_DEVICE_IMPL(border_align_backward_impl, CUDA, border_align_backward_cuda); void box_iou_rotated_cuda(const Tensor boxes1, const 
Tensor boxes2, Tensor ious, const int mode_flag, const bool aligned); void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious, const int mode_flag, const bool aligned); REGISTER_DEVICE_IMPL(box_iou_rotated_impl, CUDA, box_iou_rotated_cuda); void CARAFEForwardCUDAKernelLauncher(const Tensor features, const Tensor masks, Tensor rfeatures, Tensor routput, Tensor rmasks, Tensor output, const int kernel_size, const int group_size, const int scale_factor); void CARAFEBackwardCUDAKernelLauncher( const Tensor top_grad, const Tensor rfeatures, const Tensor masks, Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad, const int kernel_size, const int group_size, const int scale_factor); void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures, Tensor routput, Tensor rmasks, Tensor output, int kernel_size, int group_size, int scale_factor) { CARAFEForwardCUDAKernelLauncher(features, masks, rfeatures, routput, rmasks, output, kernel_size, group_size, scale_factor); } void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks, Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor) { CARAFEBackwardCUDAKernelLauncher(top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs, rbottom_grad, rmask_grad, bottom_grad, mask_grad, kernel_size, group_size, scale_factor); } void carafe_forward_impl(Tensor features, Tensor masks, Tensor rfeatures, Tensor routput, Tensor rmasks, Tensor output, int kernel_size, int group_size, int scale_factor); void carafe_backward_impl(Tensor top_grad, Tensor rfeatures, Tensor masks, Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor); REGISTER_DEVICE_IMPL(carafe_forward_impl, CUDA, carafe_forward_cuda); 
REGISTER_DEVICE_IMPL(carafe_backward_impl, CUDA, carafe_backward_cuda); void CARAFENAIVEForwardCUDAKernelLauncher(const Tensor features, const Tensor masks, Tensor output, const int kernel_size, const int group_size, const int scale_factor); void CARAFENAIVEBackwardCUDAKernelLauncher( const Tensor top_grad, const Tensor features, const Tensor masks, Tensor bottom_grad, Tensor mask_grad, const int kernel_size, const int group_size, const int scale_factor); void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output, int kernel_size, int group_size, int scale_factor) { CARAFENAIVEForwardCUDAKernelLauncher(features, masks, output, kernel_size, group_size, scale_factor); } void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor masks, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor) { CARAFENAIVEBackwardCUDAKernelLauncher(top_grad, features, masks, bottom_grad, mask_grad, kernel_size, group_size, scale_factor); } void carafe_naive_forward_impl(Tensor features, Tensor masks, Tensor output, int kernel_size, int group_size, int scale_factor); void carafe_naive_backward_impl(Tensor top_grad, Tensor features, Tensor masks, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor); REGISTER_DEVICE_IMPL(carafe_naive_forward_impl, CUDA, carafe_naive_forward_cuda); REGISTER_DEVICE_IMPL(carafe_naive_backward_impl, CUDA, carafe_naive_backward_cuda); void CorrelationForwardCUDAKernelLauncher(Tensor input1, Tensor input2, Tensor output, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW); void CorrelationBackwardCUDAKernelLauncher(Tensor grad_output, Tensor input1, Tensor input2, Tensor grad_input1, Tensor grad_input2, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW); void 
correlation_forward_cuda(Tensor input1, Tensor input2, Tensor output, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW) { CorrelationForwardCUDAKernelLauncher( input1, input2, output, kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW); } void correlation_backward_cuda(Tensor grad_output, Tensor input1, Tensor input2, Tensor grad_input1, Tensor grad_input2, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW) { CorrelationBackwardCUDAKernelLauncher( grad_output, input1, input2, grad_input1, grad_input2, kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW); } void correlation_forward_impl(Tensor input1, Tensor input2, Tensor output, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW); void correlation_backward_impl(Tensor grad_output, Tensor input1, Tensor input2, Tensor grad_input1, Tensor grad_input2, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW); REGISTER_DEVICE_IMPL(correlation_forward_impl, CUDA, correlation_forward_cuda); REGISTER_DEVICE_IMPL(correlation_backward_impl, CUDA, correlation_backward_cuda); void deformable_im2col_cuda(Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor data_col); void deformable_col2im_cuda(Tensor data_col, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const 
int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_im); void deformable_col2im_coord_cuda( Tensor data_col, Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_offset); void deformable_im2col_impl(Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor data_col); void deformable_col2im_impl(Tensor data_col, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_im); void deformable_col2im_coord_impl( Tensor data_col, Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_offset); REGISTER_DEVICE_IMPL(deformable_im2col_impl, CUDA, deformable_im2col_cuda); REGISTER_DEVICE_IMPL(deformable_col2im_impl, CUDA, deformable_col2im_cuda); REGISTER_DEVICE_IMPL(deformable_col2im_coord_impl, CUDA, deformable_col2im_coord_cuda); void DeformRoIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor offset, Tensor output, int pooled_height, 
int pooled_width, float spatial_scale, int sampling_ratio, float gamma); void DeformRoIPoolBackwardCUDAKernelLauncher( Tensor grad_output, Tensor input, Tensor rois, Tensor offset, Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma); void deform_roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor offset, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma) { DeformRoIPoolForwardCUDAKernelLauncher(input, rois, offset, output, pooled_height, pooled_width, spatial_scale, sampling_ratio, gamma); } void deform_roi_pool_backward_cuda(Tensor grad_output, Tensor input, Tensor rois, Tensor offset, Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma) { DeformRoIPoolBackwardCUDAKernelLauncher( grad_output, input, rois, offset, grad_input, grad_offset, pooled_height, pooled_width, spatial_scale, sampling_ratio, gamma); } void deform_roi_pool_forward_impl(Tensor input, Tensor rois, Tensor offset, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma); void deform_roi_pool_backward_impl(Tensor grad_output, Tensor input, Tensor rois, Tensor offset, Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma); REGISTER_DEVICE_IMPL(deform_roi_pool_forward_impl, CUDA, deform_roi_pool_forward_cuda); REGISTER_DEVICE_IMPL(deform_roi_pool_backward_impl, CUDA, deform_roi_pool_backward_cuda); void SigmoidFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target, Tensor weight, Tensor output, const float gamma, const float alpha); void SigmoidFocalLossBackwardCUDAKernelLauncher(Tensor input, Tensor target, Tensor weight, Tensor grad_input, const float gamma, const float alpha); void SoftmaxFocalLossForwardCUDAKernelLauncher(Tensor softmax, Tensor target, 
Tensor weight, Tensor output, const float gamma, const float alpha); void SoftmaxFocalLossBackwardCUDAKernelLauncher(Tensor softmax, Tensor target, Tensor weight, Tensor buff, Tensor grad_input, const float gamma, const float alpha); void sigmoid_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight, Tensor output, float gamma, float alpha) { SigmoidFocalLossForwardCUDAKernelLauncher(input, target, weight, output, gamma, alpha); } void sigmoid_focal_loss_backward_cuda(Tensor input, Tensor target, Tensor weight, Tensor grad_input, float gamma, float alpha) { SigmoidFocalLossBackwardCUDAKernelLauncher(input, target, weight, grad_input, gamma, alpha); } void softmax_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight, Tensor output, float gamma, float alpha) { SoftmaxFocalLossForwardCUDAKernelLauncher(input, target, weight, output, gamma, alpha); } void softmax_focal_loss_backward_cuda(Tensor input, Tensor target, Tensor weight, Tensor buff, Tensor grad_input, float gamma, float alpha) { SoftmaxFocalLossBackwardCUDAKernelLauncher(input, target, weight, buff, grad_input, gamma, alpha); } void sigmoid_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight, Tensor output, float gamma, float alpha); void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target, Tensor weight, Tensor grad_input, float gamma, float alpha); void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight, Tensor output, float gamma, float alpha); void softmax_focal_loss_backward_impl(Tensor input, Tensor target, Tensor weight, Tensor buff, Tensor grad_input, float gamma, float alpha); REGISTER_DEVICE_IMPL(sigmoid_focal_loss_forward_impl, CUDA, sigmoid_focal_loss_forward_cuda); REGISTER_DEVICE_IMPL(sigmoid_focal_loss_backward_impl, CUDA, sigmoid_focal_loss_backward_cuda); REGISTER_DEVICE_IMPL(softmax_focal_loss_forward_impl, CUDA, softmax_focal_loss_forward_cuda); REGISTER_DEVICE_IMPL(softmax_focal_loss_backward_impl, CUDA, 
softmax_focal_loss_backward_cuda); void FurthestPointSamplingForwardCUDAKernelLauncher(int b, int n, int m, const float* dataset, float* temp, int* idxs); void FurthestPointSamplingWithDistForwardCUDAKernelLauncher( int b, int n, int m, const float* dataset, float* temp, int* idxs); void furthest_point_sampling_forward_cuda(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m) { const float* dataset = points_tensor.data_ptr(); float* temp = temp_tensor.data_ptr(); int* idxs = idx_tensor.data_ptr(); FurthestPointSamplingForwardCUDAKernelLauncher(b, n, m, dataset, temp, idxs); } void furthest_point_sampling_with_dist_forward_cuda(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m) { const float* dataset = points_tensor.data_ptr(); float* temp = temp_tensor.data_ptr(); int* idxs = idx_tensor.data_ptr(); FurthestPointSamplingWithDistForwardCUDAKernelLauncher(b, n, m, dataset, temp, idxs); } void furthest_point_sampling_forward_impl(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m); void furthest_point_sampling_with_dist_forward_impl(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m); REGISTER_DEVICE_IMPL(furthest_point_sampling_forward_impl, CUDA, furthest_point_sampling_forward_cuda); REGISTER_DEVICE_IMPL(furthest_point_sampling_with_dist_forward_impl, CUDA, furthest_point_sampling_with_dist_forward_cuda); torch::Tensor fused_bias_leakyrelu_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, int act, int grad, float alpha, float scale); torch::Tensor fused_bias_leakyrelu_op_impl(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, int act, int grad, float alpha, float scale); REGISTER_DEVICE_IMPL(fused_bias_leakyrelu_op_impl, CUDA, fused_bias_leakyrelu_op); void GatherPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints, const Tensor points, const Tensor idx, Tensor out); 
void GatherPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints, const Tensor grad_out, const Tensor idx, Tensor grad_points); void gather_points_forward_cuda(int b, int c, int n, int npoints, const Tensor points, const Tensor idx, Tensor out) { GatherPointsForwardCUDAKernelLauncher(b, c, n, npoints, points, idx, out); }; void gather_points_backward_cuda(int b, int c, int n, int npoints, const Tensor grad_out, const Tensor idx, Tensor grad_points) { GatherPointsBackwardCUDAKernelLauncher(b, c, n, npoints, grad_out, idx, grad_points); }; void gather_points_forward_impl(int b, int c, int n, int npoints, const Tensor points, const Tensor idx, Tensor out); void gather_points_backward_impl(int b, int c, int n, int npoints, const Tensor grad_out, const Tensor idx, Tensor grad_points); REGISTER_DEVICE_IMPL(gather_points_forward_impl, CUDA, gather_points_forward_cuda); REGISTER_DEVICE_IMPL(gather_points_backward_impl, CUDA, gather_points_backward_cuda); void GroupPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints, int nsample, const Tensor points, const Tensor idx, Tensor out); void GroupPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints, int nsample, const Tensor grad_out, const Tensor idx, Tensor grad_points); void group_points_forward_cuda(int b, int c, int n, int npoints, int nsample, const Tensor points, const Tensor idx, Tensor out) { GroupPointsForwardCUDAKernelLauncher(b, c, n, npoints, nsample, points, idx, out); }; void group_points_backward_cuda(int b, int c, int n, int npoints, int nsample, const Tensor grad_out, const Tensor idx, Tensor grad_points) { GroupPointsBackwardCUDAKernelLauncher(b, c, n, npoints, nsample, grad_out, idx, grad_points); }; void group_points_forward_impl(int b, int c, int n, int npoints, int nsample, const Tensor points, const Tensor idx, Tensor out); void group_points_backward_impl(int b, int c, int n, int npoints, int nsample, const Tensor grad_out, const Tensor idx, Tensor grad_points); 
REGISTER_DEVICE_IMPL(group_points_forward_impl, CUDA, group_points_forward_cuda); REGISTER_DEVICE_IMPL(group_points_backward_impl, CUDA, group_points_backward_cuda); void IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(const int num_a, const Tensor boxes_a, const int num_b, const Tensor boxes_b, Tensor ans_overlap); void IoU3DBoxesIoUBevForwardCUDAKernelLauncher(const int num_a, const Tensor boxes_a, const int num_b, const Tensor boxes_b, Tensor ans_iou); void IoU3DNMSForwardCUDAKernelLauncher(const Tensor boxes, unsigned long long* mask, int boxes_num, float nms_overlap_thresh); void IoU3DNMSNormalForwardCUDAKernelLauncher(const Tensor boxes, unsigned long long* mask, int boxes_num, float nms_overlap_thresh); void iou3d_boxes_overlap_bev_forward_cuda(const int num_a, const Tensor boxes_a, const int num_b, const Tensor boxes_b, Tensor ans_overlap) { IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(num_a, boxes_a, num_b, boxes_b, ans_overlap); }; void iou3d_boxes_iou_bev_forward_cuda(const int num_a, const Tensor boxes_a, const int num_b, const Tensor boxes_b, Tensor ans_iou) { IoU3DBoxesIoUBevForwardCUDAKernelLauncher(num_a, boxes_a, num_b, boxes_b, ans_iou); }; void iou3d_nms_forward_cuda(const Tensor boxes, unsigned long long* mask, int boxes_num, float nms_overlap_thresh) { IoU3DNMSForwardCUDAKernelLauncher(boxes, mask, boxes_num, nms_overlap_thresh); }; void iou3d_nms_normal_forward_cuda(const Tensor boxes, unsigned long long* mask, int boxes_num, float nms_overlap_thresh) { IoU3DNMSNormalForwardCUDAKernelLauncher(boxes, mask, boxes_num, nms_overlap_thresh); }; void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a, const int num_b, const Tensor boxes_b, Tensor ans_overlap); void iou3d_boxes_iou_bev_forward_impl(const int num_a, const Tensor boxes_a, const int num_b, const Tensor boxes_b, Tensor ans_iou); void iou3d_nms_forward_impl(const Tensor boxes, unsigned long long* mask, int boxes_num, float nms_overlap_thresh); void 
iou3d_nms_normal_forward_impl(const Tensor boxes, unsigned long long* mask, int boxes_num, float nms_overlap_thresh); REGISTER_DEVICE_IMPL(iou3d_boxes_overlap_bev_forward_impl, CUDA, iou3d_boxes_overlap_bev_forward_cuda); REGISTER_DEVICE_IMPL(iou3d_boxes_iou_bev_forward_impl, CUDA, iou3d_boxes_iou_bev_forward_cuda); REGISTER_DEVICE_IMPL(iou3d_nms_forward_impl, CUDA, iou3d_nms_forward_cuda); REGISTER_DEVICE_IMPL(iou3d_nms_normal_forward_impl, CUDA, iou3d_nms_normal_forward_cuda); void KNNForwardCUDAKernelLauncher(int b, int n, int m, int nsample, const Tensor xyz, const Tensor new_xyz, Tensor idx, Tensor dist2); void knn_forward_cuda(int b, int n, int m, int nsample, const Tensor xyz, const Tensor new_xyz, Tensor idx, Tensor dist2) { KNNForwardCUDAKernelLauncher(b, n, m, nsample, xyz, new_xyz, idx, dist2); } void knn_forward_impl(int b, int n, int m, int nsample, const Tensor xyz, const Tensor new_xyz, Tensor idx, Tensor dist2); REGISTER_DEVICE_IMPL(knn_forward_impl, CUDA, knn_forward_cuda); void MaskedIm2colForwardCUDAKernelLauncher(const Tensor bottom_data, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor top_data, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w); void MaskedCol2imForwardCUDAKernelLauncher(const Tensor bottom_data, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor top_data, const int height, const int width, const int channels); void masked_im2col_forward_cuda(const Tensor im, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w) { // im: (n, ic, h, w), kernel size (kh, kw) // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) MaskedIm2colForwardCUDAKernelLauncher(im, mask_h_idx, mask_w_idx, col, kernel_h, kernel_w, pad_h, pad_w); } void masked_col2im_forward_cuda(const Tensor col, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor im, int height, int width, int channels) { // im: (n, ic, h, w), kernel size (kh, kw) 
// kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh) MaskedCol2imForwardCUDAKernelLauncher(col, mask_h_idx, mask_w_idx, im, height, width, channels); } void masked_im2col_forward_impl(const Tensor im, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w); void masked_col2im_forward_impl(const Tensor col, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor im, int height, int width, int channels); REGISTER_DEVICE_IMPL(masked_im2col_forward_impl, CUDA, masked_im2col_forward_cuda); REGISTER_DEVICE_IMPL(masked_col2im_forward_impl, CUDA, masked_col2im_forward_cuda); void modulated_deformable_im2col_cuda( const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor data_col); void modulated_deformable_col2im_cuda( const Tensor data_col, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor grad_im); void modulated_deformable_col2im_coord_cuda( const Tensor data_col, const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, 
Tensor grad_offset, Tensor grad_mask); void modulated_deformable_im2col_impl( const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor data_col); void modulated_deformable_col2im_impl( const Tensor data_col, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor grad_im); void modulated_deformable_col2im_coord_impl( const Tensor data_col, const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor grad_offset, Tensor grad_mask); REGISTER_DEVICE_IMPL(modulated_deformable_im2col_impl, CUDA, modulated_deformable_im2col_cuda); REGISTER_DEVICE_IMPL(modulated_deformable_col2im_impl, CUDA, modulated_deformable_col2im_cuda); REGISTER_DEVICE_IMPL(modulated_deformable_col2im_coord_impl, CUDA, modulated_deformable_col2im_coord_cuda); Tensor ms_deform_attn_cuda_forward(const Tensor& value, const Tensor& spatial_shapes, const Tensor& level_start_index, const Tensor& sampling_loc, const Tensor& attn_weight, const int im2col_step); void ms_deform_attn_cuda_backward( const Tensor& value, const Tensor& spatial_shapes, 
const Tensor& level_start_index, const Tensor& sampling_loc, const Tensor& attn_weight, const Tensor& grad_output, Tensor& grad_value, Tensor& grad_sampling_loc, Tensor& grad_attn_weight, const int im2col_step); Tensor ms_deform_attn_impl_forward(const Tensor& value, const Tensor& spatial_shapes, const Tensor& level_start_index, const Tensor& sampling_loc, const Tensor& attn_weight, const int im2col_step); void ms_deform_attn_impl_backward( const Tensor& value, const Tensor& spatial_shapes, const Tensor& level_start_index, const Tensor& sampling_loc, const Tensor& attn_weight, const Tensor& grad_output, Tensor& grad_value, Tensor& grad_sampling_loc, Tensor& grad_attn_weight, const int im2col_step); REGISTER_DEVICE_IMPL(ms_deform_attn_impl_forward, CUDA, ms_deform_attn_cuda_forward); REGISTER_DEVICE_IMPL(ms_deform_attn_impl_backward, CUDA, ms_deform_attn_cuda_backward); Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold, int offset); Tensor nms_cuda(Tensor boxes, Tensor scores, float iou_threshold, int offset) { return NMSCUDAKernelLauncher(boxes, scores, iou_threshold, offset); } Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset); REGISTER_DEVICE_IMPL(nms_impl, CUDA, nms_cuda); void PointsInBoxesPartForwardCUDAKernelLauncher(int batch_size, int boxes_num, int pts_num, const Tensor boxes, const Tensor pts, Tensor box_idx_of_points); void PointsInBoxesAllForwardCUDAKernelLauncher(int batch_size, int boxes_num, int pts_num, const Tensor boxes, const Tensor pts, Tensor box_idx_of_points); void points_in_boxes_part_forward_cuda(int batch_size, int boxes_num, int pts_num, const Tensor boxes, const Tensor pts, Tensor box_idx_of_points) { PointsInBoxesPartForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num, boxes, pts, box_idx_of_points); }; void points_in_boxes_all_forward_cuda(int batch_size, int boxes_num, int pts_num, const Tensor boxes, const Tensor pts, Tensor box_idx_of_points) { 
PointsInBoxesAllForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num, boxes, pts, box_idx_of_points); }; void points_in_boxes_part_forward_impl(int batch_size, int boxes_num, int pts_num, const Tensor boxes, const Tensor pts, Tensor box_idx_of_points); void points_in_boxes_all_forward_impl(int batch_size, int boxes_num, int pts_num, const Tensor boxes, const Tensor pts, Tensor box_idx_of_points); REGISTER_DEVICE_IMPL(points_in_boxes_part_forward_impl, CUDA, points_in_boxes_part_forward_cuda); REGISTER_DEVICE_IMPL(points_in_boxes_all_forward_impl, CUDA, points_in_boxes_all_forward_cuda); void PSAMaskForwardCUDAKernelLauncher(const int psa_type, const Tensor input, Tensor output, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask); void PSAMaskBackwardCUDAKernelLauncher( const int psa_type, const Tensor grad_output, Tensor grad_input, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask); void psamask_forward_cuda(const int psa_type, const Tensor input, Tensor output, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { PSAMaskForwardCUDAKernelLauncher(psa_type, input, output, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } void psamask_backward_cuda(const int psa_type, const Tensor grad_output, Tensor grad_input, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { PSAMaskBackwardCUDAKernelLauncher(psa_type, grad_output, grad_input, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } void psamask_forward_impl(const int psa_type, const Tensor input, Tensor output, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int 
half_h_mask, const int half_w_mask); void psamask_backward_impl(const int psa_type, const Tensor grad_output, Tensor grad_input, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask); REGISTER_DEVICE_IMPL(psamask_forward_impl, CUDA, psamask_forward_cuda); REGISTER_DEVICE_IMPL(psamask_backward_impl, CUDA, psamask_backward_cuda); void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned); void ROIAlignBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois, Tensor argmax_y, Tensor argmax_x, Tensor grad_input, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned); void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { ROIAlignForwardCUDAKernelLauncher( input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } void roi_align_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax_y, Tensor argmax_x, Tensor grad_input, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { ROIAlignBackwardCUDAKernelLauncher( grad_output, rois, argmax_y, argmax_x, grad_input, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned); void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y, Tensor argmax_x, Tensor grad_input, int aligned_height, 
// Tail of the roi_align_backward_impl declaration begun on the previous line.
int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode,
    bool aligned);

// Associate the ROI Align *_impl names with their CUDA wrappers.
// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(roi_align_forward_impl, CUDA, roi_align_forward_cuda);
REGISTER_DEVICE_IMPL(roi_align_backward_impl, CUDA, roi_align_backward_cuda);

// ---- Rotated ROI Align ----

// Kernel launcher declarations; their definitions are not in this chunk.
// Unlike the public wrappers below, the launchers take the feature-map and
// ROI dimensions as explicit integer arguments.
void ROIAlignRotatedForwardCUDAKernelLauncher(
    const at::Tensor features, const at::Tensor rois, const float spatial_scale,
    const int sample_num, const bool aligned, const bool clockwise,
    const int channels, const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, at::Tensor output);

void ROIAlignRotatedBackwardCUDAKernelLauncher(
    const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale,
    const int sample_num, const bool aligned, const bool clockwise,
    const int channels, const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, at::Tensor bottom_grad);

// CUDA entry point for the forward pass.
// Reads the ROI count from rois.size(0) and the channel/height/width values
// from dims 1/2/3 of `features`, then calls the forward launcher.
// Raises via AT_ERROR("wrong roi size") unless rois.size(1) == 6.
void roi_align_rotated_forward_cuda(Tensor features, Tensor rois, Tensor output,
                                    int aligned_height, int aligned_width,
                                    float spatial_scale, int sample_ratio,
                                    bool aligned, bool clockwise) {
  // Number of ROIs
  int num_rois = rois.size(0);
  int size_rois = rois.size(1);

  // Each ROI row must have exactly 6 entries.
  if (size_rois != 6) {
    AT_ERROR("wrong roi size");
  }

  int num_channels = features.size(1);
  int data_height = features.size(2);
  int data_width = features.size(3);
  // aligned_height/aligned_width are passed in the launcher's
  // pooled_height/pooled_width positions; sample_ratio in its sample_num slot.
  ROIAlignRotatedForwardCUDAKernelLauncher(
      features, rois, spatial_scale, sample_ratio, aligned, clockwise,
      num_channels, data_height, data_width, num_rois, aligned_height,
      aligned_width, output);
}

// CUDA entry point for the backward pass.
// Same ROI validation as the forward wrapper; the channel/height/width values
// are read from `bottom_grad` (the gradient buffer being filled).
// (the body continues on the next line of the dump)
void roi_align_rotated_backward_cuda(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int aligned_height,
                                     int aligned_width, float spatial_scale,
                                     int sample_ratio, bool aligned,
                                     bool clockwise) {
  // Number of ROIs
  int num_rois = rois.size(0);
  int size_rois = rois.size(1);
  if (size_rois != 6) {
    AT_ERROR("wrong roi size");
  }

  int num_channels = bottom_grad.size(1);
  int data_height = bottom_grad.size(2);
  int data_width = bottom_grad.size(3);
// Tail of roi_align_rotated_backward_cuda, whose body begins on the previous
// line: hand the collected dimensions to the backward launcher.
  ROIAlignRotatedBackwardCUDAKernelLauncher(
      top_grad, rois, spatial_scale, sample_ratio, aligned, clockwise,
      num_channels, data_height, data_width, num_rois, aligned_height,
      aligned_width, bottom_grad);
}

// Device-agnostic entry points (declared only; definitions are not in this
// chunk).
void roi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output,
                                    int aligned_height, int aligned_width,
                                    float spatial_scale, int sample_ratio,
                                    bool aligned, bool clockwise);

void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int aligned_height,
                                     int aligned_width, float spatial_scale,
                                     int sample_ratio, bool aligned,
                                     bool clockwise);

// Associate the rotated ROI Align *_impl names with their CUDA wrappers.
// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(roi_align_rotated_forward_impl, CUDA,
                     roi_align_rotated_forward_cuda);
REGISTER_DEVICE_IMPL(roi_align_rotated_backward_impl, CUDA,
                     roi_align_rotated_backward_cuda);

// ---- RiROI Align Rotated ----

// Kernel launcher declarations; their definitions are not in this chunk.
void RiROIAlignRotatedForwardCUDAKernelLauncher(
    const at::Tensor features, const at::Tensor rois, const float spatial_scale,
    const int num_samples, const bool clockwise, const int channels,
    const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, const int num_orientations,
    at::Tensor output);

void RiROIAlignRotatedBackwardCUDAKernelLauncher(
    const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale,
    const int num_samples, const bool clockwise, const int channels,
    const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, const int num_orientations,
    at::Tensor bottom_grad);

// CUDA entry point for the forward pass.
// Raises via AT_ERROR("wrong roi size") unless rois.size(1) == 6, then runs
// CHECK_CONTIGUOUS on `features` and `rois`.
// The channel count handed to the launcher is features.size(1) divided by
// num_orientations (integer division).
// (the body continues on the next line of the dump)
void riroi_align_rotated_forward_cuda(Tensor features, Tensor rois,
                                      Tensor output, int pooled_height,
                                      int pooled_width, float spatial_scale,
                                      int num_samples, int num_orientations,
                                      bool clockwise) {
  // Number of ROIs
  int num_rois = rois.size(0);
  int size_rois = rois.size(1);
  if (size_rois != 6) {
    AT_ERROR("wrong roi size");
  }
  CHECK_CONTIGUOUS(features);
  CHECK_CONTIGUOUS(rois);
  // NOTE(review): no guard against num_orientations == 0 before dividing.
  int num_channels = features.size(1) / num_orientations;
  int data_height = features.size(2);
  int data_width = features.size(3);
// Tail of riroi_align_rotated_forward_cuda, whose body begins on the previous
// line: hand the collected dimensions to the forward launcher.
  RiROIAlignRotatedForwardCUDAKernelLauncher(
      features, rois, spatial_scale, num_samples, clockwise, num_channels,
      data_height, data_width, num_rois, pooled_height, pooled_width,
      num_orientations, output);
}

// CUDA entry point for the backward pass.
// Raises via AT_ERROR("wrong roi size") unless rois.size(1) == 6, then runs
// CHECK_CONTIGUOUS on `top_grad` and `rois`.
// Channel/height/width values are read from `bottom_grad`; the channel count
// is bottom_grad.size(1) divided by num_orientations (integer division).
void riroi_align_rotated_backward_cuda(Tensor top_grad, Tensor rois,
                                       Tensor bottom_grad, int pooled_height,
                                       int pooled_width, float spatial_scale,
                                       int num_samples, int num_orientations,
                                       bool clockwise) {
  // Number of ROIs
  int num_rois = rois.size(0);
  int size_rois = rois.size(1);
  if (size_rois != 6) {
    AT_ERROR("wrong roi size");
  }
  CHECK_CONTIGUOUS(top_grad);
  CHECK_CONTIGUOUS(rois);
  // NOTE(review): no guard against num_orientations == 0 before dividing.
  int num_channels = bottom_grad.size(1) / num_orientations;
  int data_height = bottom_grad.size(2);
  int data_width = bottom_grad.size(3);
  RiROIAlignRotatedBackwardCUDAKernelLauncher(
      top_grad, rois, spatial_scale, num_samples, clockwise, num_channels,
      data_height, data_width, num_rois, pooled_height, pooled_width,
      num_orientations, bottom_grad);
}

// Device-agnostic entry points (declared only; definitions are not in this
// chunk).
void riroi_align_rotated_forward_impl(Tensor features, Tensor rois,
                                      Tensor output, int pooled_height,
                                      int pooled_width, float spatial_scale,
                                      int num_samples, int num_orientations,
                                      bool clockwise);

void riroi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                       Tensor bottom_grad, int pooled_height,
                                       int pooled_width, float spatial_scale,
                                       int num_samples, int num_orientations,
                                       bool clockwise);

// Associate the RiROI *_impl names with their CUDA wrappers.
// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(riroi_align_rotated_forward_impl, CUDA,
                     riroi_align_rotated_forward_cuda);
REGISTER_DEVICE_IMPL(riroi_align_rotated_backward_impl, CUDA,
                     riroi_align_rotated_backward_cuda);

// ---- RoI-aware 3D pooling ----

// Kernel launcher declarations; their definitions are not in this chunk.
void RoiawarePool3dForwardCUDAKernelLauncher(
    int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x,
    int out_y, int out_z, const Tensor rois, const Tensor pts,
    const Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels,
    Tensor pooled_features, int pool_method);

// (this declaration continues on the next line of the dump)
void RoiawarePool3dBackwardCUDAKernelLauncher(
    int boxes_num, int out_x, int out_y, int out_z, int channels,
    int max_pts_each_voxel, const Tensor pts_idx_of_voxels, const
// Tail of the RoiawarePool3dBackwardCUDAKernelLauncher declaration begun on
// the previous line.
Tensor argmax, const Tensor grad_out, Tensor grad_in, int pool_method);

// CUDA entry point for the forward pass: passes every argument through to
// RoiawarePool3dForwardCUDAKernelLauncher unchanged and in the same order.
void roiaware_pool3d_forward_cuda(int boxes_num, int pts_num, int channels,
                                  int max_pts_each_voxel, int out_x, int out_y,
                                  int out_z, const Tensor rois,
                                  const Tensor pts, const Tensor pts_feature,
                                  Tensor argmax, Tensor pts_idx_of_voxels,
                                  Tensor pooled_features, int pool_method) {
  RoiawarePool3dForwardCUDAKernelLauncher(
      boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
      rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features,
      pool_method);
};  // the ';' after the body is an empty declaration (harmless)

// CUDA entry point for the backward pass: passes every argument through to
// RoiawarePool3dBackwardCUDAKernelLauncher unchanged and in the same order.
void roiaware_pool3d_backward_cuda(int boxes_num, int out_x, int out_y,
                                   int out_z, int channels,
                                   int max_pts_each_voxel,
                                   const Tensor pts_idx_of_voxels,
                                   const Tensor argmax, const Tensor grad_out,
                                   Tensor grad_in, int pool_method) {
  RoiawarePool3dBackwardCUDAKernelLauncher(
      boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel,
      pts_idx_of_voxels, argmax, grad_out, grad_in, pool_method);
};

// Device-agnostic entry points (declared only; definitions are not in this
// chunk).
void roiaware_pool3d_forward_impl(int boxes_num, int pts_num, int channels,
                                  int max_pts_each_voxel, int out_x, int out_y,
                                  int out_z, const Tensor rois,
                                  const Tensor pts, const Tensor pts_feature,
                                  Tensor argmax, Tensor pts_idx_of_voxels,
                                  Tensor pooled_features, int pool_method);

void roiaware_pool3d_backward_impl(int boxes_num, int out_x, int out_y,
                                   int out_z, int channels,
                                   int max_pts_each_voxel,
                                   const Tensor pts_idx_of_voxels,
                                   const Tensor argmax, const Tensor grad_out,
                                   Tensor grad_in, int pool_method);

// Associate the RoI-aware pooling *_impl names with their CUDA wrappers.
// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(roiaware_pool3d_forward_impl, CUDA,
                     roiaware_pool3d_forward_cuda);
REGISTER_DEVICE_IMPL(roiaware_pool3d_backward_impl, CUDA,
                     roiaware_pool3d_backward_cuda);

// ---- RoI point 3D pooling ----

// Kernel launcher declaration; its definition is not in this chunk.
void RoIPointPool3dForwardCUDAKernelLauncher(
    int batch_size, int pts_num, int boxes_num, int feature_in_len,
    int sampled_pts_num, const Tensor xyz, const Tensor boxes3d,
    const Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag);

// CUDA entry point for the forward pass.
// (the parameter list continues on the next line of the dump)
void roipoint_pool3d_forward_cuda(int batch_size, int pts_num, int boxes_num,
                                  int feature_in_len, int
// Tail of roipoint_pool3d_forward_cuda, whose signature begins on the previous
// line. The body passes every argument through to the launcher unchanged.
sampled_pts_num, const Tensor xyz, const Tensor boxes3d,
    const Tensor pts_feature, Tensor pooled_features,
    Tensor pooled_empty_flag) {
  RoIPointPool3dForwardCUDAKernelLauncher(
      batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, xyz,
      boxes3d, pts_feature, pooled_features, pooled_empty_flag);
};  // the ';' after the body is an empty declaration (harmless)

// Device-agnostic entry point (declared only; definition is not in this chunk).
void roipoint_pool3d_forward_impl(int batch_size, int pts_num, int boxes_num,
                                  int feature_in_len, int sampled_pts_num,
                                  const Tensor xyz, const Tensor boxes3d,
                                  const Tensor pts_feature,
                                  Tensor pooled_features,
                                  Tensor pooled_empty_flag);

// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(roipoint_pool3d_forward_impl, CUDA,
                     roipoint_pool3d_forward_cuda);

// ---- ROI Pool ----

// Kernel launcher declarations; their definitions are not in this chunk.
void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
                                      Tensor argmax, int pooled_height,
                                      int pooled_width, float spatial_scale);

void ROIPoolBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois,
                                       Tensor argmax, Tensor grad_input,
                                       int pooled_height, int pooled_width,
                                       float spatial_scale);

// CUDA entry point for the forward pass: passes every argument through to
// ROIPoolForwardCUDAKernelLauncher unchanged and in the same order.
void roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax, int pooled_height, int pooled_width,
                           float spatial_scale) {
  ROIPoolForwardCUDAKernelLauncher(input, rois, output, argmax, pooled_height,
                                   pooled_width, spatial_scale);
}

// CUDA entry point for the backward pass: passes every argument through to
// ROIPoolBackwardCUDAKernelLauncher unchanged and in the same order.
void roi_pool_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax,
                            Tensor grad_input, int pooled_height,
                            int pooled_width, float spatial_scale) {
  ROIPoolBackwardCUDAKernelLauncher(grad_output, rois, argmax, grad_input,
                                    pooled_height, pooled_width, spatial_scale);
}

// Device-agnostic entry points (declared only; definitions are not in this
// chunk).
void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax, int pooled_height, int pooled_width,
                           float spatial_scale);

void roi_pool_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax,
                            Tensor grad_input, int pooled_height,
                            int pooled_width, float spatial_scale);

// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(roi_pool_forward_impl, CUDA, roi_pool_forward_cuda);
REGISTER_DEVICE_IMPL(roi_pool_backward_impl, CUDA, roi_pool_backward_cuda);

// ---- Dynamic point-to-voxel scatter ----

// Reduction selector taken by the dynamic point-to-voxel functions that
// follow this enum.
typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;

// Return type of the launcher declared on the next line of the dump.
// NOTE(review): `std::vector` has no template argument here; it looks like an
// angle-bracketed type was stripped by text extraction -- confirm upstream.
std::vector
// Continuation of a declaration whose return type sits at the end of the
// previous line of the dump.
DynamicPointToVoxelForwardCUDAKernelLauncher(
    const at::Tensor& feats, const at::Tensor& coors,
    const reduce_t reduce_type);

// Kernel launcher declaration; its definition is not in this chunk.
void DynamicPointToVoxelBackwardCUDAKernelLauncher(
    at::Tensor& grad_feats, const at::Tensor& grad_reduced_feats,
    const at::Tensor& feats, const at::Tensor& reduced_feats,
    const at::Tensor& coors_map, const at::Tensor& reduce_count,
    const reduce_t reduce_type);

// CUDA entry point for the forward pass: returns whatever
// DynamicPointToVoxelForwardCUDAKernelLauncher returns for the same arguments.
// NOTE(review): `std::vector` has no template argument here; it looks like an
// angle-bracketed type was stripped by text extraction -- confirm upstream.
std::vector dynamic_point_to_voxel_forward_cuda(
    const torch::Tensor& feats, const torch::Tensor& coors,
    const reduce_t reduce_type) {
  return DynamicPointToVoxelForwardCUDAKernelLauncher(feats, coors,
                                                      reduce_type);
};  // the ';' after the body is an empty declaration (harmless)

// CUDA entry point for the backward pass: passes every argument through to
// the backward launcher. `coors_idx` is passed in the position the launcher
// names `coors_map`.
void dynamic_point_to_voxel_backward_cuda(
    torch::Tensor& grad_feats, const torch::Tensor& grad_reduced_feats,
    const torch::Tensor& feats, const torch::Tensor& reduced_feats,
    const torch::Tensor& coors_idx, const torch::Tensor& reduce_count,
    const reduce_t reduce_type) {
  DynamicPointToVoxelBackwardCUDAKernelLauncher(grad_feats, grad_reduced_feats,
                                                feats, reduced_feats, coors_idx,
                                                reduce_count, reduce_type);
};

// Device-agnostic entry points (declared only; definitions are not in this
// chunk).
std::vector dynamic_point_to_voxel_forward_impl(
    const torch::Tensor& feats, const torch::Tensor& coors,
    const reduce_t reduce_type);

void dynamic_point_to_voxel_backward_impl(
    torch::Tensor& grad_feats, const torch::Tensor& grad_reduced_feats,
    const torch::Tensor& feats, const torch::Tensor& reduced_feats,
    const torch::Tensor& coors_idx, const torch::Tensor& reduce_count,
    const reduce_t reduce_type);

// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(dynamic_point_to_voxel_forward_impl, CUDA,
                     dynamic_point_to_voxel_forward_cuda);
REGISTER_DEVICE_IMPL(dynamic_point_to_voxel_backward_impl, CUDA,
                     dynamic_point_to_voxel_backward_cuda);

// ---- Synchronized batch norm ----

// Kernel launcher declarations; their definitions are not in this chunk.
void SyncBNForwardMeanCUDAKernelLauncher(const Tensor input, Tensor mean);

void SyncBNForwardVarCUDAKernelLauncher(const Tensor input, const Tensor mean,
                                        Tensor var);

// (this declaration continues on the next line of the dump)
void SyncBNForwardOutputCUDAKernelLauncher(
    const Tensor input, const Tensor mean, const Tensor var,
    Tensor running_mean, Tensor running_var, const Tensor weight,
    const Tensor bias, Tensor norm, Tensor
// Tail of the SyncBNForwardOutputCUDAKernelLauncher declaration begun on the
// previous line (`std` here is a Tensor parameter name, not the namespace).
std, Tensor output, float eps, float momentum, int group_size);

// Kernel launcher declarations; their definitions are not in this chunk.
void SyncBNBackwardParamCUDAKernelLauncher(const Tensor grad_output,
                                           const Tensor norm,
                                           Tensor grad_weight,
                                           Tensor grad_bias);

void SyncBNBackwardDataCUDAKernelLauncher(const Tensor grad_output,
                                          const Tensor weight,
                                          const Tensor grad_weight,
                                          const Tensor grad_bias,
                                          const Tensor norm, const Tensor std,
                                          Tensor grad_input);

// The five wrappers below each pass their arguments, unchanged and in order,
// to the launcher of the corresponding name.

void sync_bn_forward_mean_cuda(const Tensor input, Tensor mean) {
  SyncBNForwardMeanCUDAKernelLauncher(input, mean);
}

void sync_bn_forward_var_cuda(const Tensor input, const Tensor mean,
                              Tensor var) {
  SyncBNForwardVarCUDAKernelLauncher(input, mean, var);
}

void sync_bn_forward_output_cuda(const Tensor input, const Tensor mean,
                                 const Tensor var, Tensor running_mean,
                                 Tensor running_var, const Tensor weight,
                                 const Tensor bias, Tensor norm, Tensor std,
                                 Tensor output, float eps, float momentum,
                                 int group_size) {
  SyncBNForwardOutputCUDAKernelLauncher(input, mean, var, running_mean,
                                        running_var, weight, bias, norm, std,
                                        output, eps, momentum, group_size);
}

void sync_bn_backward_param_cuda(const Tensor grad_output, const Tensor norm,
                                 Tensor grad_weight, Tensor grad_bias) {
  SyncBNBackwardParamCUDAKernelLauncher(grad_output, norm, grad_weight,
                                        grad_bias);
}

void sync_bn_backward_data_cuda(const Tensor grad_output, const Tensor weight,
                                const Tensor grad_weight,
                                const Tensor grad_bias, const Tensor norm,
                                const Tensor std, Tensor grad_input) {
  SyncBNBackwardDataCUDAKernelLauncher(grad_output, weight, grad_weight,
                                       grad_bias, norm, std, grad_input);
}

// Device-agnostic entry points (declared only; definitions are not in this
// chunk). Signatures mirror the *_cuda wrappers above.
void sync_bn_forward_mean_impl(const Tensor input, Tensor mean);

void sync_bn_forward_var_impl(const Tensor input, const Tensor mean,
                              Tensor var);

void sync_bn_forward_output_impl(const Tensor input, const Tensor mean,
                                 const Tensor var, Tensor running_mean,
                                 Tensor running_var, const Tensor weight,
                                 const Tensor bias, Tensor norm, Tensor std,
                                 Tensor output, float eps, float momentum,
                                 int group_size);

// (this declaration continues on the next line of the dump)
void sync_bn_backward_param_impl(const Tensor
// Tail of the sync_bn_backward_param_impl declaration begun on the previous
// line.
grad_output, const Tensor norm, Tensor grad_weight, Tensor grad_bias);

// Device-agnostic entry point (declared only; definition is not in this chunk).
void sync_bn_backward_data_impl(const Tensor grad_output, const Tensor weight,
                                const Tensor grad_weight,
                                const Tensor grad_bias, const Tensor norm,
                                const Tensor std, Tensor grad_input);

// Associate the SyncBN *_impl names with their CUDA wrappers.
// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(sync_bn_forward_mean_impl, CUDA,
                     sync_bn_forward_mean_cuda);
REGISTER_DEVICE_IMPL(sync_bn_forward_var_impl, CUDA, sync_bn_forward_var_cuda);
REGISTER_DEVICE_IMPL(sync_bn_forward_output_impl, CUDA,
                     sync_bn_forward_output_cuda);
REGISTER_DEVICE_IMPL(sync_bn_backward_param_impl, CUDA,
                     sync_bn_backward_param_cuda);
REGISTER_DEVICE_IMPL(sync_bn_backward_data_impl, CUDA,
                     sync_bn_backward_data_cuda);

// ---- Three-point interpolation ----

// Kernel launcher declarations; their definitions are not in this chunk.
// Note the integer order differs: forward takes (b, c, m, n), backward takes
// (b, c, n, m).
void ThreeInterpolateForwardCUDAKernelLauncher(int b, int c, int m, int n,
                                               const Tensor points,
                                               const Tensor idx,
                                               const Tensor weight, Tensor out);

void ThreeInterpolateBackwardCUDAKernelLauncher(int b, int c, int n, int m,
                                                const Tensor grad_out,
                                                const Tensor idx,
                                                const Tensor weight,
                                                Tensor grad_points);

// CUDA entry point for the forward pass: passes every argument through to the
// forward launcher unchanged and in the same order.
void three_interpolate_forward_cuda(int b, int c, int m, int n,
                                    const Tensor points, const Tensor idx,
                                    const Tensor weight, Tensor out) {
  ThreeInterpolateForwardCUDAKernelLauncher(b, c, m, n, points, idx, weight,
                                            out);
};  // the ';' after the body is an empty declaration (harmless)

// CUDA entry point for the backward pass: passes every argument through to
// the backward launcher unchanged and in the same order.
void three_interpolate_backward_cuda(int b, int c, int n, int m,
                                     const Tensor grad_out, const Tensor idx,
                                     const Tensor weight, Tensor grad_points) {
  ThreeInterpolateBackwardCUDAKernelLauncher(b, c, n, m, grad_out, idx, weight,
                                             grad_points);
};

// Device-agnostic entry points (declared only; definitions are not in this
// chunk).
void three_interpolate_forward_impl(int b, int c, int m, int n,
                                    const Tensor points, const Tensor idx,
                                    const Tensor weight, Tensor out);

void three_interpolate_backward_impl(int b, int c, int n, int m,
                                     const Tensor grad_out, const Tensor idx,
                                     const Tensor weight, Tensor grad_points);

// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(three_interpolate_forward_impl, CUDA,
                     three_interpolate_forward_cuda);
REGISTER_DEVICE_IMPL(three_interpolate_backward_impl, CUDA,
                     three_interpolate_backward_cuda);

// ---- Three nearest neighbours ----

// Kernel launcher declaration; its definition is not in this chunk.
// (this declaration continues on the next line of the dump)
void ThreeNNForwardCUDAKernelLauncher(int b, int n, int m, const Tensor unknown,
// Tail of the ThreeNNForwardCUDAKernelLauncher declaration begun on the
// previous line.
const Tensor known, Tensor dist2, Tensor idx);

// CUDA entry point: passes every argument through to
// ThreeNNForwardCUDAKernelLauncher unchanged and in the same order.
void three_nn_forward_cuda(int b, int n, int m, const Tensor unknown,
                           const Tensor known, Tensor dist2, Tensor idx) {
  ThreeNNForwardCUDAKernelLauncher(b, n, m, unknown, known, dist2, idx);
};  // the ';' after the body is an empty declaration (harmless)

// Device-agnostic entry point (declared only; definition is not in this chunk).
void three_nn_forward_impl(int b, int n, int m, const Tensor unknown,
                           const Tensor known, Tensor dist2, Tensor idx);

// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(three_nn_forward_impl, CUDA, three_nn_forward_cuda);

// ---- TIN shift ----

// Kernel launcher declarations; their definitions are not in this chunk.
void TINShiftForwardCUDAKernelLauncher(Tensor input, Tensor shift,
                                       Tensor output);

void TINShiftBackwardCUDAKernelLauncher(Tensor grad_output, Tensor shift,
                                        Tensor grad_input);

// CUDA entry points: each passes its three tensors straight to the launcher
// of the corresponding name.
void tin_shift_forward_cuda(Tensor input, Tensor shift, Tensor output) {
  TINShiftForwardCUDAKernelLauncher(input, shift, output);
}

void tin_shift_backward_cuda(Tensor grad_output, Tensor shift,
                             Tensor grad_input) {
  TINShiftBackwardCUDAKernelLauncher(grad_output, shift, grad_input);
}

// Device-agnostic entry points (declared only; definitions are not in this
// chunk).
void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output);
void tin_shift_backward_impl(Tensor grad_output, Tensor shift,
                             Tensor grad_input);

// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(tin_shift_forward_impl, CUDA, tin_shift_forward_cuda);
REGISTER_DEVICE_IMPL(tin_shift_backward_impl, CUDA, tin_shift_backward_cuda);

// ---- upfirdn2d ----

// upfirdn2d_op is only declared here and is registered directly as the CUDA
// implementation, with no intermediate *_cuda wrapper in this chunk.
torch::Tensor upfirdn2d_op(const torch::Tensor& input,
                           const torch::Tensor& kernel, int up_x, int up_y,
                           int down_x, int down_y, int pad_x0, int pad_x1,
                           int pad_y0, int pad_y1);

torch::Tensor upfirdn2d_op_impl(const torch::Tensor& input,
                                const torch::Tensor& kernel, int up_x, int up_y,
                                int down_x, int down_y, int pad_x0, int pad_x1,
                                int pad_y0, int pad_y1);

// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(upfirdn2d_op_impl, CUDA, upfirdn2d_op);

// ---- Voxelization ----

// Kernel launcher declarations; their definitions are not in this chunk.
// NDim defaults to 3 on the launchers only.
// NOTE(review): `std::vector` has no template argument in these signatures;
// it looks like an angle-bracketed type was stripped by text extraction --
// confirm upstream.
int HardVoxelizeForwardCUDAKernelLauncher(
    const at::Tensor& points, at::Tensor& voxels, at::Tensor& coors,
    at::Tensor& num_points_per_voxel, const std::vector voxel_size,
    const std::vector coors_range, const int max_points, const int max_voxels,
    const int NDim = 3);

// (this declaration continues on the next line of the dump)
void DynamicVoxelizeForwardCUDAKernelLauncher(
    const at::Tensor& points, at::Tensor& coors, const std::vector
// Tail of the DynamicVoxelizeForwardCUDAKernelLauncher declaration begun on
// the previous line.
// NOTE(review): `std::vector` has no template argument anywhere in this
// section; it looks like an angle-bracketed type was stripped by text
// extraction -- confirm upstream.
voxel_size, const std::vector coors_range, const int NDim = 3);

// CUDA entry point: returns the int produced by
// HardVoxelizeForwardCUDAKernelLauncher for the same arguments.
int hard_voxelize_forward_cuda(const at::Tensor& points, at::Tensor& voxels,
                               at::Tensor& coors,
                               at::Tensor& num_points_per_voxel,
                               const std::vector voxel_size,
                               const std::vector coors_range,
                               const int max_points, const int max_voxels,
                               const int NDim) {
  return HardVoxelizeForwardCUDAKernelLauncher(
      points, voxels, coors, num_points_per_voxel, voxel_size, coors_range,
      max_points, max_voxels, NDim);
};  // the ';' after the body is an empty declaration (harmless)

// CUDA entry point: passes every argument through to
// DynamicVoxelizeForwardCUDAKernelLauncher unchanged and in the same order.
void dynamic_voxelize_forward_cuda(const at::Tensor& points, at::Tensor& coors,
                                   const std::vector voxel_size,
                                   const std::vector coors_range,
                                   const int NDim) {
  DynamicVoxelizeForwardCUDAKernelLauncher(points, coors, voxel_size,
                                           coors_range, NDim);
};

// Device-agnostic entry points (declared only; definitions are not in this
// chunk). Unlike the launchers, NDim has no default value here.
int hard_voxelize_forward_impl(const at::Tensor& points, at::Tensor& voxels,
                               at::Tensor& coors,
                               at::Tensor& num_points_per_voxel,
                               const std::vector voxel_size,
                               const std::vector coors_range,
                               const int max_points, const int max_voxels,
                               const int NDim);

void dynamic_voxelize_forward_impl(const at::Tensor& points, at::Tensor& coors,
                                   const std::vector voxel_size,
                                   const std::vector coors_range,
                                   const int NDim);

// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(hard_voxelize_forward_impl, CUDA,
                     hard_voxelize_forward_cuda);
REGISTER_DEVICE_IMPL(dynamic_voxelize_forward_impl, CUDA,
                     dynamic_voxelize_forward_cuda);

// ---- Rotated feature align ----

// Kernel launcher declarations; their definitions are not in this chunk.
void RotatedFeatureAlignForwardCUDAKernelLauncher(const Tensor features,
                                                  const Tensor best_bboxes,
                                                  const float spatial_scale,
                                                  const int points,
                                                  Tensor output);

void RotatedFeatureAlignBackwardCUDAKernelLauncher(const Tensor top_grad,
                                                   const Tensor best_bboxes,
                                                   const float spatial_scale,
                                                   const int points,
                                                   Tensor bottom_grad);

// CUDA entry point for the forward pass: passes every argument through to the
// forward launcher unchanged and in the same order.
void rotated_feature_align_forward_cuda(const Tensor features,
                                        const Tensor best_bboxes,
                                        const float spatial_scale,
                                        const int points, Tensor output) {
  RotatedFeatureAlignForwardCUDAKernelLauncher(features, best_bboxes,
                                               spatial_scale, points, output);
};

// CUDA entry point for the backward pass.
// (the parameter list continues on the next line of the dump)
void rotated_feature_align_backward_cuda(const Tensor top_grad,
                                         const Tensor best_bboxes,
                                         const float spatial_scale,
// Tail of rotated_feature_align_backward_cuda, whose signature begins on the
// previous line. The body passes every argument to the backward launcher.
const int points, Tensor bottom_grad) {
  RotatedFeatureAlignBackwardCUDAKernelLauncher(
      top_grad, best_bboxes, spatial_scale, points, bottom_grad);
};  // the ';' after the body is an empty declaration (harmless)

// Device-agnostic entry points (declared only; definitions are not in this
// chunk).
void rotated_feature_align_forward_impl(const Tensor features,
                                        const Tensor best_bboxes,
                                        const float spatial_scale,
                                        const int points, Tensor output);

void rotated_feature_align_backward_impl(const Tensor top_grad,
                                         const Tensor best_bboxes,
                                         const float spatial_scale,
                                         const int points, Tensor bottom_grad);

// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(rotated_feature_align_forward_impl, CUDA,
                     rotated_feature_align_forward_cuda);
REGISTER_DEVICE_IMPL(rotated_feature_align_backward_impl, CUDA,
                     rotated_feature_align_backward_cuda);

// ---- Points in polygons ----

// Kernel launcher declaration; its definition is not in this chunk.
void PointsInPolygonsForwardCUDAKernelLauncher(const at::Tensor points,
                                               const at::Tensor polygons,
                                               const int rows, const int cols,
                                               at::Tensor output);

// CUDA entry point. The wrapper takes `output` as its third parameter but the
// launcher takes it last, so the call reorders the arguments accordingly.
void points_in_polygons_forward_cuda(const Tensor points, const Tensor polygons,
                                     Tensor output, const int rows,
                                     const int cols) {
  PointsInPolygonsForwardCUDAKernelLauncher(points, polygons, rows, cols,
                                            output);
};

// Device-agnostic entry point (declared only; definition is not in this chunk).
void points_in_polygons_forward_impl(const Tensor points, const Tensor polygons,
                                     Tensor output, const int rows,
                                     const int cols);

// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(points_in_polygons_forward_impl, CUDA,
                     points_in_polygons_forward_cuda);

// ---- Minimum-area polygons ----

// Kernel launcher declaration; its definition is not in this chunk.
void MinAreaPolygonsCUDAKernelLauncher(const Tensor pointsets, Tensor polygons);

// CUDA entry point: passes both tensors straight to the launcher.
void min_area_polygons_cuda(const Tensor pointsets, Tensor polygons) {
  MinAreaPolygonsCUDAKernelLauncher(pointsets, polygons);
}

// Device-agnostic entry point (declared only; definition is not in this chunk).
void min_area_polygons_impl(const Tensor pointsets, Tensor polygons);

// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(min_area_polygons_impl, CUDA, min_area_polygons_cuda);

// ---- Active rotated filter ----

// Kernel launcher declarations; their definitions are not in this chunk.
void ActiveRotatedFilterForwardCUDAKernelLauncher(const Tensor input,
                                                  const Tensor indices,
                                                  Tensor output);

void ActiveRotatedFilterBackwardCUDAKernelLauncher(const Tensor grad_out,
                                                   const Tensor indices,
                                                   Tensor grad_in);

// CUDA entry point for the forward pass: passes every argument through to the
// forward launcher unchanged and in the same order.
void active_rotated_filter_forward_cuda(const Tensor input,
                                        const Tensor indices, Tensor output) {
  ActiveRotatedFilterForwardCUDAKernelLauncher(input, indices, output);
};

// Return type of the function whose name starts the next line of the dump.
void
// Continuation of a definition whose return type sits at the end of the
// previous line of the dump. The body passes every argument to the backward
// launcher unchanged.
active_rotated_filter_backward_cuda(const Tensor grad_out, const Tensor indices,
                                    Tensor grad_in) {
  ActiveRotatedFilterBackwardCUDAKernelLauncher(grad_out, indices, grad_in);
};  // the ';' after the body is an empty declaration (harmless)

// Device-agnostic entry points (declared only; definitions are not in this
// chunk).
void active_rotated_filter_forward_impl(const Tensor input,
                                        const Tensor indices, Tensor output);

void active_rotated_filter_backward_impl(const Tensor grad_out,
                                         const Tensor indices, Tensor grad_in);

// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(active_rotated_filter_forward_impl, CUDA,
                     active_rotated_filter_forward_cuda);
REGISTER_DEVICE_IMPL(active_rotated_filter_backward_impl, CUDA,
                     active_rotated_filter_backward_cuda);

// ---- Convex IoU / GIoU ----

// Kernel launcher declarations; their definitions are not in this chunk.
void ConvexIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons,
                                 Tensor ious);

void ConvexGIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons,
                                  Tensor output);

// CUDA entry points: each passes its three tensors straight to the launcher
// of the corresponding name.
void convex_iou_cuda(const Tensor pointsets, const Tensor polygons,
                     Tensor ious) {
  ConvexIoUCUDAKernelLauncher(pointsets, polygons, ious);
}

void convex_giou_cuda(const Tensor pointsets, const Tensor polygons,
                      Tensor output) {
  ConvexGIoUCUDAKernelLauncher(pointsets, polygons, output);
}

// Device-agnostic entry points (declared only; definitions are not in this
// chunk).
void convex_iou_impl(const Tensor pointsets, const Tensor polygons,
                     Tensor ious);
void convex_giou_impl(const Tensor pointsets, const Tensor polygons,
                      Tensor output);

// REGISTER_DEVICE_IMPL is defined outside this chunk.
REGISTER_DEVICE_IMPL(convex_iou_impl, CUDA, convex_iou_cuda);
REGISTER_DEVICE_IMPL(convex_giou_impl, CUDA, convex_giou_cuda);

================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/deform_conv.cpp ================================================
// Copyright (c) OpenMMLab.
All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void deformable_im2col_impl(Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor data_col) { DISPATCH_DEVICE_IMPL(deformable_im2col_impl, data_im, data_offset, channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, parallel_imgs, deformable_group, data_col); } void deformable_col2im_impl(Tensor data_col, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_im) { DISPATCH_DEVICE_IMPL(deformable_col2im_impl, data_col, data_offset, channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, parallel_imgs, deformable_group, grad_im); } void deformable_col2im_coord_impl( Tensor data_col, Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_offset) { DISPATCH_DEVICE_IMPL(deformable_col2im_coord_impl, data_col, data_im, data_offset, channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, parallel_imgs, deformable_group, grad_offset); } void deform_conv_shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, at::Tensor weight, int kH, int kW, int dH, int dW, int padH, int padW, int dilationH, 
int dilationW, int group, int deformable_group) { TORCH_CHECK( weight.ndimension() == 4, "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, but got: %s", weight.ndimension()); TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); TORCH_CHECK(kW > 0 && kH > 0, "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW), "kernel size should be consistent with weight, ", "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH, kW, weight.size(2), weight.size(3)); TORCH_CHECK(dW > 0 && dH > 0, "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); TORCH_CHECK( dilationW > 0 && dilationH > 0, "dilation should be greater than 0, but got dilationH: %d dilationW: %d", dilationH, dilationW); int ndim = input.ndimension(); int dimf = 0; int dimh = 1; int dimw = 2; if (ndim == 4) { dimf++; dimh++; dimw++; } TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s", ndim); long nInputPlane = weight.size(1) * group; long inputHeight = input.size(dimh); long inputWidth = input.size(dimw); long nOutputPlane = weight.size(0); long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; TORCH_CHECK(nInputPlane % deformable_group == 0, "input channels must divide deformable group size"); if (outputWidth < 1 || outputHeight < 1) AT_ERROR( "Given input size: (%ld x %ld x %ld). " "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, outputWidth); TORCH_CHECK(input.size(1) == nInputPlane, "invalid number of input planes, expected: %d, but got: %d", nInputPlane, input.size(1)); TORCH_CHECK((inputHeight >= kH && inputWidth >= kW), "input image is smaller than kernel"); TORCH_CHECK( (offset.size(2) == outputHeight && offset.size(3) == outputWidth), "invalid spatial size of offset, expected height: %d width: %d, but " "got height: %d width: %d", outputHeight, outputWidth, offset.size(2), offset.size(3)); TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), "invalid number of channels of offset"); if (gradOutput != NULL) { TORCH_CHECK( gradOutput->size(dimf) == nOutputPlane, "invalid number of gradOutput planes, expected: %d, but got: %d", nOutputPlane, gradOutput->size(dimf)); TORCH_CHECK( (gradOutput->size(dimh) == outputHeight && gradOutput->size(dimw) == outputWidth), "invalid size of gradOutput, expected height: %d width: %d , but " "got height: %d width: %d", outputHeight, outputWidth, gradOutput->size(dimh), gradOutput->size(dimw)); } } void deform_conv_forward(Tensor input, Tensor weight, Tensor offset, Tensor output, Tensor columns, Tensor ones, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, int im2col_step) { if (input.device().is_cuda()) { #ifdef MMCV_WITH_CUDA CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(offset); CHECK_CUDA_INPUT(weight); CHECK_CUDA_INPUT(output); CHECK_CUDA_INPUT(columns); CHECK_CUDA_INPUT(ones); #else AT_ERROR("DeformConv is not compiled with GPU support"); #endif } else { CHECK_CPU_INPUT(input); CHECK_CPU_INPUT(offset); CHECK_CPU_INPUT(weight); CHECK_CPU_INPUT(output); CHECK_CPU_INPUT(columns); CHECK_CPU_INPUT(ones); } deform_conv_shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW, dilationH, dilationW, group, deformable_group); at::DeviceGuard guard(input.device()); int batch = 1; if 
(input.ndimension() == 3) { // Force batch batch = 0; input.unsqueeze_(0); offset.unsqueeze_(0); } // todo: assert batchsize dividable by im2col_step long batchSize = input.size(0); long nInputPlane = input.size(1); long inputHeight = input.size(2); long inputWidth = input.size(3); long nOutputPlane = weight.size(0); long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth}); columns = at::zeros( {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, input.options()); if (ones.ndimension() != 2 || ones.size(0) * ones.size(1) < outputHeight * outputWidth) { ones = at::ones({outputHeight, outputWidth}, input.options()); } input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth}); offset = offset.view({batchSize / im2col_step, im2col_step, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); Tensor output_buffer = at::zeros({batchSize / im2col_step, nOutputPlane, im2col_step * outputHeight, outputWidth}, output.options()); output_buffer = output_buffer.view( {output_buffer.size(0), group, output_buffer.size(1) / group, output_buffer.size(2), output_buffer.size(3)}); for (int elt = 0; elt < batchSize / im2col_step; elt++) { deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, im2col_step, deformable_group, columns); columns = columns.view({group, columns.size(0) / group, columns.size(1)}); weight = weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); for (int g = 0; g < group; g++) { output_buffer[elt][g] = output_buffer[elt][g] .flatten(1) .addmm_(weight[g].flatten(1), columns[g]) 
.view_as(output_buffer[elt][g]); } columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), weight.size(3), weight.size(4)}); } output_buffer = output_buffer.view( {output_buffer.size(0), output_buffer.size(1) * output_buffer.size(2), output_buffer.size(3), output_buffer.size(4)}); output_buffer = output_buffer.view({batchSize / im2col_step, nOutputPlane, im2col_step, outputHeight, outputWidth}); output_buffer.transpose_(1, 2); output.copy_(output_buffer); output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); offset = offset.view( {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); if (batch == 0) { output = output.view({nOutputPlane, outputHeight, outputWidth}); input = input.view({nInputPlane, inputHeight, inputWidth}); offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); } } void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput, Tensor gradInput, Tensor gradOffset, Tensor weight, Tensor columns, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, int im2col_step) { if (input.device().is_cuda()) { #ifdef MMCV_WITH_CUDA CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(offset); CHECK_CUDA_INPUT(gradOutput); CHECK_CUDA_INPUT(gradInput); CHECK_CUDA_INPUT(gradOffset); CHECK_CUDA_INPUT(weight); CHECK_CUDA_INPUT(columns); #else AT_ERROR("DeformConv is not compiled with GPU support"); #endif } else { CHECK_CPU_INPUT(input); CHECK_CPU_INPUT(offset); CHECK_CPU_INPUT(gradOutput); CHECK_CPU_INPUT(gradInput); CHECK_CPU_INPUT(gradOffset); CHECK_CPU_INPUT(weight); CHECK_CPU_INPUT(columns); } deform_conv_shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH, padW, dilationH, dilationW, group, deformable_group); at::DeviceGuard guard(input.device()); int batch = 1; 
if (input.ndimension() == 3) { // Force batch batch = 0; input = input.view({1, input.size(0), input.size(1), input.size(2)}); offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); gradOutput = gradOutput.view( {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); } long batchSize = input.size(0); long nInputPlane = input.size(1); long inputHeight = input.size(2); long inputWidth = input.size(3); long nOutputPlane = weight.size(0); long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); columns = at::zeros( {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, input.options()); // change order of grad output gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth}); gradOutput.transpose_(1, 2); gradInput = gradInput.view({batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth}); input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth}); gradOffset = gradOffset.view({batchSize / im2col_step, im2col_step, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); offset = offset.view({batchSize / im2col_step, im2col_step, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); for (int elt = 0; elt < batchSize / im2col_step; elt++) { // divide into groups columns = columns.view({group, columns.size(0) / group, columns.size(1)}); weight = weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); gradOutput = gradOutput.view( {gradOutput.size(0), group, gradOutput.size(1) / group, gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)}); for (int g = 0; g < group; g++) { columns[g] = 
columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), gradOutput[elt][g].flatten(1), 0.0f, 1.0f); } columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); gradOutput = gradOutput.view( {gradOutput.size(0), gradOutput.size(1) * gradOutput.size(2), gradOutput.size(3), gradOutput.size(4), gradOutput.size(5)}); deformable_col2im_coord_impl(columns, input[elt], offset[elt], nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, im2col_step, deformable_group, gradOffset[elt]); deformable_col2im_impl(columns, offset[elt], nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, im2col_step, deformable_group, gradInput[elt]); weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), weight.size(3), weight.size(4)}); } gradOutput.transpose_(1, 2); gradOutput = gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); gradOffset = gradOffset.view( {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); offset = offset.view( {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); if (batch == 0) { gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); input = input.view({nInputPlane, inputHeight, inputWidth}); gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); gradOffset = gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); } } void deform_conv_backward_parameters(Tensor input, Tensor offset, Tensor gradOutput, Tensor gradWeight, Tensor columns, Tensor ones, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, float scale, int im2col_step) { if (input.device().is_cuda()) { #ifdef MMCV_WITH_CUDA 
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(offset); CHECK_CUDA_INPUT(gradOutput); CHECK_CUDA_INPUT(gradWeight); CHECK_CUDA_INPUT(columns); CHECK_CUDA_INPUT(ones); #else AT_ERROR("DeformConv is not compiled with GPU support"); #endif } else { CHECK_CPU_INPUT(input); CHECK_CPU_INPUT(offset); CHECK_CPU_INPUT(gradOutput); CHECK_CPU_INPUT(gradWeight); CHECK_CPU_INPUT(columns); CHECK_CPU_INPUT(ones); } deform_conv_shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW, padH, padW, dilationH, dilationW, group, deformable_group); at::DeviceGuard guard(input.device()); int batch = 1; if (input.ndimension() == 3) { // Force batch batch = 0; input = input.view( at::IntList({1, input.size(0), input.size(1), input.size(2)})); gradOutput = gradOutput.view( {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); } long batchSize = input.size(0); long nInputPlane = input.size(1); long inputHeight = input.size(2); long inputWidth = input.size(3); long nOutputPlane = gradWeight.size(0); long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); columns = at::zeros( {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, input.options()); gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth}); gradOutput.transpose_(1, 2); Tensor gradOutputBuffer = at::zeros_like(gradOutput); gradOutputBuffer = gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step, outputHeight, outputWidth}); gradOutputBuffer = gradOutputBuffer.contiguous(); gradOutputBuffer.copy_(gradOutput); gradOutputBuffer = gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step * outputHeight, outputWidth}); gradOutput.transpose_(1, 2); gradOutput = gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); input 
= input.view({batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth}); offset = offset.view({batchSize / im2col_step, im2col_step, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); for (int elt = 0; elt < batchSize / im2col_step; elt++) { deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, im2col_step, deformable_group, columns); // divide into group gradOutputBuffer = gradOutputBuffer.view( {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group, gradOutputBuffer.size(2), gradOutputBuffer.size(3)}); columns = columns.view({group, columns.size(0) / group, columns.size(1)}); gradWeight = gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1), gradWeight.size(2), gradWeight.size(3)}); for (int g = 0; g < group; g++) { gradWeight[g] = gradWeight[g] .flatten(1) .addmm_(gradOutputBuffer[elt][g].flatten(1), columns[g].transpose(1, 0), 1.0, scale) .view_as(gradWeight[g]); } gradOutputBuffer = gradOutputBuffer.view( {gradOutputBuffer.size(0), gradOutputBuffer.size(1) * gradOutputBuffer.size(2), gradOutputBuffer.size(3), gradOutputBuffer.size(4)}); columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1), gradWeight.size(2), gradWeight.size(3), gradWeight.size(4)}); } input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); offset = offset.view( {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); if (batch == 0) { gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); input = input.view({nInputPlane, inputHeight, inputWidth}); } } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/deform_conv_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "deform_conv_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void deform_conv_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, im2col_step; SSAttrs(attr) .get("kW", kW) .get("kH", kH) .get("dW", dW) .get("dH", dH) .get("padW", padW) .get("padH", padH) .get("dilationW", dilationW) .get("dilationH", dilationH) .get("group", group) .get("deformable_group", deformable_group) .get("im2col_step", im2col_step) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& weight = buildATensor(ctx, ins[1]); const auto& offset = buildATensor(ctx, ins[2]); auto output = buildATensor(ctx, outs[0]); auto columns = buildATensor(ctx, outs[1]); auto ones = buildATensor(ctx, outs[2]); deform_conv_forward(input, weight, offset, output, columns, ones, kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, im2col_step); } void deform_conv_backward_input_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, im2col_step; SSAttrs(attr) .get("kW", kW) .get("kH", kH) .get("dW", dW) .get("dH", dH) .get("padW", padW) .get("padH", padH) .get("dilationW", dilationW) .get("dilationH", dilationH) .get("group", group) .get("deformable_group", deformable_group) .get("im2col_step", im2col_step) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& offset = buildATensor(ctx, ins[1]); const auto& gradOutput = buildATensor(ctx, ins[2]); auto gradInput = buildATensor(ctx, outs[0]); auto gradOffset = buildATensor(ctx, outs[1]); auto weight = buildATensor(ctx, outs[2]); auto columns = buildATensor(ctx, outs[3]); deform_conv_backward_input(input, offset, gradOutput, gradInput, gradOffset, 
weight, columns, kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, im2col_step); } void deform_conv_backward_parameters_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, im2col_step; float scale; SSAttrs(attr) .get("kW", kW) .get("kH", kH) .get("dW", dW) .get("dH", dH) .get("padW", padW) .get("padH", padH) .get("dilationW", dilationW) .get("dilationH", dilationH) .get("group", group) .get("deformable_group", deformable_group) .get("scale", scale) .get("im2col_step", im2col_step) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& offset = buildATensor(ctx, ins[1]); const auto& gradOutput = buildATensor(ctx, ins[2]); auto gradWeight = buildATensor(ctx, outs[0]); auto columns = buildATensor(ctx, outs[1]); auto ones = buildATensor(ctx, outs[2]); deform_conv_backward_parameters(input, offset, gradOutput, gradWeight, columns, ones, kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, scale, im2col_step); } #endif void deform_conv_forward_cpu_parrots(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, im2col_step; SSAttrs(attr) .get("kW", kW) .get("kH", kH) .get("dW", dW) .get("dH", dH) .get("padW", padW) .get("padH", padH) .get("dilationW", dilationW) .get("dilationH", dilationH) .get("group", group) .get("deformable_group", deformable_group) .get("im2col_step", im2col_step) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& weight = buildATensor(ctx, ins[1]); const auto& offset = buildATensor(ctx, ins[2]); auto output = buildATensor(ctx, outs[0]); auto columns = buildATensor(ctx, outs[1]); auto ones = buildATensor(ctx, outs[2]); deform_conv_forward(input, weight, offset, output, columns, ones, kW, kH, 
dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, im2col_step); } void deform_conv_backward_input_cpu_parrots(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, im2col_step; SSAttrs(attr) .get("kW", kW) .get("kH", kH) .get("dW", dW) .get("dH", dH) .get("padW", padW) .get("padH", padH) .get("dilationW", dilationW) .get("dilationH", dilationH) .get("group", group) .get("deformable_group", deformable_group) .get("im2col_step", im2col_step) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& offset = buildATensor(ctx, ins[1]); const auto& gradOutput = buildATensor(ctx, ins[2]); auto gradInput = buildATensor(ctx, outs[0]); auto gradOffset = buildATensor(ctx, outs[1]); auto weight = buildATensor(ctx, outs[2]); auto columns = buildATensor(ctx, outs[3]); deform_conv_backward_input(input, offset, gradOutput, gradInput, gradOffset, weight, columns, kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, im2col_step); } void deform_conv_backward_parameters_cpu_parrots( HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, im2col_step; float scale; SSAttrs(attr) .get("kW", kW) .get("kH", kH) .get("dW", dW) .get("dH", dH) .get("padW", padW) .get("padH", padH) .get("dilationW", dilationW) .get("dilationH", dilationH) .get("group", group) .get("deformable_group", deformable_group) .get("scale", scale) .get("im2col_step", im2col_step) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& offset = buildATensor(ctx, ins[1]); const auto& gradOutput = buildATensor(ctx, ins[2]); auto gradWeight = buildATensor(ctx, outs[0]); auto columns = buildATensor(ctx, outs[1]); auto ones = buildATensor(ctx, outs[2]); deform_conv_backward_parameters(input, offset, 
gradOutput, gradWeight, columns, ones, kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, scale, im2col_step); } PARROTS_EXTENSION_REGISTER(deform_conv_forward) .attr("kW") .attr("kH") .attr("dW") .attr("dH") .attr("padW") .attr("padH") .attr("dilationW") .attr("dilationH") .attr("group") .attr("deformable_group") .attr("im2col_step") .input(3) .output(3) .apply(deform_conv_forward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(deform_conv_forward_cuda_parrots) #endif .done(); PARROTS_EXTENSION_REGISTER(deform_conv_backward_input) .attr("kW") .attr("kH") .attr("dW") .attr("dH") .attr("padW") .attr("padH") .attr("dilationW") .attr("dilationH") .attr("group") .attr("deformable_group") .attr("im2col_step") .input(3) .output(4) .apply(deform_conv_backward_input_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(deform_conv_backward_input_cuda_parrots) #endif .done(); PARROTS_EXTENSION_REGISTER(deform_conv_backward_parameters) .attr("kW") .attr("kH") .attr("dW") .attr("dH") .attr("padW") .attr("padH") .attr("dilationW") .attr("dilationH") .attr("group") .attr("deformable_group") .attr("scale") .attr("im2col_step") .input(3) .output(3) .apply(deform_conv_backward_parameters_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(deform_conv_backward_parameters_cuda_parrots) #endif .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/deform_conv_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved
#ifndef DEFORM_CONV_PYTORCH_H
#define DEFORM_CONV_PYTORCH_H
// NOTE(review): the #include below has no header name in this copy of the
// file; presumably it was lost on export. Restore it before compiling.
#include
using namespace at;

// Forward pass of deformable convolution. Writes the result into `output`;
// `columns` and `ones` are work buffers. The kernel (kW, kH), stride (dW, dH),
// padding (padW, padH) and dilation arguments are given width-first.
void deform_conv_forward(Tensor input, Tensor weight, Tensor offset,
                         Tensor output, Tensor columns, Tensor ones, int kW,
                         int kH, int dW, int dH, int padW, int padH,
                         int dilationW, int dilationH, int group,
                         int deformable_group, int im2col_step);

// Backward pass w.r.t. the input and the sampling offsets. Writes into
// `gradInput` and `gradOffset`; `columns` is a work buffer.
void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput,
                                Tensor gradInput, Tensor gradOffset,
                                Tensor weight, Tensor columns, int kW, int kH,
                                int dW, int dH, int padW, int padH,
                                int dilationW, int dilationH, int group,
                                int deformable_group, int im2col_step);

// Backward pass w.r.t. the weights. Accumulates into `gradWeight`, with each
// contribution multiplied by `scale`; `columns` and `ones` are work buffers.
void deform_conv_backward_parameters(Tensor input, Tensor offset,
                                     Tensor gradOutput, Tensor gradWeight,
                                     Tensor columns, Tensor ones, int kW,
                                     int kH, int dW, int dH, int padW, int padH,
                                     int dilationW, int dilationH, int group,
                                     int deformable_group, float scale,
                                     int im2col_step);
#endif // DEFORM_CONV_PYTORCH_H

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/deform_roi_pool.cpp
================================================
// Copyright (c) OpenMMLab.
All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void deform_roi_pool_forward_impl(Tensor input, Tensor rois, Tensor offset, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma) { DISPATCH_DEVICE_IMPL(deform_roi_pool_forward_impl, input, rois, offset, output, pooled_height, pooled_width, spatial_scale, sampling_ratio, gamma); } void deform_roi_pool_backward_impl(Tensor grad_output, Tensor input, Tensor rois, Tensor offset, Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma) { DISPATCH_DEVICE_IMPL(deform_roi_pool_backward_impl, grad_output, input, rois, offset, grad_input, grad_offset, pooled_height, pooled_width, spatial_scale, sampling_ratio, gamma); } void deform_roi_pool_forward(Tensor input, Tensor rois, Tensor offset, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma) { deform_roi_pool_forward_impl(input, rois, offset, output, pooled_height, pooled_width, spatial_scale, sampling_ratio, gamma); } void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois, Tensor offset, Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma) { deform_roi_pool_backward_impl(grad_output, input, rois, offset, grad_input, grad_offset, pooled_height, pooled_width, spatial_scale, sampling_ratio, gamma); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/deform_roi_pool_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "deform_roi_pool_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA /*void deform_roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor offset, * Tensor output, int pooled_height, * int pooled_width, float spatial_scale, * int sampling_ratio, float gamma); */ void deform_roi_pool_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pooled_height; int pooled_width; float spatial_scale; int sampling_ratio; float gamma; SSAttrs(attr) .get("pooled_height", pooled_height) .get("pooled_width", pooled_width) .get("spatial_scale", spatial_scale) .get("sampling_ratio", sampling_ratio) .get("gamma", gamma) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& rois = buildATensor(ctx, ins[1]); const auto& offset = buildATensor(ctx, ins[2]); auto output = buildATensor(ctx, outs[0]); deform_roi_pool_forward_cuda(input, rois, offset, output, pooled_height, pooled_width, spatial_scale, sampling_ratio, gamma); } /*void deform_roi_pool_backward_cuda(Tensor grad_output, Tensor input, * Tensor rois, Tensor offset, * Tensor grad_input, Tensor grad_offset, * int pooled_height, int pooled_width, * float spatial_scale, int sampling_ratio, * float gamma); */ void deform_roi_pool_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pooled_height; int pooled_width; float spatial_scale; int sampling_ratio; float gamma; SSAttrs(attr) .get("pooled_height", pooled_height) .get("pooled_width", pooled_width) .get("spatial_scale", spatial_scale) .get("sampling_ratio", sampling_ratio) .get("gamma", gamma) .done(); const auto& grad_output = buildATensor(ctx, ins[0]); const auto& input = buildATensor(ctx, ins[1]); const auto& rois = buildATensor(ctx, ins[2]); const auto& offset = buildATensor(ctx, ins[3]); auto grad_input = buildATensor(ctx, outs[0]); auto 
grad_offset = buildATensor(ctx, outs[1]); deform_roi_pool_backward_cuda(grad_output, input, rois, offset, grad_input, grad_offset, pooled_height, pooled_width, spatial_scale, sampling_ratio, gamma); } PARROTS_EXTENSION_REGISTER(deform_roi_pool_forward) .attr("pooled_height") .attr("pooled_width") .attr("spatial_scale") .attr("sampling_ratio") .attr("gamma") .input(3) .output(1) .apply(deform_roi_pool_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(deform_roi_pool_backward) .attr("pooled_height") .attr("pooled_width") .attr("spatial_scale") .attr("sampling_ratio") .attr("gamma") .input(4) .output(2) .apply(deform_roi_pool_backward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/deform_roi_pool_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef DEFORM_ROI_POOL_PYTORCH_H #define DEFORM_ROI_POOL_PYTORCH_H #include using namespace at; void deform_roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor offset, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma); void deform_roi_pool_backward_cuda(Tensor grad_output, Tensor input, Tensor rois, Tensor offset, Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma); #endif // DEFORM_ROI_POOL_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/focal_loss.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void sigmoid_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight, Tensor output, float gamma, float alpha) { DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_forward_impl, input, target, weight, output, gamma, alpha); } void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target, Tensor weight, Tensor grad_input, float gamma, float alpha) { DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_backward_impl, input, target, weight, grad_input, gamma, alpha); } void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight, Tensor output, float gamma, float alpha) { DISPATCH_DEVICE_IMPL(softmax_focal_loss_forward_impl, input, target, weight, output, gamma, alpha); } void softmax_focal_loss_backward_impl(Tensor input, Tensor target, Tensor weight, Tensor buff, Tensor grad_input, float gamma, float alpha) { DISPATCH_DEVICE_IMPL(softmax_focal_loss_backward_impl, input, target, weight, buff, grad_input, gamma, alpha); } void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight, Tensor output, float gamma, float alpha) { sigmoid_focal_loss_forward_impl(input, target, weight, output, gamma, alpha); } void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight, Tensor grad_input, float gamma, float alpha) { sigmoid_focal_loss_backward_impl(input, target, weight, grad_input, gamma, alpha); } void softmax_focal_loss_forward(Tensor input, Tensor target, Tensor weight, Tensor output, float gamma, float alpha) { softmax_focal_loss_forward_impl(input, target, weight, output, gamma, alpha); } void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight, Tensor buff, Tensor grad_input, float gamma, float alpha) { softmax_focal_loss_backward_impl(input, target, weight, buff, grad_input, gamma, alpha); } ================================================ FILE: 
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/focal_loss_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "focal_loss_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void sigmoid_focal_loss_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { float gamma; float alpha; SSAttrs(attr).get("gamma", gamma).get("alpha", alpha).done(); // get inputs and outputs const auto& input = buildATensor(ctx, ins[0]); const auto& target = buildATensor(ctx, ins[1]); const auto& weight = buildATensor(ctx, ins[2]); auto output = buildATensor(ctx, outs[0]); sigmoid_focal_loss_forward_cuda(input, target, weight, output, gamma, alpha); } void sigmoid_focal_loss_backward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { float gamma; float alpha; SSAttrs(attr).get("gamma", gamma).get("alpha", alpha).done(); // get inputs and outputs const auto& input = buildATensor(ctx, ins[0]); const auto& target = buildATensor(ctx, ins[1]); const auto& weight = buildATensor(ctx, ins[2]); auto grad_input = buildATensor(ctx, outs[0]); sigmoid_focal_loss_backward_cuda(input, target, weight, grad_input, gamma, alpha); } void softmax_focal_loss_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { float gamma; float alpha; SSAttrs(attr).get("gamma", gamma).get("alpha", alpha).done(); // get inputs and outputs const auto& input = buildATensor(ctx, ins[0]); const auto& target = buildATensor(ctx, ins[1]); const auto& weight = buildATensor(ctx, ins[2]); auto output = buildATensor(ctx, outs[0]); softmax_focal_loss_forward_cuda(input, target, weight, output, gamma, alpha); } void softmax_focal_loss_backward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const 
OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { float gamma; float alpha; SSAttrs(attr).get("gamma", gamma).get("alpha", alpha).done(); // get inputs and outputs const auto& input = buildATensor(ctx, ins[0]); const auto& target = buildATensor(ctx, ins[1]); const auto& weight = buildATensor(ctx, ins[2]); auto buff = buildATensor(ctx, outs[0]); auto grad_input = buildATensor(ctx, outs[1]); softmax_focal_loss_backward_cuda(input, target, weight, buff, grad_input, gamma, alpha); } PARROTS_EXTENSION_REGISTER(sigmoid_focal_loss_forward) .attr("gamma") .attr("alpha") .input(3) .output(1) .apply(sigmoid_focal_loss_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(sigmoid_focal_loss_backward) .attr("gamma") .attr("alpha") .input(3) .output(1) .apply(sigmoid_focal_loss_backward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(softmax_focal_loss_forward) .attr("gamma") .attr("alpha") .input(3) .output(1) .apply(softmax_focal_loss_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(softmax_focal_loss_backward) .attr("gamma") .attr("alpha") .input(3) .output(2) .apply(softmax_focal_loss_backward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/focal_loss_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef FOCAL_LOSS_PYTORCH_H #define FOCAL_LOSS_PYTORCH_H #include using namespace at; void sigmoid_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight, Tensor output, float gamma, float alpha); void sigmoid_focal_loss_backward_cuda(Tensor input, Tensor target, Tensor weight, Tensor grad_input, float gamma, float alpha); void softmax_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight, Tensor output, float gamma, float alpha); void softmax_focal_loss_backward_cuda(Tensor input, Tensor target, Tensor weight, Tensor buff, Tensor grad_input, float gamma, float alpha); #endif // FOCAL_LOSS_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/furthest_point_sample.cpp ================================================ // Modified from // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/sampling.cpp #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void furthest_point_sampling_forward_impl(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m) { DISPATCH_DEVICE_IMPL(furthest_point_sampling_forward_impl, points_tensor, temp_tensor, idx_tensor, b, n, m); } void furthest_point_sampling_with_dist_forward_impl(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m) { DISPATCH_DEVICE_IMPL(furthest_point_sampling_with_dist_forward_impl, points_tensor, temp_tensor, idx_tensor, b, n, m); } void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m) { furthest_point_sampling_forward_impl(points_tensor, temp_tensor, idx_tensor, b, n, m); } void furthest_point_sampling_with_dist_forward(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m) { furthest_point_sampling_with_dist_forward_impl(points_tensor, temp_tensor, idx_tensor, b, n, m); } ================================================ 
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/furthest_point_sample_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "furthest_point_sample_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void furthest_point_sample_forward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int b, n, m; SSAttrs(attr).get("b", b).get("n", n).get("m", m).done(); auto points_tensor = buildATensor(ctx, ins[0]); auto temp_tensor = buildATensor(ctx, ins[1]); auto idx_tensor = buildATensor(ctx, outs[0]); furthest_point_sampling_forward(points_tensor, temp_tensor, idx_tensor, b, n, m); } void furthest_point_sampling_with_dist_forward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int b, n, m; SSAttrs(attr).get("b", b).get("n", n).get("m", m).done(); auto points_tensor = buildATensor(ctx, ins[0]); auto temp_tensor = buildATensor(ctx, ins[1]); auto idx_tensor = buildATensor(ctx, outs[0]); furthest_point_sampling_with_dist_forward(points_tensor, temp_tensor, idx_tensor, b, n, m); } PARROTS_EXTENSION_REGISTER(furthest_point_sampling_forward) .attr("b") .attr("n") .attr("m") .input(2) .output(1) .apply(furthest_point_sample_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(furthest_point_sampling_with_dist_forward) .attr("b") .attr("n") .attr("m") .input(2) .output(1) .apply(furthest_point_sampling_with_dist_forward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/furthest_point_sample_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef FURTHEST_POINT_SAMPLE_PYTORCH_H #define FURTHEST_POINT_SAMPLE_PYTORCH_H #include using namespace at; void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m); void furthest_point_sampling_with_dist_forward(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m); #endif // FURTHEST_POINT_SAMPLE_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/fused_bias_leakyrelu.cpp ================================================ // Modified from // https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_bias_act.cpp /* Copyright (c) 2021, NVIDIA Corporation. All rights reserved. NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator Augmentation (ADA) ======================================================================= 1. Definitions "Licensor" means any person or entity that distributes its Work. "Software" means the original work of authorship made available under this License. "Work" means the Software and any additions to or derivative works of the Software that are made available under this License. The terms "reproduce," "reproduction," "derivative works," and "distribution" have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. Works, including the Software, are "made available" under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. 2. License Grants 2.1 Copyright Grant. 
Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 3. Limitations 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work ("Your Terms") only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use non-commercially. Notwithstanding the foregoing, NVIDIA and its affiliates may use the Work and any derivative works commercially. As used herein, "non-commercially" means for research or evaluation purposes only. 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grant in Section 2.1) will terminate immediately. 3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License. 
3.6 Termination. If you violate any term of this License, then your rights under this License (including the grant in Section 2.1) will terminate immediately. 4. Disclaimer of Warranty. THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. 5. Limitation of Liability. EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. =======================================================================
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the fused bias + leaky-ReLU op. Returns whatever
// DISPATCH_DEVICE_IMPL yields for the given arguments (macro from
// pytorch_device_registry.hpp, not visible here).
torch::Tensor fused_bias_leakyrelu_op_impl(const torch::Tensor& input,
                                           const torch::Tensor& bias,
                                           const torch::Tensor& refer, int act,
                                           int grad, float alpha, float scale) {
  return DISPATCH_DEVICE_IMPL(fused_bias_leakyrelu_op_impl, input, bias, refer,
                              act, grad, alpha, scale);
}

// Public entry point: forwards every argument to fused_bias_leakyrelu_op_impl
// and returns its result.
torch::Tensor fused_bias_leakyrelu(const torch::Tensor& input,
                                   const torch::Tensor& bias,
                                   const torch::Tensor& refer, int act,
                                   int grad, float alpha, float scale) {
  return fused_bias_leakyrelu_op_impl(input, bias, refer, act, grad, alpha,
                                      scale);
}
================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/fused_bias_parrots.cpp ================================================
// Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include using namespace at; using namespace parrots; torch::Tensor fused_bias_leakyrelu(const torch::Tensor &input, const torch::Tensor &bias, const torch::Tensor &refer, int act, int grad, float alpha, float scale); void fused_bias_leakyrelu_parrots(CudaContext &ctx, const SSElement &attr, const OperatorBase::in_list_t &ins, OperatorBase::out_list_t &outs) { int act, grad; float alpha, scale; SSAttrs(attr) .get("act", act) .get("grad", grad) .get("alpha", alpha) .get("scale", scale) .done(); const auto &input = buildATensor(ctx, ins[0]); const auto &bias = buildATensor(ctx, ins[1]); const auto &refer = buildATensor(ctx, ins[2]); auto out = fused_bias_leakyrelu(input, bias, refer, act, grad, alpha, scale); updateDArray(ctx, out, outs[0]); } PARROTS_EXTENSION_REGISTER(fused_bias_leakyrelu) .attr("act") .attr("grad") .attr("alpha") .attr("scale") .input(3) .output(1) .apply(fused_bias_leakyrelu_parrots) .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/gather_points.cpp ================================================ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void gather_points_forward_impl(int b, int c, int n, int npoints, const Tensor points, const Tensor idx, Tensor out) { DISPATCH_DEVICE_IMPL(gather_points_forward_impl, b, c, n, npoints, points, idx, out); } void gather_points_backward_impl(int b, int c, int n, int npoints, const Tensor grad_out, const Tensor idx, Tensor grad_points) { DISPATCH_DEVICE_IMPL(gather_points_backward_impl, b, c, n, npoints, grad_out, idx, grad_points); } void gather_points_forward(Tensor points_tensor, Tensor idx_tensor, Tensor out_tensor, int b, int c, int n, int npoints) { gather_points_forward_impl(b, c, n, npoints, points_tensor, idx_tensor, out_tensor); } void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor, Tensor grad_points_tensor, int b, int c, int n, int npoints) 
{ gather_points_backward_impl(b, c, n, npoints, grad_out_tensor, idx_tensor, grad_points_tensor); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/gather_points_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "gather_points_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void gather_points_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int b, c, n, npoints; SSAttrs(attr) .get("b", b) .get("c", c) .get("n", n) .get("npoints", npoints) .done(); auto points_tensor = buildATensor(ctx, ins[0]); auto idx_tensor = buildATensor(ctx, ins[1]); auto out_tensor = buildATensor(ctx, outs[0]); gather_points_forward(points_tensor, idx_tensor, out_tensor, b, c, n, npoints); } void gather_points_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int b, c, n, npoints; SSAttrs(attr) .get("b", b) .get("c", c) .get("n", n) .get("npoints", npoints) .done(); auto grad_out_tensor = buildATensor(ctx, ins[0]); auto idx_tensor = buildATensor(ctx, ins[1]); auto grad_points_tensor = buildATensor(ctx, outs[0]); gather_points_backward(grad_out_tensor, idx_tensor, grad_points_tensor, b, c, n, npoints); } PARROTS_EXTENSION_REGISTER(gather_points_forward) .attr("b") .attr("c") .attr("n") .attr("npoints") .input(2) .output(1) .apply(gather_points_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(gather_points_backward) .attr("b") .attr("c") .attr("n") .attr("npoints") .input(2) .output(1) .apply(gather_points_backward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/gather_points_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef GATHER_POINTS_PYTORCH_H #define GATHER_POINTS_PYTORCH_H #include using namespace at; void gather_points_forward(Tensor points_tensor, Tensor idx_tensor, Tensor out_tensor, int b, int c, int n, int npoints); void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor, Tensor grad_points_tensor, int b, int c, int n, int npoints); #endif // GATHER_POINTS_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/group_points.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved. // Modified from // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points.cpp #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void group_points_forward_impl(int b, int c, int n, int npoints, int nsample, const Tensor points, const Tensor idx, Tensor out) { DISPATCH_DEVICE_IMPL(group_points_forward_impl, b, c, n, npoints, nsample, points, idx, out); } void group_points_backward_impl(int b, int c, int n, int npoints, int nsample, const Tensor grad_out, const Tensor idx, Tensor grad_points) { DISPATCH_DEVICE_IMPL(group_points_backward_impl, b, c, n, npoints, nsample, grad_out, idx, grad_points); } void group_points_forward(Tensor points_tensor, Tensor idx_tensor, Tensor out_tensor, int b, int c, int n, int npoints, int nsample) { DISPATCH_DEVICE_IMPL(group_points_forward_impl, b, c, n, npoints, nsample, points_tensor, idx_tensor, out_tensor); } void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor, Tensor grad_points_tensor, int b, int c, int n, int npoints, int nsample) { group_points_backward_impl(b, c, n, npoints, nsample, grad_out_tensor, idx_tensor, grad_points_tensor); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/group_points_parrots.cpp ================================================ // Copyright (c) 
OpenMMLab. All rights reserved #include #include #include #include "group_points_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void group_points_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int b, c, n, npoints, nsample; SSAttrs(attr) .get("b", b) .get("c", c) .get("n", n) .get("npoints", npoints) .get("nsample", nsample) .done(); auto points_tensor = buildATensor(ctx, ins[0]); auto idx_tensor = buildATensor(ctx, ins[1]); auto out_tensor = buildATensor(ctx, outs[0]); group_points_forward(points_tensor, idx_tensor, out_tensor, b, c, n, npoints, nsample); } void group_points_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int b, c, n, npoints, nsample; SSAttrs(attr) .get("b", b) .get("c", c) .get("n", n) .get("npoints", npoints) .get("nsample", nsample) .done(); auto grad_out_tensor = buildATensor(ctx, ins[0]); auto idx_tensor = buildATensor(ctx, ins[1]); auto grad_points_tensor = buildATensor(ctx, outs[0]); group_points_backward(grad_out_tensor, idx_tensor, grad_points_tensor, b, c, n, npoints, nsample); } PARROTS_EXTENSION_REGISTER(group_points_forward) .attr("b") .attr("c") .attr("n") .attr("npoints") .attr("nsample") .input(2) .output(1) .apply(group_points_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(group_points_backward) .attr("b") .attr("c") .attr("n") .attr("npoints") .attr("nsample") .input(2) .output(1) .apply(group_points_backward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/group_points_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef GROUP_POINTS_PYTORCH_H #define GROUP_POINTS_PYTORCH_H #include using namespace at; void group_points_forward(Tensor points_tensor, Tensor idx_tensor, Tensor out_tensor, int b, int c, int n, int npoints, int nsample); void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor, Tensor grad_points_tensor, int b, int c, int n, int npoints, int nsample); #endif // GROUP_POINTS_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/info.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved // modified from // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp #include "pytorch_cpp_helper.hpp" #ifdef MMCV_WITH_CUDA #ifndef HIP_DIFF #include int get_cudart_version() { return CUDART_VERSION; } #endif #endif std::string get_compiling_cuda_version() { #ifdef MMCV_WITH_CUDA #ifndef HIP_DIFF std::ostringstream oss; // copied from // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 auto printCudaStyleVersion = [&](int v) { oss << (v / 1000) << "." << (v / 10 % 100); if (v % 10 != 0) { oss << "." << (v % 10); } }; printCudaStyleVersion(get_cudart_version()); return oss.str(); #else return std::string("rocm not available"); #endif #else return std::string("not available"); #endif } // similar to // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp std::string get_compiler_version() { std::ostringstream ss; #if defined(__GNUC__) #ifndef __clang__ { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } #endif #endif #if defined(__clang_major__) { ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
<< __clang_patchlevel__; } #endif #if defined(_MSC_VER) { ss << "MSVC " << _MSC_FULL_VER; } #endif return ss.str(); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/iou3d.cpp ================================================ // Modified from // https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms.cpp /* 3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others) Written by Shaoshuai Shi All Rights Reserved 2019-2020. */ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8; void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a, const int num_b, const Tensor boxes_b, Tensor ans_overlap) { DISPATCH_DEVICE_IMPL(iou3d_boxes_overlap_bev_forward_impl, num_a, boxes_a, num_b, boxes_b, ans_overlap); } void iou3d_boxes_iou_bev_forward_impl(const int num_a, const Tensor boxes_a, const int num_b, const Tensor boxes_b, Tensor ans_iou) { DISPATCH_DEVICE_IMPL(iou3d_boxes_iou_bev_forward_impl, num_a, boxes_a, num_b, boxes_b, ans_iou); } void iou3d_nms_forward_impl(const Tensor boxes, unsigned long long *mask, int boxes_num, float nms_overlap_thresh) { DISPATCH_DEVICE_IMPL(iou3d_nms_forward_impl, boxes, mask, boxes_num, nms_overlap_thresh); } void iou3d_nms_normal_forward_impl(const Tensor boxes, unsigned long long *mask, int boxes_num, float nms_overlap_thresh) { DISPATCH_DEVICE_IMPL(iou3d_nms_normal_forward_impl, boxes, mask, boxes_num, nms_overlap_thresh); } void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b, Tensor ans_overlap) { // params boxes_a: (N, 5) [x1, y1, x2, y2, ry] // params boxes_b: (M, 5) // params ans_overlap: (N, M) int num_a = boxes_a.size(0); int num_b = boxes_b.size(0); iou3d_boxes_overlap_bev_forward_impl(num_a, boxes_a, num_b, boxes_b, ans_overlap); } void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b, Tensor 
ans_iou) { // params boxes_a: (N, 5) [x1, y1, x2, y2, ry] // params boxes_b: (M, 5) // params ans_overlap: (N, M) int num_a = boxes_a.size(0); int num_b = boxes_b.size(0); iou3d_boxes_iou_bev_forward_impl(num_a, boxes_a, num_b, boxes_b, ans_iou); } void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num, float nms_overlap_thresh) { // params boxes: (N, 5) [x1, y1, x2, y2, ry] // params keep: (N) CHECK_CONTIGUOUS(boxes); CHECK_CONTIGUOUS(keep); int boxes_num = boxes.size(0); int64_t *keep_data = keep.data_ptr(); int64_t *keep_num_data = keep_num.data_ptr(); const int col_blocks = (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS; Tensor mask = at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong)); unsigned long long *mask_data = (unsigned long long *)mask.data_ptr(); iou3d_nms_forward_impl(boxes, mask_data, boxes_num, nms_overlap_thresh); at::Tensor mask_cpu = mask.to(at::kCPU); unsigned long long *mask_host = (unsigned long long *)mask_cpu.data_ptr(); std::vector remv_cpu(col_blocks); memset(&remv_cpu[0], 0, sizeof(unsigned long long) * col_blocks); int num_to_keep = 0; for (int i = 0; i < boxes_num; i++) { int nblock = i / THREADS_PER_BLOCK_NMS; int inblock = i % THREADS_PER_BLOCK_NMS; if (!(remv_cpu[nblock] & (1ULL << inblock))) { keep_data[num_to_keep++] = i; unsigned long long *p = &mask_host[0] + i * col_blocks; for (int j = nblock; j < col_blocks; j++) { remv_cpu[j] |= p[j]; } } *keep_num_data = num_to_keep; } } void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num, float nms_overlap_thresh) { // params boxes: (N, 5) [x1, y1, x2, y2, ry] // params keep: (N) CHECK_CONTIGUOUS(boxes); CHECK_CONTIGUOUS(keep); int boxes_num = boxes.size(0); int64_t *keep_data = keep.data_ptr(); int64_t *keep_num_data = keep_num.data_ptr(); const int col_blocks = (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS; Tensor mask = at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong)); unsigned long 
long *mask_data = (unsigned long long *)mask.data_ptr(); iou3d_nms_normal_forward_impl(boxes, mask_data, boxes_num, nms_overlap_thresh); at::Tensor mask_cpu = mask.to(at::kCPU); unsigned long long *mask_host = (unsigned long long *)mask_cpu.data_ptr(); std::vector remv_cpu(col_blocks); memset(&remv_cpu[0], 0, sizeof(unsigned long long) * col_blocks); int num_to_keep = 0; for (int i = 0; i < boxes_num; i++) { int nblock = i / THREADS_PER_BLOCK_NMS; int inblock = i % THREADS_PER_BLOCK_NMS; if (!(remv_cpu[nblock] & (1ULL << inblock))) { keep_data[num_to_keep++] = i; unsigned long long *p = &mask_host[0] + i * col_blocks; for (int j = nblock; j < col_blocks; j++) { remv_cpu[j] |= p[j]; } } } *keep_num_data = num_to_keep; } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/iou3d_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "iou3d_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void iou3d_boxes_iou_bev_forward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { auto boxes_a = buildATensor(ctx, ins[0]); auto boxes_b = buildATensor(ctx, ins[1]); auto ans_iou = buildATensor(ctx, outs[0]); iou3d_boxes_iou_bev_forward(boxes_a, boxes_b, ans_iou); } void iou3d_nms_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { float nms_overlap_thresh; SSAttrs(attr).get("nms_overlap_thresh", nms_overlap_thresh).done(); auto boxes = buildATensor(ctx, ins[0]); auto keep = buildATensor(ctx, outs[0]); auto keep_num = buildATensor(ctx, outs[1]); iou3d_nms_forward(boxes, keep, keep_num, nms_overlap_thresh); } void iou3d_nms_normal_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { float 
nms_overlap_thresh; SSAttrs(attr).get("nms_overlap_thresh", nms_overlap_thresh).done(); auto boxes = buildATensor(ctx, ins[0]); auto keep = buildATensor(ctx, outs[0]); auto keep_num = buildATensor(ctx, outs[1]); iou3d_nms_normal_forward(boxes, keep, keep_num, nms_overlap_thresh); } PARROTS_EXTENSION_REGISTER(iou3d_boxes_iou_bev_forward) .input(2) .output(1) .apply(iou3d_boxes_iou_bev_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(iou3d_nms_forward) .attr("nms_overlap_thresh") .input(1) .output(2) .apply(iou3d_nms_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(iou3d_nms_normal_forward) .attr("nms_overlap_thresh") .input(1) .output(2) .apply(iou3d_nms_normal_forward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/iou3d_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef IOU_3D_PYTORCH_H #define IOU_3D_PYTORCH_H #include using namespace at; void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b, Tensor ans_iou); void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num, float nms_overlap_thresh); void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num, float nms_overlap_thresh); #endif // IOU_3D_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/knn.cpp ================================================ // Modified from // https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/pointops/src/knnquery_heap #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void knn_forward_impl(int b, int n, int m, int nsample, const Tensor xyz, const Tensor new_xyz, Tensor idx, Tensor dist2) { DISPATCH_DEVICE_IMPL(knn_forward_impl, b, n, m, nsample, xyz, new_xyz, idx, dist2); } void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor, Tensor dist2_tensor, int b, int n, 
int m, int nsample) { knn_forward_impl(b, n, m, nsample, xyz_tensor, new_xyz_tensor, idx_tensor, dist2_tensor); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/knn_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "knn_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void knn_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int b, n, m, nsample; SSAttrs(attr) .get("b", b) .get("n", n) .get("m", m) .get("nsample", nsample) .done(); auto xyz_tensor = buildATensor(ctx, ins[0]); auto new_xyz_tensor = buildATensor(ctx, ins[1]); auto idx_tensor = buildATensor(ctx, outs[0]); auto dist2_tensor = buildATensor(ctx, outs[1]); knn_forward(xyz_tensor, new_xyz_tensor, idx_tensor, dist2_tensor, b, n, m, nsample); } PARROTS_EXTENSION_REGISTER(knn_forward) .attr("b") .attr("n") .attr("m") .attr("nsample") .input(2) .output(2) .apply(knn_forward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/knn_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef KNN_PYTORCH_H #define KNN_PYTORCH_H #include using namespace at; void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor, Tensor dist2_tensor, int b, int n, int m, int nsample); #endif // KNN_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/masked_conv2d.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void masked_im2col_forward_impl(const Tensor im, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w) { DISPATCH_DEVICE_IMPL(masked_im2col_forward_impl, im, mask_h_idx, mask_w_idx, col, kernel_h, kernel_w, pad_h, pad_w); } void masked_col2im_forward_impl(const Tensor col, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor im, int height, int width, int channels) { DISPATCH_DEVICE_IMPL(masked_col2im_forward_impl, col, mask_h_idx, mask_w_idx, im, height, width, channels); } void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w) { masked_im2col_forward_impl(im, mask_h_idx, mask_w_idx, col, kernel_h, kernel_w, pad_h, pad_w); } void masked_col2im_forward(const Tensor col, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor im, int height, int width, int channels) { masked_col2im_forward_impl(col, mask_h_idx, mask_w_idx, im, height, width, channels); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/masked_conv2d_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "masked_conv2d_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void masked_im2col_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { // im: (n, ic, h, w), kernel size (kh, kw) // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) int kernel_h, kernel_w, pad_h, pad_w; SSAttrs(attr) .get("kernel_h", kernel_h) .get("kernel_w", kernel_w) .get("pad_h", pad_h) .get("pad_w", pad_w) .done(); const auto& im = buildATensor(ctx, ins[0]); const auto& mask_h_idx = buildATensor(ctx, ins[1]); const auto& mask_w_idx = buildATensor(ctx, ins[2]); auto col = buildATensor(ctx, outs[0]); masked_im2col_forward_cuda(im, mask_h_idx, mask_w_idx, col, kernel_h, kernel_w, pad_h, pad_w); } void masked_col2im_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { // im: (n, ic, h, w), kernel size (kh, kw) // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh) int height, width, channels; SSAttrs(attr) .get("height", height) .get("width", width) .get("channels", channels) .done(); const auto& col = buildATensor(ctx, ins[0]); const auto& mask_h_idx = buildATensor(ctx, ins[1]); const auto& mask_w_idx = buildATensor(ctx, ins[2]); auto im = buildATensor(ctx, outs[0]); masked_col2im_forward_cuda(col, mask_h_idx, mask_w_idx, im, height, width, channels); } PARROTS_EXTENSION_REGISTER(masked_im2col_forward) .attr("kernel_h") .attr("kernel_w") .attr("pad_h") .attr("pad_w") .input(3) .output(1) .apply(masked_im2col_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(masked_col2im_forward) .attr("height") .attr("width") .attr("channels") .input(3) .output(1) .apply(masked_col2im_forward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/masked_conv2d_pytorch.h 
================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef MASKED_CONV2D_PYTORCH_H #define MASKED_CONV2D_PYTORCH_H #include using namespace at; void masked_im2col_forward_cuda(const Tensor im, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w); void masked_col2im_forward_cuda(const Tensor col, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor im, int height, int width, int channels); #endif // MASKED_CONV2D_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/min_area_polygons.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void min_area_polygons_impl(const Tensor pointsets, Tensor polygons) { DISPATCH_DEVICE_IMPL(min_area_polygons_impl, pointsets, polygons); } void min_area_polygons(const Tensor pointsets, Tensor polygons) { min_area_polygons_impl(pointsets, polygons); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/min_area_polygons_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "min_area_polygons_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void min_area_polygons_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { auto pointsets = buildATensor(ctx, ins[0]); auto polygons = buildATensor(ctx, outs[0]); min_area_polygons(pointsets, polygons); } PARROTS_EXTENSION_REGISTER(min_area_polygons) .input(1) .output(1) .apply(min_area_polygons_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/min_area_polygons_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef MIN_AREA_POLYGONS_PYTORCH_H #define MIN_AREA_POLYGONS_PYTORCH_H #include using namespace at; void min_area_polygons(const Tensor pointsets, Tensor polygons); #endif // MIN_AREA_POLYGONS_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void modulated_deformable_im2col_impl( const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor data_col) { DISPATCH_DEVICE_IMPL(modulated_deformable_im2col_impl, data_im, data_offset, data_mask, batch_size, channels, height_im, width_im, height_col, width_col, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, data_col); } void modulated_deformable_col2im_impl( const Tensor data_col, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor grad_im) { DISPATCH_DEVICE_IMPL(modulated_deformable_col2im_impl, data_col, data_offset, data_mask, batch_size, channels, height_im, width_im, height_col, width_col, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, grad_im); } void modulated_deformable_col2im_coord_impl( const Tensor data_col, const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor grad_offset, Tensor grad_mask) { 
DISPATCH_DEVICE_IMPL(modulated_deformable_col2im_coord_impl, data_col, data_im, data_offset, data_mask, batch_size, channels, height_im, width_im, height_col, width_col, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, grad_offset, grad_mask); } void modulated_deform_conv_forward( Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const int group, const int deformable_group, const bool with_bias) { at::DeviceGuard guard(input.device()); const int batch = input.size(0); const int channels = input.size(1); const int height = input.size(2); const int width = input.size(3); const int channels_out = weight.size(0); const int channels_kernel = weight.size(1); const int kernel_h_ = weight.size(2); const int kernel_w_ = weight.size(3); if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h_, kernel_w, kernel_h_, kernel_w_); if (channels != channels_kernel * group) AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", channels, channels_kernel * group); const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; if (ones.ndimension() != 2 || ones.size(0) * ones.size(1) < height_out * width_out) { // Resize plane and fill with ones... 
ones = at::ones({height_out, width_out}, input.options()); } // resize output output = output.view({batch, channels_out, height_out, width_out}).zero_(); // resize temporary columns columns = at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options()); output = output.view({output.size(0), group, output.size(1) / group, output.size(2), output.size(3)}); for (int b = 0; b < batch; b++) { modulated_deformable_im2col_impl( input[b], offset[b], mask[b], 1, channels, height, width, height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, columns); // divide into group weight = weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); columns = columns.view({group, columns.size(0) / group, columns.size(1)}); for (int g = 0; g < group; g++) { output[b][g] = output[b][g] .flatten(1) .addmm_(weight[g].flatten(1), columns[g]) .view_as(output[b][g]); } weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), weight.size(3), weight.size(4)}); columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); } output = output.view({output.size(0), output.size(1) * output.size(2), output.size(3), output.size(4)}); if (with_bias) { output += bias.view({1, bias.size(0), 1, 1}); } } void modulated_deform_conv_backward( Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight, Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output, int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, const bool with_bias) { at::DeviceGuard guard(input.device()); const int batch = input.size(0); const int channels = input.size(1); const int height = input.size(2); const int width = input.size(3); const int channels_kernel = weight.size(1); const int kernel_h_ = 
weight.size(2); const int kernel_w_ = weight.size(3); if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h_, kernel_w, kernel_h_, kernel_w_); if (channels != channels_kernel * group) AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", channels, channels_kernel * group); const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; if (ones.ndimension() != 2 || ones.size(0) * ones.size(1) < height_out * width_out) { // Resize plane and fill with ones... ones = at::ones({height_out, width_out}, input.options()); } grad_input = grad_input.view({batch, channels, height, width}); columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out}, input.options()); grad_output = grad_output.view({grad_output.size(0), group, grad_output.size(1) / group, grad_output.size(2), grad_output.size(3)}); for (int b = 0; b < batch; b++) { // divide int group columns = columns.view({group, columns.size(0) / group, columns.size(1)}); weight = weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); for (int g = 0; g < group; g++) { columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), grad_output[b][g].flatten(1), 0.0f, 1.0f); } columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), weight.size(3), weight.size(4)}); // gradient w.r.t. input coordinate data modulated_deformable_col2im_coord_impl( columns, input[b], offset[b], mask[b], 1, channels, height, width, height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b], grad_mask[b]); // gradient w.r.t. 
input data modulated_deformable_col2im_impl( columns, offset[b], mask[b], 1, channels, height, width, height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, grad_input[b]); // gradient w.r.t. weight, dWeight should accumulate across the batch and // group modulated_deformable_im2col_impl( input[b], offset[b], mask[b], 1, channels, height, width, height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, columns); columns = columns.view({group, columns.size(0) / group, columns.size(1)}); grad_weight = grad_weight.view({group, grad_weight.size(0) / group, grad_weight.size(1), grad_weight.size(2), grad_weight.size(3)}); if (with_bias) grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); for (int g = 0; g < group; g++) { grad_weight[g] = grad_weight[g] .flatten(1) .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) .view_as(grad_weight[g]); if (with_bias) { grad_bias[g] = grad_bias[g] .view({-1, 1}) .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) .view(-1); } } columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1), grad_weight.size(2), grad_weight.size(3), grad_weight.size(4)}); if (with_bias) grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); } grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1), grad_output.size(2), grad_output.size(3), grad_output.size(4)}); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "modulated_deform_conv_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void modulated_deform_conv_forward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, deformable_group, with_bias; SSAttrs(attr) .get("kernel_h", kernel_h) .get("kernel_w", kernel_w) .get("stride_h", stride_h) .get("stride_w", stride_w) .get("pad_h", pad_h) .get("pad_w", pad_w) .get("dilation_h", dilation_h) .get("dilation_w", dilation_w) .get("group", group) .get("deformable_group", deformable_group) .get("with_bias", with_bias) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& weight = buildATensor(ctx, ins[1]); const auto& bias = buildATensor(ctx, ins[2]); const auto& ones = buildATensor(ctx, ins[3]); const auto& offset = buildATensor(ctx, ins[4]); const auto& mask = buildATensor(ctx, ins[5]); auto output = buildATensor(ctx, outs[0]); auto columns = buildATensor(ctx, outs[1]); modulated_deform_conv_forward(input, weight, bias, ones, offset, mask, output, columns, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, deformable_group, with_bias); } void modulated_deform_conv_backward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, deformable_group, with_bias; SSAttrs(attr) .get("kernel_h", kernel_h) .get("kernel_w", kernel_w) .get("stride_h", stride_h) .get("stride_w", stride_w) .get("pad_h", pad_h) .get("pad_w", pad_w) .get("dilation_h", dilation_h) .get("dilation_w", dilation_w) .get("group", group) .get("deformable_group", deformable_group) .get("with_bias", with_bias) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& weight = 
buildATensor(ctx, ins[1]); const auto& bias = buildATensor(ctx, ins[2]); const auto& ones = buildATensor(ctx, ins[3]); const auto& offset = buildATensor(ctx, ins[4]); const auto& mask = buildATensor(ctx, ins[5]); auto columns = buildATensor(ctx, outs[0]); auto grad_input = buildATensor(ctx, outs[1]); auto grad_weight = buildATensor(ctx, outs[2]); auto grad_bias = buildATensor(ctx, outs[3]); auto grad_offset = buildATensor(ctx, outs[4]); auto grad_mask = buildATensor(ctx, outs[5]); auto grad_output = buildATensor(ctx, outs[6]); modulated_deform_conv_backward( input, weight, bias, ones, offset, mask, columns, grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, deformable_group, with_bias); } #endif void modulated_deform_conv_forward_cpu_parrots( HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, deformable_group, with_bias; SSAttrs(attr) .get("kernel_h", kernel_h) .get("kernel_w", kernel_w) .get("stride_h", stride_h) .get("stride_w", stride_w) .get("pad_h", pad_h) .get("pad_w", pad_w) .get("dilation_h", dilation_h) .get("dilation_w", dilation_w) .get("group", group) .get("deformable_group", deformable_group) .get("with_bias", with_bias) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& weight = buildATensor(ctx, ins[1]); const auto& bias = buildATensor(ctx, ins[2]); const auto& ones = buildATensor(ctx, ins[3]); const auto& offset = buildATensor(ctx, ins[4]); const auto& mask = buildATensor(ctx, ins[5]); auto output = buildATensor(ctx, outs[0]); auto columns = buildATensor(ctx, outs[1]); modulated_deform_conv_forward(input, weight, bias, ones, offset, mask, output, columns, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, deformable_group, with_bias); } void 
modulated_deform_conv_backward_cpu_parrots( HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, deformable_group, with_bias; SSAttrs(attr) .get("kernel_h", kernel_h) .get("kernel_w", kernel_w) .get("stride_h", stride_h) .get("stride_w", stride_w) .get("pad_h", pad_h) .get("pad_w", pad_w) .get("dilation_h", dilation_h) .get("dilation_w", dilation_w) .get("group", group) .get("deformable_group", deformable_group) .get("with_bias", with_bias) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& weight = buildATensor(ctx, ins[1]); const auto& bias = buildATensor(ctx, ins[2]); const auto& ones = buildATensor(ctx, ins[3]); const auto& offset = buildATensor(ctx, ins[4]); const auto& mask = buildATensor(ctx, ins[5]); auto columns = buildATensor(ctx, outs[0]); auto grad_input = buildATensor(ctx, outs[1]); auto grad_weight = buildATensor(ctx, outs[2]); auto grad_bias = buildATensor(ctx, outs[3]); auto grad_offset = buildATensor(ctx, outs[4]); auto grad_mask = buildATensor(ctx, outs[5]); auto grad_output = buildATensor(ctx, outs[6]); modulated_deform_conv_backward( input, weight, bias, ones, offset, mask, columns, grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, deformable_group, with_bias); } PARROTS_EXTENSION_REGISTER(modulated_deform_conv_forward) .attr("kernel_h") .attr("kernel_w") .attr("stride_h") .attr("stride_w") .attr("pad_h") .attr("pad_w") .attr("dilation_h") .attr("dilation_w") .attr("group") .attr("deformable_group") .attr("with_bias") .input(6) .output(2) .apply(modulated_deform_conv_forward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(modulated_deform_conv_forward_cuda_parrots) #endif .done(); PARROTS_EXTENSION_REGISTER(modulated_deform_conv_backward) .attr("kernel_h") .attr("kernel_w") 
.attr("stride_h") .attr("stride_w") .attr("pad_h") .attr("pad_w") .attr("dilation_h") .attr("dilation_w") .attr("group") .attr("deformable_group") .attr("with_bias") .input(6) .output(7) .apply(modulated_deform_conv_backward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(modulated_deform_conv_backward_cuda_parrots) #endif .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef MODULATED_DEFORM_CONV_PYTORCH_H #define MODULATED_DEFORM_CONV_PYTORCH_H #include using namespace at; void modulated_deform_conv_forward( Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const int group, const int deformable_group, const bool with_bias); void modulated_deform_conv_backward( Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight, Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output, int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, const bool with_bias); #endif // MODULATED_DEFORM_CONV_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/ms_deform_attn.cpp ================================================ /*! ************************************************************************************************** * Deformable DETR * Copyright (c) 2020 SenseTime. All Rights Reserved. 
* Licensed under the Apache License, Version 2.0 [see LICENSE for details] ************************************************************************************************** * Modified from *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 ************************************************************************************************** */ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" Tensor ms_deform_attn_impl_forward(const Tensor &value, const Tensor &spatial_shapes, const Tensor &level_start_index, const Tensor &sampling_loc, const Tensor &attn_weight, const int im2col_step) { return DISPATCH_DEVICE_IMPL(ms_deform_attn_impl_forward, value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); } void ms_deform_attn_impl_backward( const Tensor &value, const Tensor &spatial_shapes, const Tensor &level_start_index, const Tensor &sampling_loc, const Tensor &attn_weight, const Tensor &grad_output, Tensor &grad_value, Tensor &grad_sampling_loc, Tensor &grad_attn_weight, const int im2col_step) { DISPATCH_DEVICE_IMPL(ms_deform_attn_impl_backward, value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, grad_value, grad_sampling_loc, grad_attn_weight, im2col_step); } Tensor ms_deform_attn_forward(const Tensor &value, const Tensor &spatial_shapes, const Tensor &level_start_index, const Tensor &sampling_loc, const Tensor &attn_weight, const int im2col_step) { at::DeviceGuard guard(value.device()); return ms_deform_attn_impl_forward(value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); } void ms_deform_attn_backward(const Tensor &value, const Tensor &spatial_shapes, const Tensor &level_start_index, const Tensor &sampling_loc, const Tensor &attn_weight, const Tensor &grad_output, Tensor &grad_value, Tensor &grad_sampling_loc, Tensor &grad_attn_weight, const int im2col_step) { at::DeviceGuard guard(value.device()); 
ms_deform_attn_impl_backward(value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, grad_value, grad_sampling_loc, grad_attn_weight, im2col_step); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/ms_deform_attn_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include using namespace at; using namespace parrots; Tensor ms_deform_attn_forward(const Tensor &value, const Tensor &spatial_shapes, const Tensor &level_start_index, const Tensor &sampling_loc, const Tensor &attn_weight, const int im2col_step); void ms_deform_attn_backward(const Tensor &value, const Tensor &spatial_shapes, const Tensor &level_start_index, const Tensor &sampling_loc, const Tensor &attn_weight, const Tensor &grad_output, Tensor &grad_value, Tensor &grad_sampling_loc, Tensor &grad_attn_weight, const int im2col_step); void ms_deform_attn_forward_parrots(CudaContext &ctx, const SSElement &attr, const OperatorBase::in_list_t &ins, OperatorBase::out_list_t &outs) { int im2col_step; SSAttrs(attr).get("im2col_step", im2col_step).done(); const auto &value = buildATensor(ctx, ins[0]); const auto &spatial_shapes = buildATensor(ctx, ins[1]); const auto &level_start_index = buildATensor(ctx, ins[2]); const auto &sampling_loc = buildATensor(ctx, ins[3]); const auto &attn_weight = buildATensor(ctx, ins[4]); auto out = ms_deform_attn_forward(value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); updateDArray(ctx, out, outs[0]); } void ms_deform_attn_backward_parrots(CudaContext &ctx, const SSElement &attr, const OperatorBase::in_list_t &ins, OperatorBase::out_list_t &outs) { int im2col_step; SSAttrs(attr).get("im2col_step", im2col_step).done(); const auto &value = buildATensor(ctx, ins[0]); const auto &spatial_shapes = buildATensor(ctx, ins[1]); const auto &level_start_index = buildATensor(ctx, ins[2]); 
const auto &sampling_loc = buildATensor(ctx, ins[3]); const auto &attn_weight = buildATensor(ctx, ins[4]); const auto &grad_output = buildATensor(ctx, ins[5]); auto grad_value = buildATensor(ctx, outs[0]); auto grad_sampling_loc = buildATensor(ctx, outs[1]); auto grad_attn_weight = buildATensor(ctx, outs[2]); ms_deform_attn_backward(value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, grad_value, grad_sampling_loc, grad_attn_weight, im2col_step); } PARROTS_EXTENSION_REGISTER(ms_deform_attn_forward) .attr("im2col_step") .input(5) .output(1) .apply(ms_deform_attn_forward_parrots) .done(); PARROTS_EXTENSION_REGISTER(ms_deform_attn_backward) .attr("im2col_step") .input(6) .output(3) .apply(ms_deform_attn_backward_parrots) .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/nms.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset) { return DISPATCH_DEVICE_IMPL(nms_impl, boxes, scores, iou_threshold, offset); } Tensor softnms_impl(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold, float sigma, float min_score, int method, int offset) { return DISPATCH_DEVICE_IMPL(softnms_impl, boxes, scores, dets, iou_threshold, sigma, min_score, method, offset); } std::vector > nms_match_impl(Tensor dets, float iou_threshold) { return DISPATCH_DEVICE_IMPL(nms_match_impl, dets, iou_threshold); } Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset) { return nms_impl(boxes, scores, iou_threshold, offset); } Tensor softnms(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold, float sigma, float min_score, int method, int offset) { return softnms_impl(boxes, scores, dets, iou_threshold, sigma, min_score, method, offset); } std::vector > nms_match(Tensor dets, 
float iou_threshold) { return nms_match_impl(dets, iou_threshold); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/nms_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "nms_pytorch.h" using namespace parrots; // Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset); template void nms_parrots(T& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { float iou_threshold; int offset; SSAttrs(attr) .get("iou_threshold", iou_threshold) .get("offset", offset) .done(); at::Tensor boxes, scores; boxes = buildATensor(ctx, ins[0]); scores = buildATensor(ctx, ins[1]); auto out = nms(boxes, scores, iou_threshold, offset); updateDArray(ctx, out, outs[0]); } /*Tensor softnms(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold, * float sigma, float min_score, int method, int offset);*/ template void softnms_parrots(T& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { float iou_threshold, sigma, min_score; int method, offset; SSAttrs(attr) .get("iou_threshold", iou_threshold) .get("sigma", sigma) .get("min_score", min_score) .get("method", method) .get("offset", offset) .done(); at::Tensor boxes, scores, dets; boxes = buildATensor(ctx, ins[0]); scores = buildATensor(ctx, ins[1]); dets = buildATensor(ctx, ins[2]); auto out = softnms(boxes, scores, dets, iou_threshold, sigma, min_score, method, offset); updateDArray(ctx, out, outs[0]); } // std::vector > nms_match(Tensor dets, float iou_threshold); template void nms_match_parrots(T& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { float iou_threshold; SSAttrs(attr).get("iou_threshold", iou_threshold).done(); at::Tensor dets; dets = buildATensor(ctx, ins[0]); auto out = nms_match(dets, iou_threshold); int n = 
out.size(), m = 0; for (int i = 0; i < n; ++i) if (m < out[i].size()) m = out[i].size(); auto options = torch::TensorOptions().dtype(at::kInt); auto tensor = torch::zeros({n, m}, options); for (int i = 0; i < n; i++) tensor.slice(0, i, i + 1) = torch::from_blob(out[i].data(), {out[i].size()}, options); updateDArray(ctx, tensor, outs[0]); } /*Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order, * const Tensor dets_sorted, const float iou_threshold, * const int multi_label);*/ template void nms_rotated_parrots(T& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { float iou_threshold; int multi_label; SSAttrs(attr) .get("iou_threshold", iou_threshold) .get("multi_label", multi_label) .done(); at::Tensor dets, scores, order, dets_sorted; dets = buildATensor(ctx, ins[0]); scores = buildATensor(ctx, ins[1]); order = buildATensor(ctx, ins[2]); dets_sorted = buildATensor(ctx, ins[3]); auto out = nms_rotated(dets, scores, order, dets_sorted, iou_threshold, multi_label); updateDArray(ctx, out, outs[0]); } PARROTS_EXTENSION_REGISTER(nms) .attr("iou_threshold") .attr("offset") .input(2) .output(1) .apply(nms_parrots) #ifdef MMCV_WITH_CUDA .apply(nms_parrots) #endif .done(); PARROTS_EXTENSION_REGISTER(softnms) .attr("iou_threshold") .attr("sigma") .attr("min_score") .attr("method") .attr("offset") .input(3) .output(1) .apply(softnms_parrots) #ifdef MMCV_WITH_CUDA .apply(softnms_parrots) #endif .done(); PARROTS_EXTENSION_REGISTER(nms_match) .attr("iou_threshold") .input(1) .output(1) .apply(nms_match_parrots) #ifdef MMCV_WITH_CUDA .apply(nms_match_parrots) #endif .done(); PARROTS_EXTENSION_REGISTER(nms_rotated) .attr("multi_label") .attr("iou_threshold") .input(4) .output(1) .apply(nms_rotated_parrots) #ifdef MMCV_WITH_CUDA .apply(nms_rotated_parrots) #endif .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/nms_pytorch.h 
================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef NMS_PYTORCH_H #define NMS_PYTORCH_H #include at::Tensor nms(at::Tensor boxes, at::Tensor scores, float iou_threshold, int offset); at::Tensor softnms(at::Tensor boxes, at::Tensor scores, at::Tensor dets, float iou_threshold, float sigma, float min_score, int method, int offset); std::vector > nms_match(at::Tensor dets, float iou_threshold); at::Tensor nms_rotated(const at::Tensor dets, const at::Tensor scores, const at::Tensor order, const at::Tensor dets_sorted, const float iou_threshold, const int multi_label); #endif // NMS_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/nms_rotated.cpp ================================================ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved // modified from // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated.h #include "pytorch_cpp_helper.hpp" Tensor nms_rotated_cpu(const Tensor dets, const Tensor scores, const float iou_threshold); #ifdef MMCV_WITH_CUDA Tensor nms_rotated_cuda(const Tensor dets, const Tensor scores, const Tensor order, const Tensor dets_sorted, const float iou_threshold, const int multi_label); #endif // Interface for Python // inline is needed to prevent multiple function definitions when this header is // included by different cpps Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order, const Tensor dets_sorted, const float iou_threshold, const int multi_label) { assert(dets.device().is_cuda() == scores.device().is_cuda()); if (dets.device().is_cuda()) { #ifdef MMCV_WITH_CUDA return nms_rotated_cuda(dets, scores, order, dets_sorted, iou_threshold, multi_label); #else AT_ERROR("Not compiled with GPU support"); #endif } return nms_rotated_cpu(dets, scores, iou_threshold); } ================================================ FILE: 
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/pixel_group.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved // It is modified from https://github.com/WenmuZhou/PAN.pytorch #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" std::vector> pixel_group_impl( Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label, Tensor kernel_contour, int kernel_region_num, float dis_threshold) { return DISPATCH_DEVICE_IMPL(pixel_group_impl, score, mask, embedding, kernel_label, kernel_contour, kernel_region_num, dis_threshold); } std::vector> pixel_group( Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label, Tensor kernel_contour, int kernel_region_num, float distance_threshold) { score = score.contiguous(); mask = mask.contiguous(); embedding = embedding.contiguous(); kernel_label = kernel_label.contiguous(); kernel_contour = kernel_contour.contiguous(); return pixel_group_impl(score, mask, embedding, kernel_label, kernel_contour, kernel_region_num, distance_threshold); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/pixel_group_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "pixel_group_pytorch.h" using namespace parrots; using namespace std; template void pixel_group_parrots(T& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int kernel_region_num; float distance_threshold; SSAttrs(attr) .get("kernel_region_num", kernel_region_num) .get("distance_threshold", distance_threshold) .done(); at::Tensor score; at::Tensor mask; at::Tensor embedding; at::Tensor kernel_label; at::Tensor kernel_contour; score = buildATensor(ctx, ins[0]); mask = buildATensor(ctx, ins[1]); embedding = buildATensor(ctx, ins[2]); kernel_label = buildATensor(ctx, ins[3]); kernel_contour = buildATensor(ctx, ins[4]); auto out = pixel_group(score, mask, embedding, kernel_label, kernel_contour, kernel_region_num, distance_threshold); int n = out.size(); std::vector out_tensor; for (int i = 0; i < n; ++i) out_tensor.push_back(float(out[i].size())); for (int i = 0; i < n; ++i) out_tensor.insert(out_tensor.end(), out[i].begin(), out[i].end()); auto options = torch::TensorOptions().dtype(at::kFloat); auto tensor = torch::zeros({1, out_tensor.size()}, options); tensor.slice(0, 0, 1) = torch::from_blob(out_tensor.data(), {out_tensor.size()}, options); updateDArray(ctx, tensor, outs[0]); } PARROTS_EXTENSION_REGISTER(pixel_group) .attr("kernel_region_num") .attr("distance_threshold") .input(5) .output(1) .apply(pixel_group_parrots) #ifdef MMCV_WITH_CUDA .apply(pixel_group_parrots) #endif .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/pixel_group_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef PIXEL_GROUP_PYTORCH_H #define PIXEL_GROUP_PYTORCH_H #include using namespace at; std::vector> pixel_group( Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label, Tensor kernel_contour, int kernel_region_num, float distance_threshold); #endif // PIXEL_GROUP_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/points_in_boxes.cpp ================================================ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void points_in_boxes_part_forward_impl(int batch_size, int boxes_num, int pts_num, const Tensor boxes, const Tensor pts, Tensor box_idx_of_points) { DISPATCH_DEVICE_IMPL(points_in_boxes_part_forward_impl, batch_size, boxes_num, pts_num, boxes, pts, box_idx_of_points); } void points_in_boxes_all_forward_impl(int batch_size, int boxes_num, int pts_num, const Tensor boxes, const Tensor pts, Tensor box_idx_of_points) { DISPATCH_DEVICE_IMPL(points_in_boxes_all_forward_impl, batch_size, boxes_num, pts_num, boxes, pts, box_idx_of_points); } void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor, Tensor box_idx_of_points_tensor) { // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR // coordinate, z is the bottom center, each box params pts: (B, npoints, 3) // [x, y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), // default -1 int batch_size = boxes_tensor.size(0); int boxes_num = boxes_tensor.size(1); int pts_num = pts_tensor.size(1); points_in_boxes_part_forward_impl(batch_size, boxes_num, pts_num, boxes_tensor, pts_tensor, box_idx_of_points_tensor); } void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor, Tensor box_idx_of_points_tensor) { // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR // coordinate, z is the bottom center. 
params pts: (B, npoints, 3) [x, y, z] // in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default -1 int batch_size = boxes_tensor.size(0); int boxes_num = boxes_tensor.size(1); int pts_num = pts_tensor.size(1); points_in_boxes_all_forward_impl(batch_size, boxes_num, pts_num, boxes_tensor, pts_tensor, box_idx_of_points_tensor); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/points_in_boxes_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "points_in_boxes_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void points_in_boxes_part_forward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { auto boxes_tensor = buildATensor(ctx, ins[0]); auto pts_tensor = buildATensor(ctx, ins[1]); auto box_idx_of_points_tensor = buildATensor(ctx, outs[0]); points_in_boxes_part_forward(boxes_tensor, pts_tensor, box_idx_of_points_tensor); } void points_in_boxes_all_forward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { auto boxes_tensor = buildATensor(ctx, ins[0]); auto pts_tensor = buildATensor(ctx, ins[1]); auto box_idx_of_points_tensor = buildATensor(ctx, outs[0]); points_in_boxes_all_forward(boxes_tensor, pts_tensor, box_idx_of_points_tensor); } PARROTS_EXTENSION_REGISTER(points_in_boxes_part_forward) .input(2) .output(1) .apply(points_in_boxes_part_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(points_in_boxes_all_forward) .input(2) .output(1) .apply(points_in_boxes_all_forward_cuda_parrots) .done(); #endif void points_in_boxes_forward_cpu_parrots(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { auto boxes_tensor = buildATensor(ctx, ins[0]); auto pts_tensor = buildATensor(ctx, 
ins[1]); auto pts_indices_tensor = buildATensor(ctx, outs[0]); points_in_boxes_cpu_forward(boxes_tensor, pts_tensor, pts_indices_tensor); } PARROTS_EXTENSION_REGISTER(points_in_boxes_cpu_forward) .input(2) .output(1) .apply(points_in_boxes_forward_cpu_parrots) .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/points_in_boxes_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef POINTS_IN_BOXES_PYTORCH_H #define POINTS_IN_BOXES_PYTORCH_H #include using namespace at; void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor, Tensor box_idx_of_points_tensor); void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor, Tensor box_idx_of_points_tensor); void points_in_boxes_cpu_forward(Tensor boxes_tensor, Tensor pts_tensor, Tensor pts_indices_tensor); #endif // POINTS_IN_BOXES_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/points_in_polygons.cpp ================================================ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void points_in_polygons_forward_impl(const Tensor points, const Tensor polygons, Tensor output, const int rows, const int cols) { DISPATCH_DEVICE_IMPL(points_in_polygons_forward_impl, points, polygons, output, rows, cols); } void points_in_polygons_forward(Tensor points, Tensor polygons, Tensor output) { int rows = points.size(0); int cols = polygons.size(0); points_in_polygons_forward_impl(points, polygons, output, rows, cols); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/points_in_polygons_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "points_in_polygons_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void points_in_polygons_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { auto points = buildATensor(ctx, ins[0]); auto polygons = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); points_in_polygons_forward(points, polygons, output); } PARROTS_EXTENSION_REGISTER(points_in_polygons_forward) .input(2) .output(1) .apply(points_in_polygons_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/points_in_polygons_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef POINTS_IN_POLYGONS_PYTORCH_H #define POINTS_IN_POLYGONS_PYTORCH_H #include using namespace at; void points_in_polygons_forward(Tensor points, Tensor polygons, Tensor output); #endif // POINTS_IN_POLYGONS_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/psamask.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved // Modified from // https://github.com/hszhao/semseg/blob/master/lib/psa/src #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void psamask_forward_impl(const int psa_type, const Tensor input, Tensor output, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { DISPATCH_DEVICE_IMPL(psamask_forward_impl, psa_type, input, output, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } void psamask_backward_impl(const int psa_type, const Tensor grad_output, Tensor grad_input, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { DISPATCH_DEVICE_IMPL(psamask_backward_impl, psa_type, grad_output, grad_input, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } void psamask_forward(const Tensor input, Tensor output, const int psa_type, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { psamask_forward_impl(psa_type, input, output, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } void psamask_backward(Tensor grad_output, const Tensor grad_input, const int psa_type, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { psamask_backward_impl(psa_type, grad_output, grad_input, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/psamask_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "psamask_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void psamask_forward_cuda_parrots(CudaContext &ctx, const SSElement &attr, const OperatorBase::in_list_t &ins, OperatorBase::out_list_t &outs) { int psa_type, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask; SSAttrs(attr) .get("psa_type", psa_type) .get("num_", num_) .get("h_feature", h_feature) .get("w_feature", w_feature) .get("h_mask", h_mask) .get("w_mask", w_mask) .get("half_h_mask", half_h_mask) .get("half_w_mask", half_w_mask) .done(); const auto &input = buildATensor(ctx, ins[0]); auto output = buildATensor(ctx, outs[0]); psamask_forward_cuda(psa_type, input, output, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } void psamask_backward_cuda_parrots(CudaContext &ctx, const SSElement &attr, const OperatorBase::in_list_t &ins, OperatorBase::out_list_t &outs) { int psa_type, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask; SSAttrs(attr) .get("psa_type", psa_type) .get("num_", num_) .get("h_feature", h_feature) .get("w_feature", w_feature) .get("h_mask", h_mask) .get("w_mask", w_mask) .get("half_h_mask", half_h_mask) .get("half_w_mask", half_w_mask) .done(); const auto &grad_output = buildATensor(ctx, ins[0]); auto grad_input = buildATensor(ctx, outs[0]); psamask_backward_cuda(psa_type, grad_output, grad_input, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } #endif void psamask_forward_cpu_parrots(HostContext &ctx, const SSElement &attr, const OperatorBase::in_list_t &ins, OperatorBase::out_list_t &outs) { int psa_type, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask; SSAttrs(attr) .get("psa_type", psa_type) .get("num_", num_) .get("h_feature", h_feature) .get("w_feature", w_feature) .get("h_mask", h_mask) .get("w_mask", w_mask) .get("half_h_mask", half_h_mask) .get("half_w_mask", half_w_mask) .done(); const auto &input = 
buildATensor(ctx, ins[0]); auto output = buildATensor(ctx, outs[0]); psamask_forward_cpu(psa_type, input, output, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } void psamask_backward_cpu_parrots(HostContext &ctx, const SSElement &attr, const OperatorBase::in_list_t &ins, OperatorBase::out_list_t &outs) { int psa_type, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask; SSAttrs(attr) .get("psa_type", psa_type) .get("num_", num_) .get("h_feature", h_feature) .get("w_feature", w_feature) .get("h_mask", h_mask) .get("w_mask", w_mask) .get("half_h_mask", half_h_mask) .get("half_w_mask", half_w_mask) .done(); const auto &grad_output = buildATensor(ctx, ins[0]); auto grad_input = buildATensor(ctx, outs[0]); psamask_backward_cpu(psa_type, grad_output, grad_input, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } PARROTS_EXTENSION_REGISTER(psamask_forward) .attr("psa_type") .attr("num_") .attr("h_feature") .attr("w_feature") .attr("h_mask") .attr("w_mask") .attr("half_h_mask") .attr("half_w_mask") .input(1) .output(1) .apply(psamask_forward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(psamask_forward_cuda_parrots) #endif .done(); PARROTS_EXTENSION_REGISTER(psamask_backward) .attr("psa_type") .attr("num_") .attr("h_feature") .attr("w_feature") .attr("h_mask") .attr("w_mask") .attr("half_h_mask") .attr("half_w_mask") .input(1) .output(1) .apply(psamask_backward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(psamask_backward_cuda_parrots) #endif .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/psamask_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef PSAMASK_PYTORCH_H #define PSAMASK_PYTORCH_H #include using namespace at; #ifdef MMCV_WITH_CUDA void psamask_forward_cuda(const int psa_type, const Tensor input, Tensor output, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask); void psamask_backward_cuda(const int psa_type, const Tensor grad_output, Tensor grad_input, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask); #endif void psamask_forward_cpu(const int psa_type, const Tensor input, Tensor output, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask); void psamask_backward_cpu(const int psa_type, const Tensor grad_output, Tensor grad_input, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask); #endif // PSAMASK_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/riroi_align_rotated.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forward dispatcher: passes every argument unchanged through
// DISPATCH_DEVICE_IMPL.
void riroi_align_rotated_forward_impl(Tensor features, Tensor rois,
                                      Tensor output, int pooled_height,
                                      int pooled_width, float spatial_scale,
                                      int num_samples, int num_orientations,
                                      bool clockwise) {
  DISPATCH_DEVICE_IMPL(riroi_align_rotated_forward_impl, features, rois, output,
                       pooled_height, pooled_width, spatial_scale, num_samples,
                       num_orientations, clockwise);
}

// Backward dispatcher: passes every argument unchanged through
// DISPATCH_DEVICE_IMPL.
void riroi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                       Tensor bottom_grad, int pooled_height,
                                       int pooled_width, float spatial_scale,
                                       int num_samples, int num_orientations,
                                       bool clockwise) {
  DISPATCH_DEVICE_IMPL(riroi_align_rotated_backward_impl, top_grad, rois,
                       bottom_grad, pooled_height, pooled_width, spatial_scale,
                       num_samples, num_orientations, clockwise);
}

// Public forward entry point; a thin wrapper over the forward dispatcher with
// the same argument order.
void riroi_align_rotated_forward(Tensor features, Tensor rois, Tensor output,
                                 int pooled_height, int pooled_width,
                                 float spatial_scale, int num_samples,
                                 int num_orientations, bool clockwise) {
  riroi_align_rotated_forward_impl(features, rois, output, pooled_height,
                                   pooled_width, spatial_scale, num_samples,
                                   num_orientations, clockwise);
}

// Public backward entry point; a thin wrapper over the backward dispatcher
// with the same argument order.
void riroi_align_rotated_backward(Tensor top_grad, Tensor rois,
                                  Tensor bottom_grad, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int num_samples, int num_orientations,
                                  bool clockwise) {
  riroi_align_rotated_backward_impl(top_grad, rois, bottom_grad, pooled_height,
                                    pooled_width, spatial_scale, num_samples,
                                    num_orientations, clockwise);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/riroi_align_rotated_parrots.cpp
================================================
// Copyright (c) OpenMMLab.
All rights reserved #include #include #include #include "riroi_align_rotated_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void riroi_align_rotated_forward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pooled_height; int pooled_width; float spatial_scale; int sample_num; int num_orientations; bool clockwise; SSAttrs(attr) .get("pooled_height", pooled_height) .get("pooled_width", pooled_width) .get("spatial_scale", spatial_scale) .get("num_samples", sample_num) .get("num_orientations", num_orientations) .get("clockwise", clockwise) .done(); auto input = buildATensor(ctx, ins[0]); auto rois = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); riroi_align_rotated_forward(input, rois, output, pooled_height, pooled_width, spatial_scale, sample_num, num_orientations, clockwise); } void riroi_align_rotated_backward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pooled_height; int pooled_width; float spatial_scale; int sample_num; int num_orientations; bool clockwise; SSAttrs(attr) .get("pooled_height", pooled_height) .get("pooled_width", pooled_width) .get("spatial_scale", spatial_scale) .get("num_samples", sample_num) .get("num_orientations", num_orientations) .get("clockwise", clockwise) .done(); auto grad_output = buildATensor(ctx, ins[0]); auto rois = buildATensor(ctx, ins[1]); auto grad_input = buildATensor(ctx, outs[0]); riroi_align_rotated_backward(grad_output, rois, grad_input, pooled_height, pooled_width, spatial_scale, sample_num, num_orientations, clockwise); } PARROTS_EXTENSION_REGISTER(riroi_align_rotated_forward) .attr("pooled_height") .attr("pooled_width") .attr("spatial_scale") .attr("num_samples") .attr("num_orientations") .attr("clockwise") .input(2) .output(1) .apply(riroi_align_rotated_forward_cuda_parrots) .done(); 
PARROTS_EXTENSION_REGISTER(riroi_align_rotated_backward) .attr("pooled_height") .attr("pooled_width") .attr("spatial_scale") .attr("num_samples") .attr("num_orientations") .attr("clockwise") .input(2) .output(1) .apply(riroi_align_rotated_backward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/riroi_align_rotated_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef RIROI_ALIGN_ROTATED_PYTORCH_H #define RIROI_ALIGN_ROTATED_PYTORCH_H #include using namespace at; void riroi_align_rotated_forward(Tensor features, Tensor rois, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int num_samples, int num_orientations, bool clockwise); void riroi_align_rotated_backward(Tensor top_grad, Tensor rois, Tensor bottom_grad, int pooled_height, int pooled_width, float spatial_scale, int num_samples, int num_orientations, bool clockwise); #endif // RIROI_ALIGN_ROTATED_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_align.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forward dispatcher: passes every argument unchanged through
// DISPATCH_DEVICE_IMPL.
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  DISPATCH_DEVICE_IMPL(roi_align_forward_impl, input, rois, output, argmax_y,
                       argmax_x, aligned_height, aligned_width, spatial_scale,
                       sampling_ratio, pool_mode, aligned);
}

// Backward dispatcher: passes every argument unchanged through
// DISPATCH_DEVICE_IMPL.
void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y,
                             Tensor argmax_x, Tensor grad_input,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned) {
  DISPATCH_DEVICE_IMPL(roi_align_backward_impl, grad_output, rois, argmax_y,
                       argmax_x, grad_input, aligned_height, aligned_width,
                       spatial_scale, sampling_ratio, pool_mode, aligned);
}

// Public forward entry point; a thin wrapper over roi_align_forward_impl with
// the same argument order.
void roi_align_forward(Tensor input, Tensor rois, Tensor output,
                       Tensor argmax_y, Tensor argmax_x, int aligned_height,
                       int aligned_width, float spatial_scale,
                       int sampling_ratio, int pool_mode, bool aligned) {
  roi_align_forward_impl(input, rois, output, argmax_y, argmax_x,
                         aligned_height, aligned_width, spatial_scale,
                         sampling_ratio, pool_mode, aligned);
}

// Public backward entry point; a thin wrapper over roi_align_backward_impl
// with the same argument order.
void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y,
                        Tensor argmax_x, Tensor grad_input, int aligned_height,
                        int aligned_width, float spatial_scale,
                        int sampling_ratio, int pool_mode, bool aligned) {
  roi_align_backward_impl(grad_output, rois, argmax_y, argmax_x, grad_input,
                          aligned_height, aligned_width, spatial_scale,
                          sampling_ratio, pool_mode, aligned);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_align_parrots.cpp
================================================
// Copyright (c) OpenMMLab.
All rights reserved #include #include #include #include "roi_align_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void roi_align_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int aligned_height; int aligned_width; float spatial_scale; int sampling_ratio; int pool_mode; bool aligned; SSAttrs(attr) .get("aligned_height", aligned_height) .get("aligned_width", aligned_width) .get("spatial_scale", spatial_scale) .get("sampling_ratio", sampling_ratio) .get("pool_mode", pool_mode) .get("aligned", aligned) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& rois = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); auto argmax_y = buildATensor(ctx, outs[1]); auto argmax_x = buildATensor(ctx, outs[2]); roi_align_forward_cuda(input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } void roi_align_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int aligned_height; int aligned_width; float spatial_scale; int sampling_ratio; int pool_mode; bool aligned; SSAttrs(attr) .get("aligned_height", aligned_height) .get("aligned_width", aligned_width) .get("spatial_scale", spatial_scale) .get("sampling_ratio", sampling_ratio) .get("pool_mode", pool_mode) .get("aligned", aligned) .done(); const auto& grad_output = buildATensor(ctx, ins[0]); const auto& rois = buildATensor(ctx, ins[1]); const auto& argmax_y = buildATensor(ctx, ins[2]); const auto& argmax_x = buildATensor(ctx, ins[3]); auto grad_input = buildATensor(ctx, outs[0]); roi_align_backward_cuda(grad_output, rois, argmax_y, argmax_x, grad_input, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } #endif void roi_align_forward_cpu_parrots(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, 
OperatorBase::out_list_t& outs) { int aligned_height; int aligned_width; float spatial_scale; int sampling_ratio; int pool_mode; bool aligned; SSAttrs(attr) .get("aligned_height", aligned_height) .get("aligned_width", aligned_width) .get("spatial_scale", spatial_scale) .get("sampling_ratio", sampling_ratio) .get("pool_mode", pool_mode) .get("aligned", aligned) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& rois = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); auto argmax_y = buildATensor(ctx, outs[1]); auto argmax_x = buildATensor(ctx, outs[2]); roi_align_forward_cpu(input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } void roi_align_backward_cpu_parrots(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int aligned_height; int aligned_width; float spatial_scale; int sampling_ratio; int pool_mode; bool aligned; SSAttrs(attr) .get("aligned_height", aligned_height) .get("aligned_width", aligned_width) .get("spatial_scale", spatial_scale) .get("sampling_ratio", sampling_ratio) .get("pool_mode", pool_mode) .get("aligned", aligned) .done(); const auto& grad_output = buildATensor(ctx, ins[0]); const auto& rois = buildATensor(ctx, ins[1]); const auto& argmax_y = buildATensor(ctx, ins[2]); const auto& argmax_x = buildATensor(ctx, ins[3]); auto grad_input = buildATensor(ctx, outs[0]); roi_align_backward_cpu(grad_output, rois, argmax_y, argmax_x, grad_input, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } PARROTS_EXTENSION_REGISTER(roi_align_forward) .attr("aligned_height") .attr("aligned_width") .attr("spatial_scale") .attr("sampling_ratio") .attr("pool_mode") .attr("aligned") .input(2) .output(3) .apply(roi_align_forward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(roi_align_forward_cuda_parrots) #endif .done(); PARROTS_EXTENSION_REGISTER(roi_align_backward) 
.attr("aligned_height") .attr("aligned_width") .attr("spatial_scale") .attr("sampling_ratio") .attr("pool_mode") .attr("aligned") .input(4) .output(1) .apply(roi_align_backward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(roi_align_backward_cuda_parrots) #endif .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_align_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef ROI_ALIGN_PYTORCH_H #define ROI_ALIGN_PYTORCH_H #include using namespace at; #ifdef MMCV_WITH_CUDA void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned); void roi_align_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax_y, Tensor argmax_x, Tensor grad_input, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned); #endif void roi_align_forward_cpu(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned); void roi_align_backward_cpu(Tensor grad_output, Tensor rois, Tensor argmax_y, Tensor argmax_x, Tensor grad_input, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned); #endif // ROI_ALIGN_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_align_rotated.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for rotated RoIAlign forward. Hands every argument to
// DISPATCH_DEVICE_IMPL under this function's own name.
// NOTE(review): presumably the macro selects the implementation registered for
// the tensors' device — confirm in pytorch_device_registry.hpp.
void roi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output,
                                    int aligned_height, int aligned_width,
                                    float spatial_scale, int sample_ratio,
                                    bool aligned, bool clockwise) {
  DISPATCH_DEVICE_IMPL(roi_align_rotated_forward_impl, features, rois, output,
                       aligned_height, aligned_width, spatial_scale,
                       sample_ratio, aligned, clockwise);
}

// Dispatch stub for rotated RoIAlign backward; same pattern as the forward
// stub, with top_grad as input and bottom_grad as the destination tensor.
void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int aligned_height,
                                     int aligned_width, float spatial_scale,
                                     int sample_ratio, bool aligned,
                                     bool clockwise) {
  DISPATCH_DEVICE_IMPL(roi_align_rotated_backward_impl, top_grad, rois,
                       bottom_grad, aligned_height, aligned_width,
                       spatial_scale, sample_ratio, aligned, clockwise);
}

// Public forward entry point: passes its arguments unchanged to
// roi_align_rotated_forward_impl (sampling_ratio maps onto sample_ratio).
void roi_align_rotated_forward(Tensor input, Tensor rois, Tensor output,
                               int aligned_height, int aligned_width,
                               float spatial_scale, int sampling_ratio,
                               bool aligned, bool clockwise) {
  roi_align_rotated_forward_impl(input, rois, output, aligned_height,
                                 aligned_width, spatial_scale, sampling_ratio,
                                 aligned, clockwise);
}

// Public backward entry point: passes its arguments unchanged to
// roi_align_rotated_backward_impl.
void roi_align_rotated_backward(Tensor top_grad, Tensor rois,
                                Tensor bottom_grad, int aligned_height,
                                int aligned_width, float spatial_scale,
                                int sampling_ratio, bool aligned,
                                bool clockwise) {
  roi_align_rotated_backward_impl(top_grad, rois, bottom_grad, aligned_height,
                                  aligned_width, spatial_scale, sampling_ratio,
                                  aligned, clockwise);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_align_rotated_parrots.cpp
================================================
// Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "roi_align_rotated_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void roi_align_rotated_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pooled_height; int pooled_width; float spatial_scale; int sample_num; bool aligned; bool clockwise; SSAttrs(attr) .get("pooled_height", pooled_height) .get("pooled_width", pooled_width) .get("spatial_scale", spatial_scale) .get("sample_num", sample_num) .get("aligned", aligned) .get("clockwise", clockwise) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& rois = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); roi_align_rotated_forward_cuda(input, rois, output, pooled_height, pooled_width, spatial_scale, sample_num, aligned, clockwise); } void roi_align_rotated_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pooled_height; int pooled_width; float spatial_scale; int sample_num; bool aligned; bool clockwise; SSAttrs(attr) .get("pooled_height", pooled_height) .get("pooled_width", pooled_width) .get("spatial_scale", spatial_scale) .get("sample_num", sample_num) .get("aligned", aligned) .get("clockwise", clockwise) .done(); const auto& grad_output = buildATensor(ctx, ins[0]); const auto& rois = buildATensor(ctx, ins[1]); auto grad_input = buildATensor(ctx, outs[0]); roi_align_rotated_backward_cuda(grad_output, rois, grad_input, pooled_height, pooled_width, spatial_scale, sample_num, aligned, clockwise); } #endif void roi_align_rotated_forward_cpu_parrots(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pooled_height; int pooled_width; float spatial_scale; int sample_num; bool aligned; bool clockwise; SSAttrs(attr) .get("pooled_height", pooled_height) .get("pooled_width", pooled_width) 
.get("spatial_scale", spatial_scale) .get("sample_num", sample_num) .get("aligned", aligned) .get("clockwise", clockwise) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& rois = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); roi_align_rotated_forward_cpu(input, rois, output, pooled_height, pooled_width, spatial_scale, sample_num, aligned, clockwise); } void roi_align_rotated_backward_cpu_parrots(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pooled_height; int pooled_width; float spatial_scale; int sample_num; bool aligned; bool clockwise; SSAttrs(attr) .get("pooled_height", pooled_height) .get("pooled_width", pooled_width) .get("spatial_scale", spatial_scale) .get("sample_num", sample_num) .get("aligned", aligned) .get("clockwise", clockwise) .done(); const auto& grad_output = buildATensor(ctx, ins[0]); const auto& rois = buildATensor(ctx, ins[1]); auto grad_input = buildATensor(ctx, outs[0]); roi_align_rotated_backward_cpu(grad_output, rois, grad_input, pooled_height, pooled_width, spatial_scale, sample_num, aligned, clockwise); } PARROTS_EXTENSION_REGISTER(roi_align_rotated_forward) .attr("pooled_height") .attr("pooled_width") .attr("spatial_scale") .attr("sample_num") .attr("aligned") .attr("clockwise") .input(2) .output(1) .apply(roi_align_rotated_forward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(roi_align_rotated_forward_cuda_parrots) #endif .done(); PARROTS_EXTENSION_REGISTER(roi_align_rotated_backward) .attr("pooled_height") .attr("pooled_width") .attr("spatial_scale") .attr("sample_num") .attr("aligned") .attr("clockwise") .input(2) .output(1) .apply(roi_align_rotated_backward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(roi_align_rotated_backward_cuda_parrots) #endif .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_align_rotated_pytorch.h 
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROI_ALIGN_ROTATED_PYTORCH_H
#define ROI_ALIGN_ROTATED_PYTORCH_H
// NOTE(review): include target missing in this copy — restore from upstream.
#include
using namespace at;

#ifdef MMCV_WITH_CUDA
// CUDA implementations; only declared when built with MMCV_WITH_CUDA.
void roi_align_rotated_forward_cuda(Tensor features, Tensor rois, Tensor output,
                                    int pooled_height, int pooled_width,
                                    float spatial_scale, int sample_num,
                                    bool aligned, bool clockwise);

void roi_align_rotated_backward_cuda(Tensor grad_output, Tensor rois,
                                     Tensor bottom_grad, int pooled_height,
                                     int pooled_width, float spatial_scale,
                                     int sample_num, bool aligned,
                                     bool clockwise);
#endif

// CPU implementations; always declared.
void roi_align_rotated_forward_cpu(Tensor features, Tensor rois, Tensor output,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sample_num,
                                   bool aligned, bool clockwise);

void roi_align_rotated_backward_cpu(Tensor grad_output, Tensor rois,
                                    Tensor bottom_grad, int pooled_height,
                                    int pooled_width, float spatial_scale,
                                    int sample_num, bool aligned,
                                    bool clockwise);

#endif  // ROI_ALIGN_ROTATED_PYTORCH_H
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_pool.cpp
================================================
// Copyright (c) OpenMMLab. 
All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for RoI pooling forward. Hands every argument to
// DISPATCH_DEVICE_IMPL under this function's own name.
// NOTE(review): presumably the macro selects the implementation registered for
// the tensors' device — confirm in pytorch_device_registry.hpp.
void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax, int pooled_height, int pooled_width,
                           float spatial_scale) {
  DISPATCH_DEVICE_IMPL(roi_pool_forward_impl, input, rois, output, argmax,
                       pooled_height, pooled_width, spatial_scale);
}

// Dispatch stub for RoI pooling backward; grad_input is the destination tensor.
void roi_pool_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax,
                            Tensor grad_input, int pooled_height,
                            int pooled_width, float spatial_scale) {
  DISPATCH_DEVICE_IMPL(roi_pool_backward_impl, grad_output, rois, argmax,
                       grad_input, pooled_height, pooled_width, spatial_scale);
}

// Public forward entry point: passes its arguments unchanged to
// roi_pool_forward_impl.
void roi_pool_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax,
                      int pooled_height, int pooled_width,
                      float spatial_scale) {
  roi_pool_forward_impl(input, rois, output, argmax, pooled_height,
                        pooled_width, spatial_scale);
}

// Public backward entry point: passes its arguments unchanged to
// roi_pool_backward_impl.
void roi_pool_backward(Tensor grad_output, Tensor rois, Tensor argmax,
                       Tensor grad_input, int pooled_height, int pooled_width,
                       float spatial_scale) {
  roi_pool_backward_impl(grad_output, rois, argmax, grad_input, pooled_height,
                         pooled_width, spatial_scale);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_pool_parrots.cpp
================================================
// Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "roi_pool_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void roi_pool_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pooled_height; int pooled_width; float spatial_scale; SSAttrs(attr) .get("pooled_height", pooled_height) .get("pooled_width", pooled_width) .get("spatial_scale", spatial_scale) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& rois = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); auto argmax = buildATensor(ctx, outs[1]); roi_pool_forward_cuda(input, rois, output, argmax, pooled_height, pooled_width, spatial_scale); } void roi_pool_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pooled_height; int pooled_width; float spatial_scale; SSAttrs(attr) .get("pooled_height", pooled_height) .get("pooled_width", pooled_width) .get("spatial_scale", spatial_scale) .done(); const auto& grad_output = buildATensor(ctx, ins[0]); const auto& rois = buildATensor(ctx, ins[1]); const auto& argmax = buildATensor(ctx, ins[2]); auto grad_input = buildATensor(ctx, outs[0]); roi_pool_backward_cuda(grad_output, rois, argmax, grad_input, pooled_height, pooled_width, spatial_scale); } PARROTS_EXTENSION_REGISTER(roi_pool_forward) .attr("pooled_height") .attr("pooled_width") .attr("spatial_scale") .input(2) .output(2) .apply(roi_pool_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(roi_pool_backward) .attr("pooled_height") .attr("pooled_width") .attr("spatial_scale") .input(3) .output(1) .apply(roi_pool_backward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_pool_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef ROI_POOL_PYTORCH_H #define ROI_POOL_PYTORCH_H #include using namespace at; #ifdef MMCV_WITH_CUDA void roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor output, Tensor argmax, int pooled_height, int pooled_width, float spatial_scale); void roi_pool_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax, Tensor grad_input, int pooled_height, int pooled_width, float spatial_scale); #endif #endif // ROI_POOL_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roiaware_pool3d.cpp ================================================ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void roiaware_pool3d_forward_impl(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int out_y, int out_z, const Tensor rois, const Tensor pts, const Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels, Tensor pooled_features, int pool_method) { DISPATCH_DEVICE_IMPL(roiaware_pool3d_forward_impl, boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features, pool_method); } void roiaware_pool3d_backward_impl(int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel, const Tensor pts_idx_of_voxels, const Tensor argmax, const Tensor grad_out, Tensor grad_in, int pool_method) { DISPATCH_DEVICE_IMPL(roiaware_pool3d_backward_impl, boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel, pts_idx_of_voxels, argmax, grad_out, grad_in, pool_method); } void roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels, Tensor pooled_features, int pool_method) { // params rois: (N, 7) [x, y, z, x_size, y_size, z_size, ry] in LiDAR // coordinate // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate // params pts_feature: (npoints, C) // params argmax: (N, out_x, out_y, out_z, C) // params 
pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) // params pooled_features: (N, out_x, out_y, out_z, C) // params pool_method: 0: max_pool 1: avg_pool int boxes_num = rois.size(0); int pts_num = pts.size(0); int channels = pts_feature.size(1); int max_pts_each_voxel = pts_idx_of_voxels.size(4); // index 0 is the counter int out_x = pts_idx_of_voxels.size(1); int out_y = pts_idx_of_voxels.size(2); int out_z = pts_idx_of_voxels.size(3); assert((out_x < 256) && (out_y < 256) && (out_z < 256)); // we encode index with 8bit roiaware_pool3d_forward_impl(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features, pool_method); } void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax, Tensor grad_out, Tensor grad_in, int pool_method) { // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) // params argmax: (N, out_x, out_y, out_z, C) // params grad_out: (N, out_x, out_y, out_z, C) // params grad_in: (npoints, C), return value // params pool_method: 0: max_pool 1: avg_pool int boxes_num = pts_idx_of_voxels.size(0); int out_x = pts_idx_of_voxels.size(1); int out_y = pts_idx_of_voxels.size(2); int out_z = pts_idx_of_voxels.size(3); int max_pts_each_voxel = pts_idx_of_voxels.size(4); // index 0 is the counter int channels = grad_out.size(4); roiaware_pool3d_backward_impl(boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel, pts_idx_of_voxels, argmax, grad_out, grad_in, pool_method); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roiaware_pool3d_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "roiaware_pool3d_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void roiaware_pool3d_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pool_method; SSAttrs(attr).get("pool_method", pool_method).done(); auto rois = buildATensor(ctx, ins[0]); auto pts = buildATensor(ctx, ins[1]); auto pts_feature = buildATensor(ctx, ins[2]); auto argmax = buildATensor(ctx, outs[0]); auto pts_idx_of_voxels = buildATensor(ctx, outs[1]); auto pooled_features = buildATensor(ctx, outs[2]); roiaware_pool3d_forward(rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features, pool_method); } void roiaware_pool3d_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int pool_method; SSAttrs(attr).get("pool_method", pool_method).done(); auto pts_idx_of_voxels = buildATensor(ctx, ins[0]); auto argmax = buildATensor(ctx, ins[1]); auto grad_out = buildATensor(ctx, ins[2]); auto grad_in = buildATensor(ctx, outs[0]); roiaware_pool3d_backward(pts_idx_of_voxels, argmax, grad_out, grad_in, pool_method); } PARROTS_EXTENSION_REGISTER(roiaware_pool3d_forward) .attr("pool_method") .input(3) .output(3) .apply(roiaware_pool3d_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(roiaware_pool3d_backward) .attr("pool_method") .input(3) .output(1) .apply(roiaware_pool3d_backward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roiaware_pool3d_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef ROIAWARE_POOL3D_PYTORCH_H #define ROIAWARE_POOL3D_PYTORCH_H #include using namespace at; void roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels, Tensor pooled_features, int pool_method); void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax, Tensor grad_out, Tensor grad_in, int pool_method); #endif // ROIAWARE_POOL3D_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roipoint_pool3d.cpp ================================================ /* Modified from https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/roipoint_pool3d/src/roipoint_pool3d.cpp Point cloud feature pooling Written by Shaoshuai Shi All Rights Reserved 2018. */ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void roipoint_pool3d_forward_impl(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num, const Tensor xyz, const Tensor boxes3d, const Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag) { DISPATCH_DEVICE_IMPL(roipoint_pool3d_forward_impl, batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, xyz, boxes3d, pts_feature, pooled_features, pooled_empty_flag); } void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag) { // params xyz: (B, N, 3) // params boxes3d: (B, M, 7) // params pts_feature: (B, N, C) // params pooled_features: (B, M, 512, 3+C) // params pooled_empty_flag: (B, M) int batch_size = xyz.size(0); int pts_num = xyz.size(1); int boxes_num = boxes3d.size(1); int feature_in_len = pts_feature.size(2); int sampled_pts_num = pooled_features.size(2); roipoint_pool3d_forward_impl(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, xyz, boxes3d, pts_feature, pooled_features, pooled_empty_flag); } ================================================ FILE: 
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roipoint_pool3d_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "roipoint_pool3d_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void roipoint_pool3d_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { auto xyz = buildATensor(ctx, ins[0]); auto boxes3d = buildATensor(ctx, ins[1]); auto pts_feature = buildATensor(ctx, ins[2]); auto pooled_features = buildATensor(ctx, outs[0]); auto pooled_empty_flag = buildATensor(ctx, outs[1]); roipoint_pool3d_forward(xyz, boxes3d, pts_feature, pooled_features, pooled_empty_flag); } PARROTS_EXTENSION_REGISTER(roipoint_pool3d_forward) .input(3) .output(2) .apply(roipoint_pool3d_forward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roipoint_pool3d_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef ROIPOINT_POOL3D_PYTORCH_H #define ROIPOINT_POOL3D_PYTORCH_H #include using namespace at; void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag); #endif // ROIPOINT_POOL3D_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/rotated_feature_align.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved. 
// Modified from // https://github.com/SJTU-Thinklab-Det/r3det-on-mmdetection/blob/master/mmdet/ops/fr/src/feature_refine_cuda.cpp #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void rotated_feature_align_forward_impl(const Tensor features, const Tensor best_bboxes, const float spatial_scale, const int points, Tensor output) { DISPATCH_DEVICE_IMPL(rotated_feature_align_forward_impl, features, best_bboxes, spatial_scale, points, output); } void rotated_feature_align_backward_impl(const Tensor top_grad, const Tensor best_bboxes, const float spatial_scale, const int points, Tensor bottom_grad) { DISPATCH_DEVICE_IMPL(rotated_feature_align_backward_impl, top_grad, best_bboxes, spatial_scale, points, bottom_grad); } void rotated_feature_align_forward(const Tensor features, const Tensor best_bboxes, Tensor output, const float spatial_scale, const int points) { rotated_feature_align_forward_impl(features, best_bboxes, spatial_scale, points, output); } void rotated_feature_align_backward(const Tensor top_grad, const Tensor best_bboxes, Tensor bottom_grad, const float spatial_scale, const int points) { rotated_feature_align_backward_impl(top_grad, best_bboxes, spatial_scale, points, bottom_grad); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/rotated_feature_align_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "rotated_feature_align_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void rotated_feature_align_forward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { float spatial_scale; int points; SSAttrs(attr) .get("spatial_scale", spatial_scale) .get("points", points) .done(); auto features = buildATensor(ctx, ins[0]); auto best_bboxes = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); rotated_feature_align_forward(features, best_bboxes, output, spatial_scale, points); } void rotated_feature_align_backward_cuda_parrots( CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { float spatial_scale; int points; SSAttrs(attr) .get("spatial_scale", spatial_scale) .get("points", points) .done(); auto grad_output = buildATensor(ctx, ins[0]); auto best_bboxes = buildATensor(ctx, ins[1]); auto grad_input = buildATensor(ctx, outs[0]); rotated_feature_align_backward(grad_output, best_bboxes, grad_input, spatial_scale, points); } PARROTS_EXTENSION_REGISTER(rotated_feature_align_forward) .attr("spatial_scale") .attr("points") .input(2) .output(1) .apply(rotated_feature_align_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(rotated_feature_align_backward) .attr("spatial_scale") .attr("points") .input(2) .output(1) .apply(rotated_feature_align_backward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/rotated_feature_align_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved
#ifndef ROTATED_FEATURE_ALIGN_PYTORCH_H
#define ROTATED_FEATURE_ALIGN_PYTORCH_H
// NOTE(review): include target missing in this copy — restore from upstream.
#include
using namespace at;

void rotated_feature_align_forward(const Tensor features,
                                   const Tensor best_bboxes, Tensor output,
                                   const float spatial_scale, const int points);

void rotated_feature_align_backward(const Tensor top_grad,
                                    const Tensor best_bboxes,
                                    Tensor bottom_grad,
                                    const float spatial_scale,
                                    const int points);

#endif  // ROTATED_FEATURE_ALIGN_PYTORCH_H
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/sync_bn.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// The five *_impl functions below are dispatch stubs: each hands its arguments
// to DISPATCH_DEVICE_IMPL under its own name.
// NOTE(review): presumably the macro selects the implementation registered for
// the tensors' device — confirm in pytorch_device_registry.hpp.
void sync_bn_forward_mean_impl(const Tensor input, Tensor mean) {
  DISPATCH_DEVICE_IMPL(sync_bn_forward_mean_impl, input, mean);
}

void sync_bn_forward_var_impl(const Tensor input, const Tensor mean,
                              Tensor var) {
  DISPATCH_DEVICE_IMPL(sync_bn_forward_var_impl, input, mean, var);
}

void sync_bn_forward_output_impl(const Tensor input, const Tensor mean,
                                 const Tensor var, Tensor running_mean,
                                 Tensor running_var, const Tensor weight,
                                 const Tensor bias, Tensor norm, Tensor std,
                                 Tensor output, float eps, float momentum,
                                 int group_size) {
  DISPATCH_DEVICE_IMPL(sync_bn_forward_output_impl, input, mean, var,
                       running_mean, running_var, weight, bias, norm, std,
                       output, eps, momentum, group_size);
}

void sync_bn_backward_param_impl(const Tensor grad_output, const Tensor norm,
                                 Tensor grad_weight, Tensor grad_bias) {
  DISPATCH_DEVICE_IMPL(sync_bn_backward_param_impl, grad_output, norm,
                       grad_weight, grad_bias);
}

void sync_bn_backward_data_impl(const Tensor grad_output, const Tensor weight,
                                const Tensor grad_weight,
                                const Tensor grad_bias, const Tensor norm,
                                const Tensor std, Tensor grad_input) {
  DISPATCH_DEVICE_IMPL(sync_bn_backward_data_impl, grad_output, weight,
                       grad_weight, grad_bias, norm, std, grad_input);
}

// Public entry point: forwards to sync_bn_forward_mean_impl.
void sync_bn_forward_mean(const Tensor input, Tensor mean) {
  sync_bn_forward_mean_impl(input, mean);
}

// Public entry point: forwards to sync_bn_forward_var_impl.
void sync_bn_forward_var(const Tensor input, const Tensor mean, Tensor var) {
  sync_bn_forward_var_impl(input, mean, var);
}

// Public entry point: forwards to sync_bn_forward_output_impl.
// The parameter order here (weight, bias before running_mean, running_var)
// differs from the impl's; the call below reorders them to the impl's layout.
void sync_bn_forward_output(const Tensor input, const Tensor mean,
                            const Tensor var, const Tensor weight,
                            const Tensor bias, Tensor running_mean,
                            Tensor running_var, Tensor norm, Tensor std,
                            Tensor output, float eps, float momentum,
                            int group_size) {
  sync_bn_forward_output_impl(input, mean, var, running_mean, running_var,
                              weight, bias, norm, std, output, eps, momentum,
                              group_size);
}

// Public entry point: forwards to sync_bn_backward_param_impl.
void sync_bn_backward_param(const Tensor grad_output, const Tensor norm,
                            Tensor grad_weight, Tensor grad_bias) {
  sync_bn_backward_param_impl(grad_output, norm, grad_weight, grad_bias);
}

// Public entry point: forwards to sync_bn_backward_data_impl.
void sync_bn_backward_data(const Tensor grad_output, const Tensor weight,
                           const Tensor grad_weight, const Tensor grad_bias,
                           const Tensor norm, const Tensor std,
                           Tensor grad_input) {
  sync_bn_backward_data_impl(grad_output, weight, grad_weight, grad_bias, norm,
                             std, grad_input);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/sync_bn_parrots.cpp
================================================
// Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "sync_bn_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void sync_bn_forward_mean_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { const auto& input = buildATensor(ctx, ins[0]); auto mean = buildATensor(ctx, outs[0]); sync_bn_forward_mean_cuda(input, mean); } void sync_bn_forward_var_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { const auto& input = buildATensor(ctx, ins[0]); const auto& mean = buildATensor(ctx, ins[1]); auto var = buildATensor(ctx, outs[0]); sync_bn_forward_var_cuda(input, mean, var); } void sync_bn_forward_output_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { size_t group_size; float eps, momentum; SSAttrs(attr) .get("eps", eps) .get("momentum", momentum) .get("group_size", group_size) .done(); const auto& input = buildATensor(ctx, ins[0]); const auto& mean = buildATensor(ctx, ins[1]); const auto& var = buildATensor(ctx, ins[2]); const auto& weight = buildATensor(ctx, ins[3]); const auto& bias = buildATensor(ctx, ins[4]); auto running_mean = buildATensor(ctx, outs[0]); auto running_var = buildATensor(ctx, outs[1]); auto norm = buildATensor(ctx, outs[2]); auto std = buildATensor(ctx, outs[3]); auto output = buildATensor(ctx, outs[4]); sync_bn_forward_output_cuda(input, mean, var, running_mean, running_var, weight, bias, norm, std, output, eps, momentum, group_size); } void sync_bn_backward_param_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { const auto& grad_output = buildATensor(ctx, ins[0]); const auto& norm = buildATensor(ctx, ins[1]); auto grad_weight = buildATensor(ctx, outs[0]); auto grad_bias = buildATensor(ctx, outs[1]); sync_bn_backward_param_cuda(grad_output, norm, 
grad_weight, grad_bias); } void sync_bn_backward_data_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { const auto& grad_output = buildATensor(ctx, ins[0]); const auto& weight = buildATensor(ctx, ins[1]); const auto& grad_weight = buildATensor(ctx, ins[2]); const auto& grad_bias = buildATensor(ctx, ins[3]); const auto& norm = buildATensor(ctx, ins[4]); const auto& std = buildATensor(ctx, ins[5]); auto grad_input = buildATensor(ctx, outs[0]); sync_bn_backward_data_cuda(grad_output, weight, grad_weight, grad_bias, norm, std, grad_input); } PARROTS_EXTENSION_REGISTER(sync_bn_forward_mean) .input(1) .output(1) .apply(sync_bn_forward_mean_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(sync_bn_forward_var) .input(2) .output(1) .apply(sync_bn_forward_var_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(sync_bn_forward_output) .attr("eps") .attr("momentum") .attr("group_size") .input(5) .output(5) .apply(sync_bn_forward_output_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(sync_bn_backward_param) .input(2) .output(2) .apply(sync_bn_backward_param_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(sync_bn_backward_data) .input(6) .output(1) .apply(sync_bn_backward_data_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/sync_bn_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef SYNC_BN_PYTORCH_H #define SYNC_BN_PYTORCH_H #include using namespace at; void sync_bn_forward_mean_cuda(const Tensor input, Tensor mean); void sync_bn_forward_var_cuda(const Tensor input, const Tensor mean, Tensor var); void sync_bn_forward_output_cuda(const Tensor input, const Tensor mean, const Tensor var, Tensor running_mean, Tensor running_var, const Tensor weight, const Tensor bias, Tensor norm, Tensor std, Tensor output, float eps, float momentum, int group_size); void sync_bn_backward_param_cuda(const Tensor grad_output, const Tensor norm, Tensor grad_weight, Tensor grad_bias); void sync_bn_backward_data_cuda(const Tensor grad_output, const Tensor weight, const Tensor grad_weight, const Tensor grad_bias, const Tensor norm, const Tensor std, Tensor grad_input); #endif // SYNC_BN_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/three_interpolate.cpp ================================================ // Modified from // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void three_interpolate_forward_impl(int b, int c, int m, int n, const Tensor points, const Tensor idx, const Tensor weight, Tensor out) { DISPATCH_DEVICE_IMPL(three_interpolate_forward_impl, b, c, m, n, points, idx, weight, out); } void three_interpolate_backward_impl(int b, int c, int n, int m, const Tensor grad_out, const Tensor idx, const Tensor weight, Tensor grad_points) { DISPATCH_DEVICE_IMPL(three_interpolate_backward_impl, b, c, n, m, grad_out, idx, weight, grad_points); } void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor, Tensor weight_tensor, Tensor out_tensor, int b, int c, int m, int n) { three_interpolate_forward_impl(b, c, m, n, points_tensor, idx_tensor, weight_tensor, out_tensor); } void three_interpolate_backward(Tensor grad_out_tensor, Tensor 
idx_tensor, Tensor weight_tensor, Tensor grad_points_tensor, int b, int c, int n, int m) { three_interpolate_backward_impl(b, c, n, m, grad_out_tensor, idx_tensor, weight_tensor, grad_points_tensor); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/three_interpolate_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "three_interpolate_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void three_interpolate_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int b, c, m, n; SSAttrs(attr) .get("b", b) .get("c", c) .get("m", m) .get("n", n) .done(); auto points_tensor = buildATensor(ctx, ins[0]); auto idx_tensor = buildATensor(ctx, ins[1]); auto weight_tensor = buildATensor(ctx, ins[2]); auto out_tensor = buildATensor(ctx, outs[0]); three_interpolate_forward(points_tensor, idx_tensor, weight_tensor, out_tensor, b, c, m, n); } void three_interpolate_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int b, c, n, m; SSAttrs(attr) .get("b", b) .get("c", c) .get("n", n) .get("m", m) .done(); auto grad_out_tensor = buildATensor(ctx, ins[0]); auto idx_tensor = buildATensor(ctx, ins[1]); auto weight_tensor = buildATensor(ctx, ins[2]); auto grad_points_tensor = buildATensor(ctx, outs[0]); three_interpolate_backward(grad_out_tensor, idx_tensor, weight_tensor, grad_points_tensor, b, c, n, m); } PARROTS_EXTENSION_REGISTER(three_interpolate_forward) .attr("b") .attr("c") .attr("m") .attr("n") .input(3) .output(1) .apply(three_interpolate_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(three_interpolate_backward) .attr("b") .attr("c") .attr("n") .attr("m") .input(3) .output(1) .apply(three_interpolate_backward_cuda_parrots) .done(); #endif 
================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/three_interpolate_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef THREE_INTERPOLATE_PYTORCH_H #define THREE_INTERPOLATE_PYTORCH_H #include using namespace at; void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor, Tensor weight_tensor, Tensor out_tensor, int b, int c, int m, int n); void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor, Tensor weight_tensor, Tensor grad_points_tensor, int b, int c, int n, int m); #endif // THREE_INTERPOLATE_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/three_nn.cpp ================================================ // Modified from // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void three_nn_forward_impl(int b, int n, int m, const Tensor unknown, const Tensor known, Tensor dist2, Tensor idx) { DISPATCH_DEVICE_IMPL(three_nn_forward_impl, b, n, m, unknown, known, dist2, idx); } void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor, Tensor dist2_tensor, Tensor idx_tensor, int b, int n, int m) { three_nn_forward_impl(b, n, m, unknown_tensor, known_tensor, dist2_tensor, idx_tensor); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/three_nn_parrots.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include #include "three_nn_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void three_nn_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int b, n, m; SSAttrs(attr).get("b", b).get("n", n).get("m", m).done(); auto unknown_tensor = buildATensor(ctx, ins[0]); auto known_tensor = buildATensor(ctx, ins[1]); auto dist2_tensor = buildATensor(ctx, outs[0]); auto idx_tensor = buildATensor(ctx, outs[1]); three_nn_forward(unknown_tensor, known_tensor, dist2_tensor, idx_tensor, b, n, m); } PARROTS_EXTENSION_REGISTER(three_nn_forward) .attr("b") .attr("n") .attr("m") .input(2) .output(2) .apply(three_nn_forward_cuda_parrots) .done(); #endif ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/three_nn_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef THREE_NN_PYTORCH_H #define THREE_NN_PYTORCH_H #include using namespace at; void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor, Tensor dist2_tensor, Tensor idx_tensor, int b, int n, int m); #endif // THREE_NN_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/tin_shift.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output) { DISPATCH_DEVICE_IMPL(tin_shift_forward_impl, input, shift, output); } void tin_shift_backward_impl(Tensor grad_output, Tensor shift, Tensor grad_input) { DISPATCH_DEVICE_IMPL(tin_shift_backward_impl, grad_output, shift, grad_input); } void tin_shift_forward(Tensor input, Tensor shift, Tensor output) { tin_shift_forward_impl(input, shift, output); } void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input) { tin_shift_backward_impl(grad_output, shift, grad_input); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/tin_shift_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "tin_shift_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void tin_shift_forward_cuda_parrots(CudaContext &ctx, const SSElement &attr, const OperatorBase::in_list_t &ins, OperatorBase::out_list_t &outs) { const auto &input = buildATensor(ctx, ins[0]); const auto &shift = buildATensor(ctx, ins[1]); auto output = buildATensor(ctx, outs[0]); tin_shift_forward_cuda(input, shift, output); } void tin_shift_backward_cuda_parrots(CudaContext &ctx, const SSElement &attr, const OperatorBase::in_list_t &ins, OperatorBase::out_list_t &outs) { const auto &grad_output = buildATensor(ctx, ins[0]); const auto &shift = buildATensor(ctx, ins[1]); auto grad_input = buildATensor(ctx, outs[0]); tin_shift_backward_cuda(grad_output, shift, grad_input); } PARROTS_EXTENSION_REGISTER(tin_shift_forward) .input(2) .output(1) .apply(tin_shift_forward_cuda_parrots) .done(); PARROTS_EXTENSION_REGISTER(tin_shift_backward) .input(2) .output(1) .apply(tin_shift_backward_cuda_parrots) .done(); #endif ================================================ FILE: 
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/tin_shift_pytorch.h ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef TIN_SHIFT_PYTORCH_H #define TIN_SHIFT_PYTORCH_H #include using namespace at; void tin_shift_forward_cuda(Tensor input, Tensor shift, Tensor output); void tin_shift_backward_cuda(Tensor grad_output, Tensor shift, Tensor grad_input); #endif // TIN_SHIFT_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/upfirdn2d.cpp ================================================ // Modified from // https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.cpp /* Copyright (c) 2021, NVIDIA Corporation. All rights reserved. NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator Augmentation (ADA) ======================================================================= 1. Definitions "Licensor" means any person or entity that distributes its Work. "Software" means the original work of authorship made available under this License. "Work" means the Software and any additions to or derivative works of the Software that are made available under this License. The terms "reproduce," "reproduction," "derivative works," and "distribution" have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. Works, including the Software, are "made available" under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. 2. License Grants 2.1 Copyright Grant. 
Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 3. Limitations 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work ("Your Terms") only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use non-commercially. Notwithstanding the foregoing, NVIDIA and its affiliates may use the Work and any derivative works commercially. As used herein, "non-commercially" means for research or evaluation purposes only. 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grant in Section 2.1) will terminate immediately. 3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License. 
3.6 Termination. If you violate any term of this License, then your rights under this License (including the grant in Section 2.1) will terminate immediately. 4. Disclaimer of Warranty. THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. 5. Limitation of Liability. EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
======================================================================= */ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" torch::Tensor upfirdn2d_op_impl(const torch::Tensor& input, const torch::Tensor& kernel, int up_x, int up_y, int down_x, int down_y, int pad_x0, int pad_x1, int pad_y0, int pad_y1) { return DISPATCH_DEVICE_IMPL(upfirdn2d_op_impl, input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1); } torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel, int up_x, int up_y, int down_x, int down_y, int pad_x0, int pad_x1, int pad_y0, int pad_y1) { return upfirdn2d_op_impl(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/upfirdn2d_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include using namespace at; using namespace parrots; torch::Tensor upfirdn2d(const Tensor &input, const Tensor &kernel, int up_x, int up_y, int down_x, int down_y, int pad_x0, int pad_x1, int pad_y0, int pad_y1); void upfirdn2d_parrots(CudaContext &ctx, const SSElement &attr, const OperatorBase::in_list_t &ins, OperatorBase::out_list_t &outs) { int up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1; const auto &input = buildATensor(ctx, ins[0]); const auto &kernel = buildATensor(ctx, ins[1]); SSAttrs(attr) .get("up_x", up_x) .get("up_y", up_y) .get("down_x", down_x) .get("down_y", down_y) .get("pad_x0", pad_x0) .get("pad_x1", pad_x1) .get("pad_y0", pad_y0) .get("pad_y1", pad_y1) .done(); auto out = upfirdn2d(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1); updateDArray(ctx, out, outs[0]); } PARROTS_EXTENSION_REGISTER(upfirdn2d) .attr("up_x") .attr("up_y") .attr("down_x") .attr("down_y") .attr("pad_x0") .attr("pad_x1") .attr("pad_y0") .attr("pad_y1") .input(2) 
.output(1) .apply(upfirdn2d_parrots) .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/voxelization.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved. #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" int hard_voxelize_forward_impl(const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors, at::Tensor &num_points_per_voxel, const std::vector voxel_size, const std::vector coors_range, const int max_points, const int max_voxels, const int NDim = 3) { return DISPATCH_DEVICE_IMPL(hard_voxelize_forward_impl, points, voxels, coors, num_points_per_voxel, voxel_size, coors_range, max_points, max_voxels, NDim); } void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors, const std::vector voxel_size, const std::vector coors_range, const int NDim = 3) { DISPATCH_DEVICE_IMPL(dynamic_voxelize_forward_impl, points, coors, voxel_size, coors_range, NDim); } void hard_voxelize_forward(const at::Tensor &points, const at::Tensor &voxel_size, const at::Tensor &coors_range, at::Tensor &voxels, at::Tensor &coors, at::Tensor &num_points_per_voxel, at::Tensor &voxel_num, const int max_points, const int max_voxels, const int NDim = 3) { int64_t *voxel_num_data = voxel_num.data_ptr(); std::vector voxel_size_v( voxel_size.data_ptr(), voxel_size.data_ptr() + voxel_size.numel()); std::vector coors_range_v( coors_range.data_ptr(), coors_range.data_ptr() + coors_range.numel()); *voxel_num_data = hard_voxelize_forward_impl( points, voxels, coors, num_points_per_voxel, voxel_size_v, coors_range_v, max_points, max_voxels, NDim); } void dynamic_voxelize_forward(const at::Tensor &points, const at::Tensor &voxel_size, const at::Tensor &coors_range, at::Tensor &coors, const int NDim = 3) { std::vector voxel_size_v( voxel_size.data_ptr(), voxel_size.data_ptr() + voxel_size.numel()); std::vector coors_range_v( coors_range.data_ptr(), 
coors_range.data_ptr() + coors_range.numel()); dynamic_voxelize_forward_impl(points, coors, voxel_size_v, coors_range_v, NDim); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/voxelization_parrots.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include "voxelization_pytorch.h" using namespace parrots; #ifdef MMCV_WITH_CUDA void hard_voxelize_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int max_points, max_voxels, NDim; SSAttrs(attr) .get("max_points", max_points) .get("max_voxels", max_voxels) .get("NDim", NDim) .done(); const auto& points = buildATensor(ctx, ins[0]); const auto& voxel_size = buildATensor(ctx, ins[1]); const auto& coors_range = buildATensor(ctx, ins[2]); auto voxels = buildATensor(ctx, outs[0]); auto coors = buildATensor(ctx, outs[1]); auto num_points_per_voxel = buildATensor(ctx, outs[2]); auto voxel_num = buildATensor(ctx, outs[3]); hard_voxelize_forward(points, voxel_size, coors_range, voxels, coors, num_points_per_voxel, voxel_num, max_points, max_voxels, NDim); } void dynamic_voxelize_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int NDim; SSAttrs(attr).get("NDim", NDim).done(); const auto& points = buildATensor(ctx, ins[0]); const auto& voxel_size = buildATensor(ctx, ins[1]); const auto& coors_range = buildATensor(ctx, ins[2]); auto coors = buildATensor(ctx, outs[0]); dynamic_voxelize_forward(points, voxel_size, coors_range, coors, NDim); } #endif void hard_voxelize_forward_cpu_parrots(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int max_points, max_voxels, NDim; SSAttrs(attr) .get("max_points", max_points) .get("max_voxels", max_voxels) .get("NDim", NDim) .done(); 
const auto& points = buildATensor(ctx, ins[0]); const auto& voxel_size = buildATensor(ctx, ins[1]); const auto& coors_range = buildATensor(ctx, ins[2]); auto voxels = buildATensor(ctx, outs[0]); auto coors = buildATensor(ctx, outs[1]); auto num_points_per_voxel = buildATensor(ctx, outs[2]); auto voxel_num = buildATensor(ctx, outs[3]); hard_voxelize_forward(points, voxel_size, coors_range, voxels, coors, num_points_per_voxel, voxel_num, max_points, max_voxels, NDim); } void dynamic_voxelize_forward_cpu_parrots(HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) { int NDim; SSAttrs(attr).get("NDim", NDim).done(); const auto& points = buildATensor(ctx, ins[0]); const auto& voxel_size = buildATensor(ctx, ins[1]); const auto& coors_range = buildATensor(ctx, ins[2]); auto coors = buildATensor(ctx, outs[0]); dynamic_voxelize_forward(points, voxel_size, coors_range, coors, NDim); } PARROTS_EXTENSION_REGISTER(hard_voxelize_forward) .attr("max_points") .attr("max_voxels") .attr("NDim") .input(3) .output(4) .apply(hard_voxelize_forward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(hard_voxelize_forward_cuda_parrots) #endif .done(); PARROTS_EXTENSION_REGISTER(dynamic_voxelize_forward) .attr("NDim") .input(3) .output(1) .apply(dynamic_voxelize_forward_cpu_parrots) #ifdef MMCV_WITH_CUDA .apply(dynamic_voxelize_forward_cuda_parrots) #endif .done(); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/voxelization_pytorch.h ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #ifndef VOXELIZATION_PYTORCH_H #define VOXELIZATION_PYTORCH_H #include using namespace at; void hard_voxelize_forward(const at::Tensor &points, const at::Tensor &voxel_size, const at::Tensor &coors_range, at::Tensor &voxels, at::Tensor &coors, at::Tensor &num_points_per_voxel, at::Tensor &voxel_num, const int max_points, const int max_voxels, const int NDim = 3); void dynamic_voxelize_forward(const at::Tensor &points, const at::Tensor &voxel_size, const at::Tensor &coors_range, at::Tensor &coors, const int NDim = 3); #endif // VOXELIZATION_PYTORCH_H ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/active_rotated_filter.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved. // Modified from // https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/ActiveRotatingFilter.h #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void active_rotated_filter_forward_impl(const Tensor input, const Tensor indices, Tensor output) { DISPATCH_DEVICE_IMPL(active_rotated_filter_forward_impl, input, indices, output); } void active_rotated_filter_backward_impl(const Tensor grad_out, const Tensor indices, Tensor grad_in) { DISPATCH_DEVICE_IMPL(active_rotated_filter_backward_impl, grad_out, indices, grad_in); } void active_rotated_filter_forward(const Tensor input, const Tensor indices, Tensor output) { active_rotated_filter_forward_impl(input, indices, output); } void active_rotated_filter_backward(const Tensor grad_out, const Tensor indices, Tensor grad_in) { active_rotated_filter_backward_impl(grad_out, indices, grad_in); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/assign_score_withk.cpp ================================================ // Modified from // https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/paconv_lib/src/gpu #include "pytorch_cpp_helper.hpp" 
#include "pytorch_device_registry.hpp"

// Forwards every argument through DISPATCH_DEVICE_IMPL (macro from
// pytorch_device_registry.hpp, not shown in this file).
void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O,
                                     int aggregate, const Tensor& points,
                                     const Tensor& centers,
                                     const Tensor& scores,
                                     const Tensor& knn_idx, Tensor& output) {
  DISPATCH_DEVICE_IMPL(assign_score_withk_forward_impl, B, N0, N1, M, K, O,
                       aggregate, points, centers, scores, knn_idx, output);
}

// Backward counterpart; the three trailing tensors receive the gradients.
void assign_score_withk_backward_impl(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores) {
  DISPATCH_DEVICE_IMPL(assign_score_withk_backward_impl, B, N0, N1, M, K, O,
                       aggregate, grad_out, points, centers, scores, knn_idx,
                       grad_points, grad_centers, grad_scores);
}

// Public entry point: reorders (tensors..., sizes...) into the
// (sizes..., tensors...) layout of the *_impl function.
void assign_score_withk_forward(const Tensor& points, const Tensor& centers,
                                const Tensor& scores, const Tensor& knn_idx,
                                Tensor& output, int B, int N0, int N1, int M,
                                int K, int O, int aggregate) {
  assign_score_withk_forward_impl(B, N0, N1, M, K, O, aggregate, points,
                                  centers, scores, knn_idx, output);
}

// Public backward entry point; same argument reordering as the forward one.
void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points,
                                 const Tensor& centers, const Tensor& scores,
                                 const Tensor& knn_idx, Tensor& grad_points,
                                 Tensor& grad_centers, Tensor& grad_scores,
                                 int B, int N0, int N1, int M, int K, int O,
                                 int aggregate) {
  assign_score_withk_backward_impl(B, N0, N1, M, K, O, aggregate, grad_out,
                                   points, centers, scores, knn_idx,
                                   grad_points, grad_centers, grad_scores);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/ball_query.cpp
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forwards every argument through DISPATCH_DEVICE_IMPL.
void ball_query_forward_impl(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx) {
  DISPATCH_DEVICE_IMPL(ball_query_forward_impl, b, n, m, min_radius, max_radius,
                       nsample, new_xyz, xyz, idx);
}

// Public entry point: reorders (tensors..., scalars...) into the
// (scalars..., tensors...) layout of ball_query_forward_impl.
void ball_query_forward(Tensor new_xyz_tensor, Tensor xyz_tensor,
                        Tensor idx_tensor, int b, int n, int m,
                        float min_radius, float max_radius, int nsample) {
  ball_query_forward_impl(b, n, m, min_radius, max_radius, nsample,
                          new_xyz_tensor, xyz_tensor, idx_tensor);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/bbox_overlaps.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forwards every argument through DISPATCH_DEVICE_IMPL.
void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                        const int mode, const bool aligned, const int offset) {
  DISPATCH_DEVICE_IMPL(bbox_overlaps_impl, bboxes1, bboxes2, ious, mode,
                       aligned, offset);
}

// Public entry point; thin wrapper around bbox_overlaps_impl.
void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                   const int mode, const bool aligned, const int offset) {
  bbox_overlaps_impl(bboxes1, bboxes2, ious, mode, aligned, offset);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/border_align.cpp
================================================
// Copyright (c) OpenMMLab.
All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forwards every argument through DISPATCH_DEVICE_IMPL (macro from
// pytorch_device_registry.hpp, not shown in this file).
void border_align_forward_impl(const Tensor &input, const Tensor &boxes,
                               Tensor output, Tensor argmax_idx,
                               const int pool_size) {
  DISPATCH_DEVICE_IMPL(border_align_forward_impl, input, boxes, output,
                       argmax_idx, pool_size);
}

// Backward counterpart; takes the argmax_idx tensor as a read-only input.
void border_align_backward_impl(const Tensor &grad_output, const Tensor &boxes,
                                const Tensor &argmax_idx, Tensor grad_input,
                                const int pool_size) {
  DISPATCH_DEVICE_IMPL(border_align_backward_impl, grad_output, boxes,
                       argmax_idx, grad_input, pool_size);
}

// Public forward entry point; thin wrapper around border_align_forward_impl.
void border_align_forward(const Tensor &input, const Tensor &boxes,
                          Tensor output, Tensor argmax_idx,
                          const int pool_size) {
  border_align_forward_impl(input, boxes, output, argmax_idx, pool_size);
}

// Public backward entry point; thin wrapper around border_align_backward_impl.
void border_align_backward(const Tensor &grad_output, const Tensor &boxes,
                           const Tensor &argmax_idx, Tensor grad_input,
                           const int pool_size) {
  border_align_backward_impl(grad_output, boxes, argmax_idx, grad_input,
                             pool_size);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/box_iou_rotated.cpp
================================================
// Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forwards every argument through DISPATCH_DEVICE_IMPL (macro from
// pytorch_device_registry.hpp, not shown in this file).
void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned) {
  DISPATCH_DEVICE_IMPL(box_iou_rotated_impl, boxes1, boxes2, ious, mode_flag,
                       aligned);
}

// Interface for Python
// inline is needed to prevent multiple function definitions when this header is
// included by different cpps
// NOTE(review): the remark above looks inherited from the header this file was
// adapted from; the function below is not declared inline here.
void box_iou_rotated(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                     const int mode_flag, const bool aligned) {
  box_iou_rotated_impl(boxes1, boxes2, ious, mode_flag, aligned);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/carafe.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forwards every argument through DISPATCH_DEVICE_IMPL.
void carafe_forward_impl(Tensor features, Tensor masks, Tensor rfeatures,
                         Tensor routput, Tensor rmasks, Tensor output,
                         int kernel_size, int group_size, int scale_factor) {
  DISPATCH_DEVICE_IMPL(carafe_forward_impl, features, masks, rfeatures, routput,
                       rmasks, output, kernel_size, group_size, scale_factor);
}

// Forwards every argument through DISPATCH_DEVICE_IMPL.
void carafe_backward_impl(Tensor top_grad, Tensor rfeatures, Tensor masks,
                          Tensor rtop_grad, Tensor rbottom_grad_hs,
                          Tensor rbottom_grad, Tensor rmask_grad,
                          Tensor bottom_grad, Tensor mask_grad, int kernel_size,
                          int group_size, int scale_factor) {
  DISPATCH_DEVICE_IMPL(carafe_backward_impl, top_grad, rfeatures, masks,
                       rtop_grad, rbottom_grad_hs, rbottom_grad, rmask_grad,
                       bottom_grad, mask_grad, kernel_size, group_size,
                       scale_factor);
}

// Public forward entry point; thin wrapper around carafe_forward_impl.
void carafe_forward(Tensor features, Tensor masks, Tensor rfeatures,
                    Tensor routput, Tensor rmasks, Tensor output,
                    int kernel_size, int group_size, int scale_factor) {
  carafe_forward_impl(features, masks, rfeatures, routput, rmasks, output,
                      kernel_size, group_size, scale_factor);
}

// Public backward entry point; thin wrapper around carafe_backward_impl.
void carafe_backward(Tensor top_grad, Tensor rfeatures, Tensor masks,
                     Tensor rtop_grad, Tensor rbottom_grad_hs,
                     Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad,
                     Tensor mask_grad, int kernel_size, int group_size,
                     int scale_factor) {
  carafe_backward_impl(top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs,
                       rbottom_grad, rmask_grad, bottom_grad, mask_grad,
                       kernel_size, group_size, scale_factor);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/carafe_naive.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forwards every argument through DISPATCH_DEVICE_IMPL.
void carafe_naive_forward_impl(Tensor features, Tensor masks, Tensor output,
                               int kernel_size, int group_size,
                               int scale_factor) {
  DISPATCH_DEVICE_IMPL(carafe_naive_forward_impl, features, masks, output,
                       kernel_size, group_size, scale_factor);
}

// Forwards every argument through DISPATCH_DEVICE_IMPL.
void carafe_naive_backward_impl(Tensor top_grad, Tensor features, Tensor masks,
                                Tensor bottom_grad, Tensor mask_grad,
                                int kernel_size, int group_size,
                                int scale_factor) {
  DISPATCH_DEVICE_IMPL(carafe_naive_backward_impl, top_grad, features, masks,
                       bottom_grad, mask_grad, kernel_size, group_size,
                       scale_factor);
}

// Public forward entry point; thin wrapper around carafe_naive_forward_impl.
void carafe_naive_forward(Tensor features, Tensor masks, Tensor output,
                          int kernel_size, int group_size, int scale_factor) {
  carafe_naive_forward_impl(features, masks, output, kernel_size, group_size,
                            scale_factor);
}

// Public backward entry point; thin wrapper around carafe_naive_backward_impl.
void carafe_naive_backward(Tensor top_grad, Tensor features, Tensor masks,
                           Tensor bottom_grad, Tensor mask_grad,
                           int kernel_size, int group_size, int scale_factor) {
  carafe_naive_backward_impl(top_grad, features, masks, bottom_grad, mask_grad,
                             kernel_size, group_size, scale_factor);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/contour_expand.cpp
================================================ // Copyright (c) OpenMMLab. All rights reserved // It is modified from https://github.com/whai362/PSENet #include #include #include "pytorch_cpp_helper.hpp" using namespace std; class Point2d { public: int x; int y; Point2d() : x(0), y(0) {} Point2d(int _x, int _y) : x(_x), y(_y) {} }; void kernel_dilate(const uint8_t *data, IntArrayRef data_shape, const int *label_map, int &label_num, int &min_area, vector> &text_line) { std::vector area(label_num + 1); int kernel_num = data_shape[0]; int height = data_shape[1]; int width = data_shape[2]; for (int x = 0; x < height; ++x) { for (int y = 0; y < width; ++y) { int label = label_map[x * width + y]; if (label == 0) continue; area[label] += 1; } } queue queue, next_queue; for (int x = 0; x < height; ++x) { vector row(width); for (int y = 0; y < width; ++y) { int label = label_map[x * width + y]; if (label == 0) continue; if (area[label] < min_area) continue; Point2d point(x, y); queue.push(point); row[y] = label; } text_line.emplace_back(row); } int dx[] = {-1, 1, 0, 0}; int dy[] = {0, 0, -1, 1}; vector kernel_step(kernel_num); std::for_each(kernel_step.begin(), kernel_step.end(), [=](int &k) { return k * height * width; }); for (int kernel_id = kernel_num - 2; kernel_id >= 0; --kernel_id) { while (!queue.empty()) { Point2d point = queue.front(); queue.pop(); int x = point.x; int y = point.y; int label = text_line[x][y]; bool is_edge = true; for (int d = 0; d < 4; ++d) { int tmp_x = x + dx[d]; int tmp_y = y + dy[d]; if (tmp_x < 0 || tmp_x >= height) continue; if (tmp_y < 0 || tmp_y >= width) continue; int kernel_value = data[kernel_step[kernel_id] + tmp_x * width + tmp_y]; if (kernel_value == 0) continue; if (text_line[tmp_x][tmp_y] > 0) continue; Point2d point(tmp_x, tmp_y); queue.push(point); text_line[tmp_x][tmp_y] = label; is_edge = false; } if (is_edge) { next_queue.push(point); } } swap(queue, next_queue); } } std::vector> contour_expand(Tensor kernel_mask, Tensor 
internal_kernel_label, int min_kernel_area, int kernel_num) { kernel_mask = kernel_mask.contiguous(); internal_kernel_label = internal_kernel_label.contiguous(); assert(kernel_mask.dim() == 3); assert(internal_kernel_label.dim() == 2); assert(kernel_mask.size(1) == internal_kernel_label.size(0)); assert(kernel_mask.size(2) == internal_kernel_label.size(1)); CHECK_CPU_INPUT(kernel_mask); CHECK_CPU_INPUT(internal_kernel_label); auto ptr_data = kernel_mask.data_ptr(); IntArrayRef data_shape = kernel_mask.sizes(); auto data_label_map = internal_kernel_label.data_ptr(); vector> text_line; kernel_dilate(ptr_data, data_shape, data_label_map, kernel_num, min_kernel_area, text_line); return text_line; } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/convex_iou.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved // modified from // https://github.com/SDL-GuoZonghao/BeyondBoundingBox/tree/main/mmdet/ops/iou/src #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void convex_iou_impl(const Tensor pointsets, const Tensor polygons, Tensor ious) { DISPATCH_DEVICE_IMPL(convex_iou_impl, pointsets, polygons, ious); } void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious) { convex_iou_impl(pointsets, polygons, ious); } void convex_giou_impl(const Tensor pointsets, const Tensor polygons, Tensor output) { DISPATCH_DEVICE_IMPL(convex_giou_impl, pointsets, polygons, output); } void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output) { convex_giou_impl(pointsets, polygons, output); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/corner_pool.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved // Modified from // https://github.com/princeton-vl/CornerNet-Lite/tree/master/core/models/py_utils/_cpools/src #include "pytorch_cpp_helper.hpp" Tensor bottom_pool_forward(Tensor input) { // Initialize output Tensor output = at::zeros_like(input); // Get height int64_t height = input.size(2); output.copy_(input); for (int64_t ind = 1; ind < height; ind <<= 1) { Tensor max_temp = at::slice(output, 2, ind, height); Tensor cur_temp = at::slice(output, 2, ind, height).clone(); Tensor next_temp = at::slice(output, 2, 0, height - ind).clone(); at::max_out(max_temp, cur_temp, next_temp); } return output; } Tensor bottom_pool_backward(Tensor input, Tensor grad_output) { auto output = at::zeros_like(input); int32_t batch = input.size(0); int32_t channel = input.size(1); int32_t height = input.size(2); int32_t width = input.size(3); auto max_val = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); auto max_ind = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kLong)); auto input_temp = input.select(2, 0); max_val.copy_(input_temp); max_ind.fill_(0); auto output_temp = output.select(2, 0); auto grad_output_temp = grad_output.select(2, 0); output_temp.copy_(grad_output_temp); auto un_max_ind = max_ind.unsqueeze(2); auto gt_mask = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kBool)); auto max_temp = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); for (int32_t ind = 0; ind < height - 1; ++ind) { input_temp = input.select(2, ind + 1); at::gt_out(gt_mask, input_temp, max_val); at::masked_select_out(max_temp, input_temp, gt_mask); max_val.masked_scatter_(gt_mask, max_temp); max_ind.masked_fill_(gt_mask, ind + 1); grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2); output.scatter_add_(2, un_max_ind, grad_output_temp); } return output; } Tensor left_pool_forward(Tensor input) { // Initialize output Tensor output = at::zeros_like(input); // Get 
width int64_t width = input.size(3); output.copy_(input); for (int64_t ind = 1; ind < width; ind <<= 1) { Tensor max_temp = at::slice(output, 3, 0, width - ind); Tensor cur_temp = at::slice(output, 3, 0, width - ind).clone(); Tensor next_temp = at::slice(output, 3, ind, width).clone(); at::max_out(max_temp, cur_temp, next_temp); } return output; } Tensor left_pool_backward(Tensor input, Tensor grad_output) { auto output = at::zeros_like(input); int32_t batch = input.size(0); int32_t channel = input.size(1); int32_t height = input.size(2); int32_t width = input.size(3); auto max_val = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); auto max_ind = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kLong)); auto input_temp = input.select(3, width - 1); max_val.copy_(input_temp); max_ind.fill_(width - 1); auto output_temp = output.select(3, width - 1); auto grad_output_temp = grad_output.select(3, width - 1); output_temp.copy_(grad_output_temp); auto un_max_ind = max_ind.unsqueeze(3); auto gt_mask = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kBool)); auto max_temp = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); for (int32_t ind = 1; ind < width; ++ind) { input_temp = input.select(3, width - ind - 1); at::gt_out(gt_mask, input_temp, max_val); at::masked_select_out(max_temp, input_temp, gt_mask); max_val.masked_scatter_(gt_mask, max_temp); max_ind.masked_fill_(gt_mask, width - ind - 1); grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3); output.scatter_add_(3, un_max_ind, grad_output_temp); } return output; } Tensor right_pool_forward(Tensor input) { // Initialize output Tensor output = at::zeros_like(input); // Get width int64_t width = input.size(3); output.copy_(input); for (int64_t ind = 1; ind < width; ind <<= 1) { Tensor max_temp = at::slice(output, 3, ind, width); Tensor cur_temp = at::slice(output, 3, ind, width).clone(); Tensor 
next_temp = at::slice(output, 3, 0, width - ind).clone(); at::max_out(max_temp, cur_temp, next_temp); } return output; } Tensor right_pool_backward(Tensor input, Tensor grad_output) { Tensor output = at::zeros_like(input); int32_t batch = input.size(0); int32_t channel = input.size(1); int32_t height = input.size(2); int32_t width = input.size(3); auto max_val = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); auto max_ind = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kLong)); auto input_temp = input.select(3, 0); max_val.copy_(input_temp); max_ind.fill_(0); auto output_temp = output.select(3, 0); auto grad_output_temp = grad_output.select(3, 0); output_temp.copy_(grad_output_temp); auto un_max_ind = max_ind.unsqueeze(3); auto gt_mask = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kBool)); auto max_temp = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); for (int32_t ind = 0; ind < width - 1; ++ind) { input_temp = input.select(3, ind + 1); at::gt_out(gt_mask, input_temp, max_val); at::masked_select_out(max_temp, input_temp, gt_mask); max_val.masked_scatter_(gt_mask, max_temp); max_ind.masked_fill_(gt_mask, ind + 1); grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3); output.scatter_add_(3, un_max_ind, grad_output_temp); } return output; } Tensor top_pool_forward(Tensor input) { // Initialize output Tensor output = at::zeros_like(input); // Get height int64_t height = input.size(2); output.copy_(input); for (int64_t ind = 1; ind < height; ind <<= 1) { Tensor max_temp = at::slice(output, 2, 0, height - ind); Tensor cur_temp = at::slice(output, 2, 0, height - ind).clone(); Tensor next_temp = at::slice(output, 2, ind, height).clone(); at::max_out(max_temp, cur_temp, next_temp); } return output; } Tensor top_pool_backward(Tensor input, Tensor grad_output) { auto output = at::zeros_like(input); int32_t batch = input.size(0); int32_t channel = 
input.size(1); int32_t height = input.size(2); int32_t width = input.size(3); auto max_val = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); auto max_ind = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kLong)); auto input_temp = input.select(2, height - 1); max_val.copy_(input_temp); max_ind.fill_(height - 1); auto output_temp = output.select(2, height - 1); auto grad_output_temp = grad_output.select(2, height - 1); output_temp.copy_(grad_output_temp); auto un_max_ind = max_ind.unsqueeze(2); auto gt_mask = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kBool)); auto max_temp = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); for (int32_t ind = 1; ind < height; ++ind) { input_temp = input.select(2, height - ind - 1); at::gt_out(gt_mask, input_temp, max_val); at::masked_select_out(max_temp, input_temp, gt_mask); max_val.masked_scatter_(gt_mask, max_temp); max_ind.masked_fill_(gt_mask, height - ind - 1); grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2); output.scatter_add_(2, un_max_ind, grad_output_temp); } return output; } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/correlation.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved. 
#include #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void correlation_forward_impl(Tensor input1, Tensor input2, Tensor output, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW) { DISPATCH_DEVICE_IMPL(correlation_forward_impl, input1, input2, output, kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW); } void correlation_backward_impl(Tensor grad_output, Tensor input1, Tensor input2, Tensor grad_input1, Tensor grad_input2, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW) { DISPATCH_DEVICE_IMPL(correlation_backward_impl, grad_output, input1, input2, grad_input1, grad_input2, kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW); } void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW) { correlation_forward_impl(input1, input2, output, kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW); } void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2, Tensor grad_input1, Tensor grad_input2, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW) { correlation_backward_impl(grad_output, input1, input2, grad_input1, grad_input2, kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/active_rotated_filter.cpp ================================================ // Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved // modified from // https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/cpu/ActiveRotatingFilter_cpu.cpp #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" template void active_rotated_filter_forward_cpu_kernel( const T* weightData, const int* indicesData, const int num_output_planes, const int num_input_planes, const int num_orientations, const int kH, const int kW, const int num_rotations, T* outputData) { const int nEntry = num_orientations * kH * kW; int i, j, l; int k; #pragma omp parallel for private(i, j, l, k) for (i = 0; i < num_output_planes; i++) { for (j = 0; j < num_input_planes; j++) { for (l = 0; l < nEntry; l++) { int weightIndex = i * num_input_planes * nEntry + j * nEntry + l; T val = *(weightData + weightIndex); for (k = 0; k < num_rotations; k++) { int index = (int)(*(indicesData + l * num_rotations + k)) - 1; T* target = outputData + i * (num_rotations * num_input_planes * nEntry) + k * (num_input_planes * nEntry) + j * (nEntry) + index; *target = val; } } } } } template void active_rotated_filter_backward_cpu_kernel( const T* gradOutputData, const int* indicesData, const int num_output_planes, const int num_input_planes, const int num_orientations, const int kH, const int kW, const int num_rotations, T* gradInputData) { const int nEntry = num_orientations * kH * kW; int i, j, l; int k; #pragma omp parallel for private(i, j, l, k) for (i = 0; i < num_output_planes; i++) { for (j = 0; j < num_input_planes; j++) { for (l = 0; l < nEntry; l++) { int gradInputIndex = i * num_input_planes * nEntry + j * nEntry + l; T* val = gradInputData + gradInputIndex; *val = 0; for (k = 0; k < num_rotations; k++) { int index = (int)(*(indicesData + l * num_rotations + k)) - 1; const T* target = gradOutputData + i * (num_rotations * num_input_planes * nEntry) + k * (num_input_planes * nEntry) + j * (nEntry) + index; *val = *val + *target; } } } } } void 
ActiveRotatedFilterForwardCPULauncher(const Tensor input, const Tensor indices, Tensor output) { const int num_output_planes = input.size(0); const int num_input_planes = input.size(1); const int num_orientations = input.size(2); const int kH = input.size(3); const int kW = input.size(4); const int num_rotations = indices.size(3); AT_DISPATCH_FLOATING_TYPES_AND_HALF( input.scalar_type(), "active_rotated_filter_forward_cpu_kernel", [&] { active_rotated_filter_forward_cpu_kernel( input.data_ptr(), indices.data_ptr(), num_output_planes, num_input_planes, num_orientations, kH, kW, num_rotations, output.data_ptr()); }); } void ActiveRotatedFilterBackwardCPULauncher(const Tensor grad_out, const Tensor indices, Tensor grad_in) { const int num_orientations = indices.size(0); const int kH = indices.size(1); const int kW = indices.size(2); const int num_rotations = indices.size(3); const int num_output_planes = grad_out.size(0) / num_rotations; const int num_input_planes = grad_out.size(1) / num_orientations; AT_DISPATCH_FLOATING_TYPES_AND_HALF( grad_out.scalar_type(), "active_rotated_filter_backward_cpu_kernel", [&] { active_rotated_filter_backward_cpu_kernel( grad_out.data_ptr(), indices.data_ptr(), num_output_planes, num_input_planes, num_orientations, kH, kW, num_rotations, grad_in.data_ptr()); }); } void active_rotated_filter_forward_cpu(const Tensor input, const Tensor indices, Tensor output) { ActiveRotatedFilterForwardCPULauncher(input, indices, output); } void active_rotated_filter_backward_cpu(const Tensor grad_out, const Tensor indices, Tensor grad_in) { ActiveRotatedFilterBackwardCPULauncher(grad_out, indices, grad_in); } void active_rotated_filter_forward_impl(const Tensor input, const Tensor indices, Tensor output); void active_rotated_filter_backward_impl(const Tensor grad_out, const Tensor indices, Tensor grad_in); REGISTER_DEVICE_IMPL(active_rotated_filter_forward_impl, CPU, active_rotated_filter_forward_cpu); 
REGISTER_DEVICE_IMPL(active_rotated_filter_backward_impl, CPU, active_rotated_filter_backward_cpu); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/box_iou_rotated.cpp ================================================ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved // modified from // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp #include "box_iou_rotated_utils.hpp" #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" template void box_iou_rotated_cpu_kernel(const Tensor boxes1, const Tensor boxes2, Tensor ious, const int mode_flag, const bool aligned) { int output_size = ious.numel(); auto num_boxes1 = boxes1.size(0); auto num_boxes2 = boxes2.size(0); if (aligned) { for (int i = 0; i < output_size; i++) { ious[i] = single_box_iou_rotated(boxes1[i].data_ptr(), boxes2[i].data_ptr(), mode_flag); } } else { for (int i = 0; i < num_boxes1; i++) { for (int j = 0; j < num_boxes2; j++) { ious[i * num_boxes2 + j] = single_box_iou_rotated( boxes1[i].data_ptr(), boxes2[j].data_ptr(), mode_flag); } } } } void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2, Tensor ious, const int mode_flag, const bool aligned) { box_iou_rotated_cpu_kernel(boxes1, boxes2, ious, mode_flag, aligned); } void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious, const int mode_flag, const bool aligned); REGISTER_DEVICE_IMPL(box_iou_rotated_impl, CPU, box_iou_rotated_cpu); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/deform_conv.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" template T deformable_im2col_bilinear_cpu(const T *input, const int data_width, const int height, const int width, T h, T w) { if (h <= -1 || height <= h || w <= -1 || width <= w) { return 0; } int h_low = floor(h); int w_low = floor(w); int h_high = h_low + 1; int w_high = w_low + 1; T lh = h - h_low; T lw = w - w_low; T hh = 1 - lh, hw = 1 - lw; T v1 = 0; if (h_low >= 0 && w_low >= 0) v1 = input[h_low * data_width + w_low]; T v2 = 0; if (h_low >= 0 && w_high <= width - 1) v2 = input[h_low * data_width + w_high]; T v3 = 0; if (h_high <= height - 1 && w_low >= 0) v3 = input[h_high * data_width + w_low]; T v4 = 0; if (h_high <= height - 1 && w_high <= width - 1) v4 = input[h_high * data_width + w_high]; T w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); return val; } template T get_gradient_weight_cpu(T argmax_h, T argmax_w, const int h, const int w, const int height, const int width) { if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) { // empty return 0; } int argmax_h_low = floor(argmax_h); int argmax_w_low = floor(argmax_w); int argmax_h_high = argmax_h_low + 1; int argmax_w_high = argmax_w_low + 1; T weight = 0; if (h == argmax_h_low && w == argmax_w_low) weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); if (h == argmax_h_low && w == argmax_w_high) weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); if (h == argmax_h_high && w == argmax_w_low) weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); if (h == argmax_h_high && w == argmax_w_high) weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); return weight; } template T get_coordinate_weight_cpu(T argmax_h, T argmax_w, const int height, const int width, const T *im_data, const int data_width, const int bp_dir) { if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) { // empty return 0; } int argmax_h_low = 
floor(argmax_h); int argmax_w_low = floor(argmax_w); int argmax_h_high = argmax_h_low + 1; int argmax_w_high = argmax_w_low + 1; T weight = 0; if (bp_dir == 0) { if (argmax_h_low >= 0 && argmax_w_low >= 0) weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low]; if (argmax_h_low >= 0 && argmax_w_high <= width - 1) weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high]; if (argmax_h_high <= height - 1 && argmax_w_low >= 0) weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low]; if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high]; } else if (bp_dir == 1) { if (argmax_h_low >= 0 && argmax_w_low >= 0) weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low]; if (argmax_h_low >= 0 && argmax_w_high <= width - 1) weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high]; if (argmax_h_high <= height - 1 && argmax_w_low >= 0) weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low]; if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high]; } return weight; } template void deformable_im2col_cpu_kernel( const int n, const T *data_im, const T *data_offset, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int channel_per_deformable_group, const int batch_size, const int num_channels, const int deformable_group, const int height_col, const int width_col, T *data_col) { for (int index = 0; index < n; index++) { // index index of output matrix const int w_col = index % width_col; const int h_col = (index / 
width_col) % height_col; const int b_col = (index / width_col / height_col) % batch_size; const int c_im = (index / width_col / height_col) / batch_size; const int c_col = c_im * kernel_h * kernel_w; // compute deformable group index const int deformable_group_index = c_im / channel_per_deformable_group; const int h_in = h_col * stride_h - pad_h; const int w_in = w_col * stride_w - pad_w; T *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; const T *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width; const T *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; for (int i = 0; i < kernel_h; ++i) { for (int j = 0; j < kernel_w; ++j) { const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col; const T offset_h = data_offset_ptr[data_offset_h_ptr]; const T offset_w = data_offset_ptr[data_offset_w_ptr]; T val = static_cast(0); const T h_im = h_in + i * dilation_h + offset_h; const T w_im = w_in + j * dilation_w + offset_w; if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) val = deformable_im2col_bilinear_cpu(data_im_ptr, width, height, width, h_im, w_im); *data_col_ptr = val; data_col_ptr += batch_size * height_col * width_col; } } } } template void deformable_col2im_cpu_kernel( const int n, const T *data_col, const T *data_offset, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int channel_per_deformable_group, const int batch_size, const int deformable_group, const int height_col, const int width_col, T *grad_im) { for (int index = 0; index < n; index++) { const int j = (index / width_col / 
height_col / batch_size) % kernel_w; const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h; const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h; // compute the start and end of the output const int deformable_group_index = c / channel_per_deformable_group; int w_out = index % width_col; int h_out = (index / width_col) % height_col; int b = (index / width_col / height_col) % batch_size; int w_in = w_out * stride_w - pad_w; int h_in = h_out * stride_h - pad_h; const T *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; const T offset_h = data_offset_ptr[data_offset_h_ptr]; const T offset_w = data_offset_ptr[data_offset_w_ptr]; const T cur_inv_h_data = h_in + i * dilation_h + offset_h; const T cur_inv_w_data = w_in + j * dilation_w + offset_w; const T cur_top_grad = data_col[index]; const int cur_h = (int)cur_inv_h_data; const int cur_w = (int)cur_inv_w_data; for (int dy = -2; dy <= 2; dy++) { for (int dx = -2; dx <= 2; dx++) { if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && abs(cur_inv_w_data - (cur_w + dx)) < 1) { int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; T weight = get_gradient_weight_cpu(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width); *(grad_im + cur_bottom_grad_pos) += weight * cur_top_grad; } } } } } template void deformable_col2im_coord_cpu_kernel( const int n, const T *data_col, const T *data_im, const T *data_offset, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int 
stride_w, const int dilation_h, const int dilation_w, const int channel_per_deformable_group, const int batch_size, const int offset_channels, const int deformable_group, const int height_col, const int width_col, T *grad_offset) { for (int index = 0; index < n; index++) { T val = 0; int w = index % width_col; int h = (index / width_col) % height_col; int c = (index / width_col / height_col) % offset_channels; int b = (index / width_col / height_col) / offset_channels; // compute the start and end of the output const int deformable_group_index = c / (2 * kernel_h * kernel_w); const int col_step = kernel_h * kernel_w; int cnt = 0; const T *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * batch_size * width_col * height_col; const T *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width; const T *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step) { const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w; const int bp_dir = offset_c % 2; int j = (col_pos / width_col / height_col / batch_size) % kernel_w; int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; int w_out = col_pos % width_col; int h_out = (col_pos / width_col) % height_col; int w_in = w_out * stride_w - pad_w; int h_in = h_out * stride_h - pad_h; const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); const T offset_h = data_offset_ptr[data_offset_h_ptr]; const T offset_w = data_offset_ptr[data_offset_w_ptr]; T inv_h = h_in + i * dilation_h + 
offset_h; T inv_w = w_in + j * dilation_w + offset_w; if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) inv_h = inv_w = -2; const T weight = get_coordinate_weight_cpu( inv_h, inv_w, height, width, data_im_ptr + cnt * height * width, width, bp_dir); val += weight * data_col_ptr[col_pos]; cnt += 1; } grad_offset[index] = val; } } void deformable_im2col_cpu(Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor data_col) { int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; int num_kernels = channels * height_col * width_col * parallel_imgs; int channel_per_deformable_group = channels / deformable_group; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_im.scalar_type(), "deformable_im2col_cpu", [&] { deformable_im2col_cpu_kernel( num_kernels, data_im.data_ptr(), data_offset.data_ptr(), height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, parallel_imgs, channels, deformable_group, height_col, width_col, data_col.data_ptr()); }); } void deformable_col2im_cpu(Tensor data_col, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_im) { // todo: make sure parallel_imgs is passed in correctly int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; int num_kernels = 
channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; int channel_per_deformable_group = channels / deformable_group; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "deformable_col2im_gpu", ([&] { const scalar_t *data_col_ = data_col.data_ptr(); const scalar_t *data_offset_ = data_offset.data_ptr(); scalar_t *grad_im_ = grad_im.data_ptr(); deformable_col2im_cpu_kernel( num_kernels, data_col_, data_offset_, channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, parallel_imgs, deformable_group, height_col, width_col, grad_im_); })); } void deformable_col2im_coord_cpu( Tensor data_col, Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_offset) { int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * deformable_group * parallel_imgs; int channel_per_deformable_group = channels * ksize_h * ksize_w / deformable_group; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "deformable_col2im_coord_cpu", ([&] { const scalar_t *data_col_ = data_col.data_ptr(); const scalar_t *data_im_ = data_im.data_ptr(); const scalar_t *data_offset_ = data_offset.data_ptr(); scalar_t *grad_offset_ = grad_offset.data_ptr(); deformable_col2im_coord_cpu_kernel( num_kernels, data_col_, data_im_, data_offset_, channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, parallel_imgs, 2 * ksize_h * ksize_w * deformable_group, deformable_group, height_col, width_col, 
grad_offset_); })); } void deformable_im2col_impl(Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor data_col); void deformable_col2im_impl(Tensor data_col, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_im); void deformable_col2im_coord_impl( Tensor data_col, Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_offset); REGISTER_DEVICE_IMPL(deformable_im2col_impl, CPU, deformable_im2col_cpu); REGISTER_DEVICE_IMPL(deformable_col2im_impl, CPU, deformable_col2im_cpu); REGISTER_DEVICE_IMPL(deformable_col2im_coord_impl, CPU, deformable_col2im_coord_cpu); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/modulated_deform_conv.cpp ================================================ // Copyright (c) OpenMMLab. 
// Bilinearly samples `input` (row stride `data_width`) at the fractional
// position (h, w). Any of the four neighbouring pixels that lies outside
// [0, height) x [0, width) contributes zero.
//
// Only the lower bound is checked for the low corner and only the upper bound
// for the high corner; the visible callers invoke this solely when
// -1 < h < height and -1 < w < width, which makes the remaining bounds hold.
template <typename T>
T dmcn_im2col_bilinear_cpu(const T *input, const int data_width,
                           const int height, const int width, T h, T w) {
  int h_low = floorf(h);
  int w_low = floorf(w);
  int h_high = h_low + 1;
  int w_high = w_low + 1;

  // Fractional distances to the low corner and their complements.
  T lh = h - h_low;
  T lw = w - w_low;
  T hh = 1 - lh, hw = 1 - lw;

  T v1 = 0;
  if (h_low >= 0 && w_low >= 0) v1 = input[h_low * data_width + w_low];
  T v2 = 0;
  if (h_low >= 0 && w_high <= width - 1)
    v2 = input[h_low * data_width + w_high];
  T v3 = 0;
  if (h_high <= height - 1 && w_low >= 0)
    v3 = input[h_high * data_width + w_low];
  T v4 = 0;
  if (h_high <= height - 1 && w_high <= width - 1)
    v4 = input[h_high * data_width + w_high];

  T w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;

  T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  return val;
}

// Returns the bilinear interpolation weight that the integer pixel (h, w)
// receives when sampling at the fractional position (argmax_h, argmax_w).
// Returns 0 when the sampling position is outside (-1, height) x (-1, width)
// or when (h, w) is not one of the four neighbours of the sampling position.
template <typename T>
T dmcn_get_gradient_weight_cpu(T argmax_h, T argmax_w, const int h,
                               const int w, const int height,
                               const int width) {
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
      argmax_w >= width) {
    // empty
    return 0;
  }

  int argmax_h_low = floorf(argmax_h);
  int argmax_w_low = floorf(argmax_w);
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  T weight = 0;
  if (h == argmax_h_low && w == argmax_w_low)
    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
  if (h == argmax_h_low && w == argmax_w_high)
    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
  if (h == argmax_h_high && w == argmax_w_low)
    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
  if (h == argmax_h_high && w == argmax_w_high)
    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
  return weight;
}

// Returns the derivative of the bilinear sample of `im_data` (row stride
// `data_width`) taken at (argmax_h, argmax_w), with respect to the sampling
// coordinate selected by `bp_dir`: 0 differentiates along h, 1 along w.
// Any other `bp_dir`, or a sampling position outside
// (-1, height) x (-1, width), yields 0. Out-of-image neighbours contribute 0.
template <typename T>
T dmcn_get_coordinate_weight_cpu(T argmax_h, T argmax_w, const int height,
                                 const int width, const T *im_data,
                                 const int data_width, const int bp_dir) {
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
      argmax_w >= width) {
    // empty
    return 0;
  }

  int argmax_h_low = floorf(argmax_h);
  int argmax_w_low = floorf(argmax_w);
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  T weight = 0;

  if (bp_dir == 0) {
    // d/dh: the low row enters with a negative sign, the high row positive.
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_w_low + 1 - argmax_w) *
                im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += -1 * (argmax_w - argmax_w_low) *
                im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += (argmax_w_low + 1 - argmax_w) *
                im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_w - argmax_w_low) *
                im_data[argmax_h_high * data_width + argmax_w_high];
  } else if (bp_dir == 1) {
    // d/dw: the low column enters with a negative sign, the high one positive.
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_h_low + 1 - argmax_h) *
                im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += (argmax_h_low + 1 - argmax_h) *
                im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += -1 * (argmax_h - argmax_h_low) *
                im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_h - argmax_h_low) *
                im_data[argmax_h_high * data_width + argmax_w_high];
  }

  return weight;
}
// Scatters the column-buffer gradient `data_col` back onto the image
// gradient `grad_im` for modulated deformable convolution.
//
// Each of the `n` linear indices addresses one element of `data_col`, laid
// out as (c, i, j, b, h_out, w_out) with w_out varying fastest. For that
// element the learned offset and modulation mask are looked up, the
// fractional sampling position in the image is reconstructed, and the
// mask-scaled gradient is accumulated (+=) into the image pixels that took
// part in the bilinear interpolation. `grad_im` is indexed as
// (b, c, height, width); since contributions are accumulated, the caller is
// presumably expected to pass a zero-initialized buffer -- confirm.
template <typename T>
void modulated_deformable_col2im_cpu_kernel(
    const int n, const T *data_col, const T *data_offset, const T *data_mask,
    const int channels, const int height, const int width, const int kernel_h,
    const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int deformable_group, const int height_col, const int width_col,
    T *grad_im) {
  for (int index = 0; index < n; index++) {
    // Decompose the linear index: kernel tap (i, j) and image channel c.
    const int j = (index / width_col / height_col / batch_size) % kernel_w;
    const int i =
        (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
    const int c =
        index / width_col / height_col / batch_size / kernel_w / kernel_h;
    // compute the start and end of the output

    const int deformable_group_index = c / channel_per_deformable_group;

    // Output position and batch element.
    int w_out = index % width_col;
    int h_out = (index / width_col) % height_col;
    int b = (index / width_col / height_col) % batch_size;
    // Top-left corner of the (undeformed) receptive field in the image.
    int w_in = w_out * stride_w - pad_w;
    int h_in = h_out * stride_h - pad_h;

    // Offsets are stored as (b, group, 2 * kh * kw, h_out, w_out) and masks
    // as (b, group, kh * kw, h_out, w_out).
    const T *data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const T *data_mask_ptr =
        data_mask + (b * deformable_group + deformable_group_index) *
                        kernel_h * kernel_w * height_col * width_col;
    // For every tap the h-offset channel precedes the w-offset channel.
    const int data_offset_h_ptr =
        ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
    const int data_offset_w_ptr =
        ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col +
        w_out;
    const int data_mask_hw_ptr =
        ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
    const T offset_h = data_offset_ptr[data_offset_h_ptr];
    const T offset_w = data_offset_ptr[data_offset_w_ptr];
    const T mask = data_mask_ptr[data_mask_hw_ptr];
    // Fractional sampling position used by the forward pass.
    const T cur_inv_h_data = h_in + i * dilation_h + offset_h;
    const T cur_inv_w_data = w_in + j * dilation_w + offset_w;

    const T cur_top_grad = data_col[index] * mask;
    const int cur_h = (int)cur_inv_h_data;
    const int cur_w = (int)cur_inv_w_data;
    // Scan a 5x5 neighbourhood around the truncated position and keep the
    // in-image pixels closer than 1 to the sampling position on both axes.
    // NOTE(review): `abs` is unqualified; even if it resolved to the integer
    // overload, truncation would not change the outcome of a `< 1` test.
    for (int dy = -2; dy <= 2; dy++) {
      for (int dx = -2; dx <= 2; dx++) {
        if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 &&
            cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
            abs(cur_inv_w_data - (cur_w + dx)) < 1) {
          int cur_bottom_grad_pos =
              ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
          T weight = dmcn_get_gradient_weight_cpu(cur_inv_h_data,
                                                  cur_inv_w_data, cur_h + dy,
                                                  cur_w + dx, height, width);
          *(grad_im + cur_bottom_grad_pos) += weight * cur_top_grad;
        }
      }
    }
  }
}
batch_size + b) * height_col) + h) * width_col + w; const int bp_dir = offset_c % 2; int j = (col_pos / width_col / height_col / batch_size) % kernel_w; int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; int w_out = col_pos % width_col; int h_out = (col_pos / width_col) % height_col; int w_in = w_out * stride_w - pad_w; int h_in = h_out * stride_h - pad_h; const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); const T offset_h = data_offset_ptr[data_offset_h_ptr]; const T offset_w = data_offset_ptr[data_offset_w_ptr]; const T mask = data_mask_ptr[data_mask_hw_ptr]; T inv_h = h_in + i * dilation_h + offset_h; T inv_w = w_in + j * dilation_w + offset_w; if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) inv_h = inv_w = -2; else mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear_cpu(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w); const T weight = dmcn_get_coordinate_weight_cpu( inv_h, inv_w, height, width, data_im_ptr + cnt * height * width, width, bp_dir); val += weight * data_col_ptr[col_pos] * mask; cnt += 1; } // KERNEL_ASSIGN(grad_offset[index], offset_req, val); grad_offset[index] = val; if (offset_c % 2 == 0) // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + // deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * // height_col + h) * width_col + w], mask_req, mval); grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval; } } void modulated_deformable_im2col_cpu( const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int 
width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor data_col) { // num_axes should be smaller than block size const int channel_per_deformable_group = channels / deformable_group; const int num_kernels = channels * batch_size * height_col * width_col; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_im.scalar_type(), "modulated_deformable_im2col_cpu", ([&] { const scalar_t *data_im_ = data_im.data_ptr(); const scalar_t *data_offset_ = data_offset.data_ptr(); const scalar_t *data_mask_ = data_mask.data_ptr(); scalar_t *data_col_ = data_col.data_ptr(); modulated_deformable_im2col_cpu_kernel( num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, batch_size, channels, deformable_group, height_col, width_col, data_col_); })); } void modulated_deformable_col2im_cpu( const Tensor data_col, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor grad_im) { const int channel_per_deformable_group = channels / deformable_group; const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "modulated_deformable_col2im_cpu", ([&] { const scalar_t *data_col_ = data_col.data_ptr(); const scalar_t *data_offset_ = data_offset.data_ptr(); const scalar_t *data_mask_ = data_mask.data_ptr(); scalar_t *grad_im_ = grad_im.data_ptr(); modulated_deformable_col2im_cpu_kernel( num_kernels, data_col_, data_offset_, data_mask_, 
channels, height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, batch_size, deformable_group, height_col, width_col, grad_im_); })); } void modulated_deformable_col2im_coord_cpu( const Tensor data_col, const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor grad_offset, Tensor grad_mask) { const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group; const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "modulated_deformable_col2im_coord_cpu", ([&] { const scalar_t *data_col_ = data_col.data_ptr(); const scalar_t *data_im_ = data_im.data_ptr(); const scalar_t *data_offset_ = data_offset.data_ptr(); const scalar_t *data_mask_ = data_mask.data_ptr(); scalar_t *grad_offset_ = grad_offset.data_ptr(); scalar_t *grad_mask_ = grad_mask.data_ptr(); modulated_deformable_col2im_coord_cpu_kernel( num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col, grad_offset_, grad_mask_); })); } void modulated_deformable_im2col_impl( const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const 
int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor data_col); void modulated_deformable_col2im_impl( const Tensor data_col, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor grad_im); void modulated_deformable_col2im_coord_impl( const Tensor data_col, const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor grad_offset, Tensor grad_mask); REGISTER_DEVICE_IMPL(modulated_deformable_im2col_impl, CPU, modulated_deformable_im2col_cpu); REGISTER_DEVICE_IMPL(modulated_deformable_col2im_impl, CPU, modulated_deformable_col2im_cpu); REGISTER_DEVICE_IMPL(modulated_deformable_col2im_coord_impl, CPU, modulated_deformable_col2im_coord_cpu); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/nms.cpp ================================================ // Copyright (c) OpenMMLab. 
// Greedy non-maximum suppression on CPU.
//
// `boxes` is read column-wise as (x1, y1, x2, y2) along dim 1 and `scores`
// is sorted in descending order along dim 0. Walking the boxes from highest
// to lowest score, every still-selected box suppresses each lower-ranked box
// whose IoU with it is strictly greater than `iou_threshold`. `offset` is
// added to every width/height when computing areas and intersections.
//
// Returns the indices (int64) of the surviving boxes in descending-score
// order; an empty int64 tensor when `boxes` has no elements.
//
// The coordinate buffers are accessed as float, so `boxes` must be a float32
// tensor.
Tensor nms_cpu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  if (boxes.numel() == 0) {
    return at::empty({0}, boxes.options().dtype(at::kLong));
  }

  // Contiguous per-coordinate columns so that raw pointers can be used.
  auto x1_t = boxes.select(1, 0).contiguous();
  auto y1_t = boxes.select(1, 1).contiguous();
  auto x2_t = boxes.select(1, 2).contiguous();
  auto y2_t = boxes.select(1, 3).contiguous();

  Tensor areas_t = (x2_t - x1_t + offset) * (y2_t - y1_t + offset);

  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));

  auto nboxes = boxes.size(0);
  // select[r] refers to the box of rank r in the sorted order, not to the
  // original box index.
  Tensor select_t = at::ones({nboxes}, boxes.options().dtype(at::kBool));

  auto select = select_t.data_ptr<bool>();
  auto order = order_t.data_ptr<int64_t>();
  auto x1 = x1_t.data_ptr<float>();
  auto y1 = y1_t.data_ptr<float>();
  auto x2 = x2_t.data_ptr<float>();
  auto y2 = y2_t.data_ptr<float>();
  auto areas = areas_t.data_ptr<float>();

  for (int64_t _i = 0; _i < nboxes; _i++) {
    if (select[_i] == false) continue;
    auto i = order[_i];
    auto ix1 = x1[i];
    auto iy1 = y1[i];
    auto ix2 = x2[i];
    auto iy2 = y2[i];
    auto iarea = areas[i];

    for (int64_t _j = _i + 1; _j < nboxes; _j++) {
      if (select[_j] == false) continue;
      auto j = order[_j];
      auto xx1 = std::max(ix1, x1[j]);
      auto yy1 = std::max(iy1, y1[j]);
      auto xx2 = std::min(ix2, x2[j]);
      auto yy2 = std::min(iy2, y2[j]);

      auto w = std::max(0.f, xx2 - xx1 + offset);
      auto h = std::max(0.f, yy2 - yy1 + offset);
      auto inter = w * h;
      auto ovr = inter / (iarea + areas[j] - inter);
      if (ovr > iou_threshold) select[_j] = false;
    }
  }
  return order_t.masked_select(select_t);
}
= boxes.select(1, 2).contiguous(); auto y2_t = boxes.select(1, 3).contiguous(); auto scores_t = scores.clone(); Tensor areas_t = (x2_t - x1_t + offset) * (y2_t - y1_t + offset); auto nboxes = boxes.size(0); auto x1 = x1_t.data_ptr(); auto y1 = y1_t.data_ptr(); auto x2 = x2_t.data_ptr(); auto y2 = y2_t.data_ptr(); auto sc = scores_t.data_ptr(); auto areas = areas_t.data_ptr(); auto de = dets.data_ptr(); int64_t pos = 0; Tensor inds_t = at::arange(nboxes, boxes.options().dtype(at::kLong)); auto inds = inds_t.data_ptr(); for (int64_t i = 0; i < nboxes; i++) { auto max_score = sc[i]; auto max_pos = i; pos = i + 1; // get max box while (pos < nboxes) { if (max_score < sc[pos]) { max_score = sc[pos]; max_pos = pos; } pos = pos + 1; } // swap auto ix1 = de[i * 5 + 0] = x1[max_pos]; auto iy1 = de[i * 5 + 1] = y1[max_pos]; auto ix2 = de[i * 5 + 2] = x2[max_pos]; auto iy2 = de[i * 5 + 3] = y2[max_pos]; auto iscore = de[i * 5 + 4] = sc[max_pos]; auto iarea = areas[max_pos]; auto iind = inds[max_pos]; x1[max_pos] = x1[i]; y1[max_pos] = y1[i]; x2[max_pos] = x2[i]; y2[max_pos] = y2[i]; sc[max_pos] = sc[i]; areas[max_pos] = areas[i]; inds[max_pos] = inds[i]; x1[i] = ix1; y1[i] = iy1; x2[i] = ix2; y2[i] = iy2; sc[i] = iscore; areas[i] = iarea; inds[i] = iind; pos = i + 1; while (pos < nboxes) { auto xx1 = std::max(ix1, x1[pos]); auto yy1 = std::max(iy1, y1[pos]); auto xx2 = std::min(ix2, x2[pos]); auto yy2 = std::min(iy2, y2[pos]); auto w = std::max(0.f, xx2 - xx1 + offset); auto h = std::max(0.f, yy2 - yy1 + offset); auto inter = w * h; auto ovr = inter / (iarea + areas[pos] - inter); float weight = 1.; if (method == 0) { if (ovr >= iou_threshold) weight = 0; } else if (method == 1) { if (ovr >= iou_threshold) weight = 1 - ovr; } else if (method == 2) { weight = std::exp(-(ovr * ovr) / sigma); } sc[pos] *= weight; // if box score falls below threshold, discard the box by // swapping with last box update N if (sc[pos] < min_score) { x1[pos] = x1[nboxes - 1]; y1[pos] = y1[nboxes - 
1]; x2[pos] = x2[nboxes - 1]; y2[pos] = y2[nboxes - 1]; sc[pos] = sc[nboxes - 1]; areas[pos] = areas[nboxes - 1]; inds[pos] = inds[nboxes - 1]; nboxes = nboxes - 1; pos = pos - 1; } pos = pos + 1; } } return inds_t.slice(0, 0, nboxes); } Tensor softnms_impl(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold, float sigma, float min_score, int method, int offset); REGISTER_DEVICE_IMPL(softnms_impl, CPU, softnms_cpu); std::vector > nms_match_cpu(Tensor dets, float iou_threshold) { auto x1_t = dets.select(1, 0).contiguous(); auto y1_t = dets.select(1, 1).contiguous(); auto x2_t = dets.select(1, 2).contiguous(); auto y2_t = dets.select(1, 3).contiguous(); auto scores = dets.select(1, 4).contiguous(); at::Tensor areas_t = (x2_t - x1_t) * (y2_t - y1_t); auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); auto ndets = dets.size(0); at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); auto suppressed = suppressed_t.data_ptr(); auto order = order_t.data_ptr(); auto x1 = x1_t.data_ptr(); auto y1 = y1_t.data_ptr(); auto x2 = x2_t.data_ptr(); auto y2 = y2_t.data_ptr(); auto areas = areas_t.data_ptr(); std::vector keep; std::vector > matched; for (int64_t _i = 0; _i < ndets; _i++) { auto i = order[_i]; if (suppressed[i] == 1) continue; keep.push_back(i); std::vector v_i; auto ix1 = x1[i]; auto iy1 = y1[i]; auto ix2 = x2[i]; auto iy2 = y2[i]; auto iarea = areas[i]; for (int64_t _j = _i + 1; _j < ndets; _j++) { auto j = order[_j]; if (suppressed[j] == 1) continue; auto xx1 = std::max(ix1, x1[j]); auto yy1 = std::max(iy1, y1[j]); auto xx2 = std::min(ix2, x2[j]); auto yy2 = std::min(iy2, y2[j]); auto w = std::max(static_cast(0), xx2 - xx1); auto h = std::max(static_cast(0), yy2 - yy1); auto inter = w * h; auto ovr = inter / (iarea + areas[j] - inter); if (ovr >= iou_threshold) { suppressed[j] = 1; v_i.push_back(j); } } matched.push_back(v_i); } for (size_t i = 0; i < keep.size(); i++) 
matched[i].insert(matched[i].begin(), keep[i]); return matched; } std::vector > nms_match_impl(Tensor dets, float iou_threshold); REGISTER_DEVICE_IMPL(nms_match_impl, CPU, nms_match_cpu); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/nms_rotated.cpp ================================================ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved // modified from // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp #include "box_iou_rotated_utils.hpp" #include "pytorch_cpp_helper.hpp" template Tensor nms_rotated_cpu_kernel(const Tensor dets, const Tensor scores, const float iou_threshold) { // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel, // however, the code in this function is much shorter because // we delegate the IoU computation for rotated boxes to // the single_box_iou_rotated function in box_iou_rotated_utils.h AT_ASSERTM(!dets.is_cuda(), "dets must be a CPU tensor"); AT_ASSERTM(!scores.is_cuda(), "scores must be a CPU tensor"); AT_ASSERTM(dets.scalar_type() == scores.scalar_type(), "dets should have the same type as scores"); if (dets.numel() == 0) { return at::empty({0}, dets.options().dtype(at::kLong)); } auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); auto ndets = dets.size(0); Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); auto suppressed = suppressed_t.data_ptr(); auto keep = keep_t.data_ptr(); auto order = order_t.data_ptr(); int64_t num_to_keep = 0; for (int64_t _i = 0; _i < ndets; _i++) { auto i = order[_i]; if (suppressed[i] == 1) { continue; } keep[num_to_keep++] = i; for (int64_t _j = _i + 1; _j < ndets; _j++) { auto j = order[_j]; if (suppressed[j] == 1) { continue; } auto ovr = single_box_iou_rotated( dets[i].data_ptr(), dets[j].data_ptr(), 0); if (ovr >= 
iou_threshold) { suppressed[j] = 1; } } } return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); } Tensor nms_rotated_cpu(const Tensor dets, const Tensor scores, const float iou_threshold) { auto result = at::empty({0}, dets.options()); AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] { result = nms_rotated_cpu_kernel(dets, scores, iou_threshold); }); return result; } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/pixel_group.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved // It is modified from https://github.com/WenmuZhou/PAN.pytorch #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" std::vector> estimate_confidence(int32_t* label, float* score, int label_num, int height, int width) { std::vector> point_vector; for (int i = 0; i < label_num; i++) { std::vector point; point.push_back(0); point.push_back(0); point_vector.push_back(point); } for (int y = 0; y < height; y++) { auto label_tmp = label + y * width; auto score_tmp = score + y * width; for (int x = 0; x < width; x++) { auto l = label_tmp[x]; if (l > 0) { float confidence = score_tmp[x]; point_vector[l].push_back(x); point_vector[l].push_back(y); point_vector[l][0] += confidence; point_vector[l][1] += 1; } } } for (size_t l = 0; l < point_vector.size(); l++) if (point_vector[l][1] > 0) { point_vector[l][0] /= point_vector[l][1]; } return point_vector; } std::vector> pixel_group_cpu( Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label, Tensor kernel_contour, int kernel_region_num, float dis_threshold) { assert(score.dim() == 2); assert(mask.dim() == 2); assert(embedding_dim.dim() == 3); int height = score.size(0); int width = score.size(1); assert(height == mask.size(0) == embedding.size(1) == kernel_label.size(1)); assert(width == mask.size(1) == embedding.size(2) == kernel_label.size(2)); auto threshold_square = 
dis_threshold * dis_threshold; auto ptr_score = score.data_ptr(); auto ptr_mask = mask.data_ptr(); auto ptr_kernel_contour = kernel_contour.data_ptr(); auto ptr_embedding = embedding.data_ptr(); auto ptr_kernel_label = kernel_label.data_ptr(); std::queue> contour_pixels; auto embedding_dim = embedding.size(2); std::vector> kernel_vector( kernel_region_num, std::vector(embedding_dim + 1, 0)); Tensor text_label; text_label = kernel_label.clone(); auto ptr_text_label = text_label.data_ptr(); for (int i = 0; i < height; i++) { auto ptr_embedding_tmp = ptr_embedding + i * width * embedding_dim; auto ptr_kernel_label_tmp = ptr_kernel_label + i * width; auto ptr_kernel_contour_tmp = ptr_kernel_contour + i * width; for (int j = 0, k = 0; j < width && k < width * embedding_dim; j++, k += embedding_dim) { int32_t label = ptr_kernel_label_tmp[j]; if (label > 0) { for (int d = 0; d < embedding_dim; d++) kernel_vector[label][d] += ptr_embedding_tmp[k + d]; kernel_vector[label][embedding_dim] += 1; // kernel pixel number if (ptr_kernel_contour_tmp[j]) { contour_pixels.push(std::make_tuple(i, j, label)); } } } } for (int i = 0; i < kernel_region_num; i++) { for (int j = 0; j < embedding_dim; j++) { kernel_vector[i][j] /= kernel_vector[i][embedding_dim]; } } int dx[4] = {-1, 1, 0, 0}; int dy[4] = {0, 0, -1, 1}; while (!contour_pixels.empty()) { auto query_pixel = contour_pixels.front(); contour_pixels.pop(); int y = std::get<0>(query_pixel); int x = std::get<1>(query_pixel); int32_t l = std::get<2>(query_pixel); auto kernel_cv = kernel_vector[l]; for (int idx = 0; idx < 4; idx++) { int tmpy = y + dy[idx]; int tmpx = x + dx[idx]; auto ptr_text_label_tmp = ptr_text_label + tmpy * width; if (tmpy < 0 || tmpy >= height || tmpx < 0 || tmpx >= width) continue; if (!ptr_mask[tmpy * width + tmpx] || ptr_text_label_tmp[tmpx] > 0) continue; float dis = 0; auto ptr_embedding_tmp = ptr_embedding + tmpy * width * embedding_dim; for (size_t i = 0; i < embedding_dim; i++) { dis += 
pow(kernel_cv[i] - ptr_embedding_tmp[tmpx * embedding_dim + i], 2); // ignore further computing if dis is big enough if (dis >= threshold_square) break; } if (dis >= threshold_square) continue; contour_pixels.push(std::make_tuple(tmpy, tmpx, l)); ptr_text_label_tmp[tmpx] = l; } } return estimate_confidence(ptr_text_label, ptr_score, kernel_region_num, height, width); } std::vector> pixel_group_impl( Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label, Tensor kernel_contour, int kernel_region_num, float dis_threshold); REGISTER_DEVICE_IMPL(pixel_group_impl, CPU, pixel_group_cpu); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/points_in_boxes.cpp ================================================ #include "pytorch_cpp_helper.hpp" inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rz, float &local_x, float &local_y) { float cosa = cos(-rz), sina = sin(-rz); local_x = shift_x * cosa + shift_y * (-sina); local_y = shift_x * sina + shift_y * cosa; } inline int check_pt_in_box3d_cpu(const float *pt, const float *box3d, float &local_x, float &local_y) { // param pt: (x, y, z) // param box3d: (cx, cy, cz, x_size, y_size, z_size, rz) in LiDAR coordinate, // cz in the bottom center float x = pt[0], y = pt[1], z = pt[2]; float cx = box3d[0], cy = box3d[1], cz = box3d[2]; float x_size = box3d[3], y_size = box3d[4], z_size = box3d[5], rz = box3d[6]; cz += z_size / 2.0; // shift to the center since cz in box3d is the bottom center if (fabsf(z - cz) > z_size / 2.0) return 0; lidar_to_local_coords_cpu(x - cx, y - cy, rz, local_x, local_y); float in_flag = (local_x > -x_size / 2.0) & (local_x < x_size / 2.0) & (local_y > -y_size / 2.0) & (local_y < y_size / 2.0); return in_flag; } void points_in_boxes_cpu_forward(Tensor boxes_tensor, Tensor pts_tensor, Tensor pts_indices_tensor) { // params boxes: (N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR // coordinate, z is the bottom 
center, each box DO NOT overlaps params pts: // (npoints, 3) [x, y, z] in LiDAR coordinate params pts_indices: (N, npoints) CHECK_CONTIGUOUS(boxes_tensor); CHECK_CONTIGUOUS(pts_tensor); CHECK_CONTIGUOUS(pts_indices_tensor); int boxes_num = boxes_tensor.size(0); int pts_num = pts_tensor.size(0); const float *boxes = boxes_tensor.data_ptr(); const float *pts = pts_tensor.data_ptr(); int *pts_indices = pts_indices_tensor.data_ptr(); float local_x = 0, local_y = 0; for (int i = 0; i < boxes_num; i++) { for (int j = 0; j < pts_num; j++) { int cur_in_flag = check_pt_in_box3d_cpu(pts + j * 3, boxes + i * 7, local_x, local_y); pts_indices[i * pts_num + j] = cur_in_flag; } } } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/psamask.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved // Modified from // https://github.com/hszhao/semseg/blob/master/lib/psa/src #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" #ifndef min #define min(a, b) (((a) < (b)) ? (a) : (b)) #endif #ifndef max #define max(a, b) (((a) > (b)) ? 
(a) : (b)) #endif void psamask_collect_forward(const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask, const Tensor mask_data, Tensor buffer_data) { for (int n = 0; n < num_; n++) { for (int h = 0; h < h_feature; h++) { for (int w = 0; w < w_feature; w++) { // effective mask region : [hstart, hend) x [wstart, wend) with // mask-indexed const int hstart = max(0, half_h_mask - h); const int hend = min(h_mask, h_feature + half_h_mask - h); const int wstart = max(0, half_w_mask - w); const int wend = min(w_mask, w_feature + half_w_mask - w); // (hidx, widx ) with mask-indexed // (hidx + h - half_h_mask, widx + w - half_w_mask) with // feature-indexed for (int hidx = hstart; hidx < hend; hidx++) { for (int widx = wstart; widx < wend; widx++) { buffer_data.view({-1})[(n * h_feature * w_feature + (hidx + h - half_h_mask) * w_feature + (widx + w - half_w_mask)) * h_feature * w_feature + h * w_feature + w] = mask_data.view( {-1})[((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + h) * w_feature + w]; } } } } } } void psamask_distribute_forward(const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask, const Tensor mask_data, Tensor buffer_data) { for (int n = 0; n < num_; n++) { for (int h = 0; h < h_feature; h++) { for (int w = 0; w < w_feature; w++) { // effective mask region : [hstart, hend) x [wstart, wend) with // mask-indexed const int hstart = max(0, half_h_mask - h); const int hend = min(h_mask, h_feature + half_h_mask - h); const int wstart = max(0, half_w_mask - w); const int wend = min(w_mask, w_feature + half_w_mask - w); // (hidx, widx ) with mask-indexed // (hidx + h - half_h_mask, widx + w - half_w_mask) with // feature-indexed for (int hidx = hstart; hidx < hend; hidx++) { for (int widx = wstart; widx < wend; widx++) { buffer_data.view( {-1})[(n * h_feature * w_feature + h * w_feature + 
w) * h_feature * w_feature + (hidx + h - half_h_mask) * w_feature + (widx + w - half_w_mask)] = mask_data.view( {-1})[((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + h) * w_feature + w]; } } } } } } void psamask_collect_backward(const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask, const Tensor buffer_diff, Tensor mask_diff) { for (int n = 0; n < num_; n++) { for (int h = 0; h < h_feature; h++) { for (int w = 0; w < w_feature; w++) { // effective mask region : [hstart, hend) x [wstart, wend) with // mask-indexed const int hstart = max(0, half_h_mask - h); const int hend = min(h_mask, h_feature + half_h_mask - h); const int wstart = max(0, half_w_mask - w); const int wend = min(w_mask, w_feature + half_w_mask - w); // (hidx, widx ) with mask-indexed // (hidx + h - half_h_mask, widx + w - half_w_mask) with // feature-indexed for (int hidx = hstart; hidx < hend; hidx++) { for (int widx = wstart; widx < wend; widx++) { mask_diff.view({-1})[((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + h) * w_feature + w] = buffer_diff.view({-1})[(n * h_feature * w_feature + (hidx + h - half_h_mask) * w_feature + (widx + w - half_w_mask)) * h_feature * w_feature + h * w_feature + w]; } } } } } } void psamask_distribute_backward(const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask, const Tensor buffer_diff, Tensor mask_diff) { for (int n = 0; n < num_; n++) { for (int h = 0; h < h_feature; h++) { for (int w = 0; w < w_feature; w++) { // effective mask region : [hstart, hend) x [wstart, wend) with // mask-indexed const int hstart = max(0, half_h_mask - h); const int hend = min(h_mask, h_feature + half_h_mask - h); const int wstart = max(0, half_w_mask - w); const int wend = min(w_mask, w_feature + half_w_mask - w); // (hidx, widx ) with mask-indexed // (hidx + h - half_h_mask, widx + w - 
half_w_mask) with // feature-indexed for (int hidx = hstart; hidx < hend; hidx++) { for (int widx = wstart; widx < wend; widx++) { mask_diff.view({-1})[((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + h) * w_feature + w] = buffer_diff.view( {-1})[(n * h_feature * w_feature + h * w_feature + w) * h_feature * w_feature + (hidx + h - half_h_mask) * w_feature + (widx + w - half_w_mask)]; } } } } } } void psamask_forward_cpu(const int psa_type, const Tensor input, Tensor output, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { if (psa_type == 0) psamask_collect_forward(num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask, input, output); else psamask_distribute_forward(num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask, input, output); } void psamask_backward_cpu(const int psa_type, const Tensor grad_output, Tensor grad_input, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { if (psa_type == 0) psamask_collect_backward(num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask, grad_output, grad_input); else psamask_distribute_backward(num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask, grad_output, grad_input); } void psamask_forward_impl(const int psa_type, const Tensor input, Tensor output, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask); void psamask_backward_impl(const int psa_type, const Tensor grad_output, Tensor grad_input, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask); REGISTER_DEVICE_IMPL(psamask_forward_impl, CPU, psamask_forward_cpu); REGISTER_DEVICE_IMPL(psamask_backward_impl, CPU, psamask_backward_cpu); 
================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/roi_align.cpp ================================================ // Modified from // https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlign // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved #include #include #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" // implementation taken from Caffe2 template struct PreCalc { int pos1; int pos2; int pos3; int pos4; T w1; T w2; T w3; T w4; }; template void pre_calc_for_bilinear_interpolate( const int height, const int width, const int pooled_height, const int pooled_width, const int iy_upper, const int ix_upper, T roi_start_h, T roi_start_w, T bin_size_h, T bin_size_w, int roi_bin_grid_h, int roi_bin_grid_w, std::vector>& pre_calc) { int pre_calc_index = 0; for (int ph = 0; ph < pooled_height; ph++) { for (int pw = 0; pw < pooled_width; pw++) { for (int iy = 0; iy < iy_upper; iy++) { const T yy = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 for (int ix = 0; ix < ix_upper; ix++) { const T xx = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); T x = xx; T y = yy; // deal with: inverse elements are out of feature map boundary if (y < -1.0 || y > height || x < -1.0 || x > width) { // empty PreCalc pc; pc.pos1 = 0; pc.pos2 = 0; pc.pos3 = 0; pc.pos4 = 0; pc.w1 = 0; pc.w2 = 0; pc.w3 = 0; pc.w4 = 0; pre_calc[pre_calc_index] = pc; pre_calc_index += 1; continue; } if (y <= 0) { y = 0; } if (x <= 0) { x = 0; } int y_low = (int)y; int x_low = (int)x; int y_high; int x_high; if (y_low >= height - 1) { y_high = y_low = height - 1; y = (T)y_low; } else { y_high = y_low + 1; } if (x_low >= width - 1) { x_high = x_low = width - 1; x = (T)x_low; } else { x_high = x_low + 1; } T ly = y - y_low; T lx = x - x_low; T hy = 1. - ly, hx = 1. 
- lx; T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; // save weights and indices PreCalc pc; pc.pos1 = y_low * width + x_low; pc.pos2 = y_low * width + x_high; pc.pos3 = y_high * width + x_low; pc.pos4 = y_high * width + x_high; pc.w1 = w1; pc.w2 = w2; pc.w3 = w3; pc.w4 = w4; pre_calc[pre_calc_index] = pc; pre_calc_index += 1; } } } } } template void ROIAlignForward(const int nthreads, const T* input, const T* rois, T* output, T* argmax_y, T* argmax_x, const int pooled_height, const int pooled_width, const T spatial_scale, const int sampling_ratio, const int pool_mode, // 0 - max pool, 1 - avg pool const bool aligned, const int channels, const int height, const int width) { int n_rois = nthreads / channels / pooled_width / pooled_height; // (n, c, ph, pw) is an element in the pooled output // can be parallelized using omp // #pragma omp parallel for num_threads(32) for (int n = 0; n < n_rois; n++) { int index_n = n * channels * pooled_width * pooled_height; const T* offset_rois = rois + n * 5; int roi_batch_ind = offset_rois[0]; // Do not use rounding; this implementation detail is critical T offset = aligned ? (T)0.5 : (T)0.0; T roi_start_w = offset_rois[1] * spatial_scale - offset; T roi_start_h = offset_rois[2] * spatial_scale - offset; T roi_end_w = offset_rois[3] * spatial_scale - offset; T roi_end_h = offset_rois[4] * spatial_scale - offset; T roi_width = roi_end_w - roi_start_w; T roi_height = roi_end_h - roi_start_h; if (aligned) { AT_ASSERTM(roi_width >= 0 && roi_height >= 0, "ROIs in ROIAlign cannot have non-negative size!"); } else { // for backward-compatibility only roi_width = std::max(roi_width, (T)1.); roi_height = std::max(roi_height, (T)1.); } T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); // We use roi_bin_grid to sample the grid and mimic integral int roi_bin_grid_h = (sampling_ratio > 0) ? 
sampling_ratio : ceilf(roi_height / pooled_height); // e.g., = 2 int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceilf(roi_width / pooled_width); // When the grid is empty, output zeros == 0/1, instead of NaN. const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 // we want to precalculate indices and weights shared by all channels, // this is the key point of optimization std::vector> pre_calc(roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); pre_calc_for_bilinear_interpolate( height, width, pooled_height, pooled_width, roi_bin_grid_h, roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w, roi_bin_grid_h, roi_bin_grid_w, pre_calc); for (int c = 0; c < channels; c++) { int index_n_c = index_n + c * pooled_width * pooled_height; const T* offset_input = input + (roi_batch_ind * channels + c) * height * width; int pre_calc_index = 0; for (int ph = 0; ph < pooled_height; ph++) { for (int pw = 0; pw < pooled_width; pw++) { int index = index_n_c + ph * pooled_width + pw; T output_val = 0.; T maxval = -10000; T maxidx_y = -1.f, maxidx_x = -1.f; for (int iy = 0; iy < roi_bin_grid_h; iy++) { const T y = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); for (int ix = 0; ix < roi_bin_grid_w; ix++) { const T x = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); PreCalc pc = pre_calc[pre_calc_index]; T val = pc.w1 * offset_input[pc.pos1] + pc.w2 * offset_input[pc.pos2] + pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; if (val > maxval) { maxval = val; maxidx_y = y; maxidx_x = x; } output_val += val; pre_calc_index += 1; } } if (pool_mode == 0) { // We do max pooling inside a bin output[index] = maxval; argmax_y[index] = maxidx_y; argmax_x[index] = maxidx_x; } else if (pool_mode == 1) { // We do average (integral) pooling inside a bin output[index] = output_val / count; } // if } // for pw } // for ph } // for c 
} // for n } template void bilinear_interpolate_gradient(const int height, const int width, T y, T x, T& w1, T& w2, T& w3, T& w4, int& x_low, int& x_high, int& y_low, int& y_high, const int index /* index for debug only*/) { // deal with cases that inverse elements are out of feature map boundary if (y < -1.0 || y > height || x < -1.0 || x > width) { // empty w1 = w2 = w3 = w4 = 0.; x_low = x_high = y_low = y_high = -1; return; } if (y <= 0) y = 0; if (x <= 0) x = 0; y_low = (int)y; x_low = (int)x; if (y_low >= height - 1) { y_high = y_low = height - 1; y = (T)y_low; } else { y_high = y_low + 1; } if (x_low >= width - 1) { x_high = x_low = width - 1; x = (T)x_low; } else { x_high = x_low + 1; } T ly = y - y_low; T lx = x - x_low; T hy = 1. - ly, hx = 1. - lx; // reference in forward // T v1 = input[y_low * width + x_low]; // T v2 = input[y_low * width + x_high]; // T v3 = input[y_high * width + x_low]; // T v4 = input[y_high * width + x_high]; // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; return; } template inline void add(T* address, const T& val) { *address += val; } template void ROIAlignBackward(const int nthreads, const T* grad_output, const T* rois, const T* argmax_y, const T* argmax_x, T* grad_input, const int pooled_height, const int pooled_width, const T spatial_scale, const int sampling_ratio, const int pool_mode, // 0 - max pool, 1 - avg pool const bool aligned, const int channels, const int height, const int width, const int n_stride, const int c_stride, const int h_stride, const int w_stride) { for (int index = 0; index < nthreads; index++) { // (n, c, ph, pw) is an element in the pooled output int pw = index % pooled_width; int ph = (index / pooled_width) % pooled_height; int c = (index / pooled_width / pooled_height) % channels; int n = index / pooled_width / pooled_height / channels; const T* offset_rois = rois + n * 5; int roi_batch_ind = offset_rois[0]; // Do not use rounding; this 
implementation detail is critical T offset = aligned ? (T)0.5 : (T)0.0; T roi_start_w = offset_rois[1] * spatial_scale - offset; T roi_start_h = offset_rois[2] * spatial_scale - offset; T roi_end_w = offset_rois[3] * spatial_scale - offset; T roi_end_h = offset_rois[4] * spatial_scale - offset; T roi_width = roi_end_w - roi_start_w; T roi_height = roi_end_h - roi_start_h; if (aligned) { AT_ASSERTM(roi_width >= 0 && roi_height >= 0, "ROIs in ROIAlign do not have non-negative size!"); } else { // for backward-compatibility only roi_width = std::max(roi_width, (T)1.); roi_height = std::max(roi_height, (T)1.); } T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); T* offset_grad_input = grad_input + ((roi_batch_ind * channels + c) * height * width); int output_offset = n * n_stride + c * c_stride; const T* offset_grad_output = grad_output + output_offset; const T grad_output_this_bin = offset_grad_output[ph * h_stride + pw * w_stride]; if (pool_mode == 0) { // We do max pooling inside a bin T y = argmax_y[index], x = argmax_x[index]; if (y != -1.f) { T w1, w2, w3, w4; int x_low, x_high, y_low, y_high; bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high, index); T g1 = grad_output_this_bin * w1; T g2 = grad_output_this_bin * w2; T g3 = grad_output_this_bin * w3; T g4 = grad_output_this_bin * w4; if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { // atomic add is not needed for now since it is single threaded add(offset_grad_input + y_low * width + x_low, static_cast(g1)); add(offset_grad_input + y_low * width + x_high, static_cast(g2)); add(offset_grad_input + y_high * width + x_low, static_cast(g3)); add(offset_grad_input + y_high * width + x_high, static_cast(g4)); } // if } // mode } else if (pool_mode == 1) { // We do average (integral) pooling inside a bin // We use roi_bin_grid to sample the grid and mimic integral int 
roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceilf(roi_height / pooled_height); // e.g., = 2 int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceilf(roi_width / pooled_width); const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 for (int iy = 0; iy < roi_bin_grid_h; iy++) { const T y = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 for (int ix = 0; ix < roi_bin_grid_w; ix++) { const T x = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); T w1, w2, w3, w4; int x_low, x_high, y_low, y_high; bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high, index); T g1 = grad_output_this_bin * w1 / count; T g2 = grad_output_this_bin * w2 / count; T g3 = grad_output_this_bin * w3 / count; T g4 = grad_output_this_bin * w4 / count; if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { // atomic add is not needed for now since it is single threaded add(offset_grad_input + y_low * width + x_low, static_cast(g1)); add(offset_grad_input + y_low * width + x_high, static_cast(g2)); add(offset_grad_input + y_high * width + x_low, static_cast(g3)); add(offset_grad_input + y_high * width + x_high, static_cast(g4)); } // if } // ix } // iy } // mode } // for } // ROIAlignBackward void ROIAlignForwardCPULauncher(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { int output_size = output.numel(); int channels = input.size(1); int height = input.size(2); int width = input.size(3); AT_DISPATCH_FLOATING_TYPES_AND_HALF( input.scalar_type(), "ROIAlign_forward", [&] { ROIAlignForward( output_size, input.data_ptr(), rois.data_ptr(), output.data_ptr(), argmax_y.data_ptr(), argmax_x.data_ptr(), aligned_height, aligned_width, static_cast(spatial_scale), sampling_ratio, 
pool_mode, aligned, channels, height, width); }); } void ROIAlignBackwardCPULauncher(Tensor grad_output, Tensor rois, Tensor argmax_y, Tensor argmax_x, Tensor grad_input, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { int output_size = grad_output.numel(); int channels = grad_input.size(1); int height = grad_input.size(2); int width = grad_input.size(3); // get stride values to ensure indexing into gradients is correct. int n_stride = grad_output.stride(0); int c_stride = grad_output.stride(1); int h_stride = grad_output.stride(2); int w_stride = grad_output.stride(3); AT_DISPATCH_FLOATING_TYPES_AND_HALF( grad_output.scalar_type(), "ROIAlign_backward", [&] { ROIAlignBackward( output_size, grad_output.data_ptr(), rois.data_ptr(), argmax_y.data_ptr(), argmax_x.data_ptr(), grad_input.data_ptr(), aligned_height, aligned_width, static_cast(spatial_scale), sampling_ratio, pool_mode, aligned, channels, height, width, n_stride, c_stride, h_stride, w_stride); }); } void roi_align_forward_cpu(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { ROIAlignForwardCPULauncher(input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } void roi_align_backward_cpu(Tensor grad_output, Tensor rois, Tensor argmax_y, Tensor argmax_x, Tensor grad_input, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { ROIAlignBackwardCPULauncher(grad_output, rois, argmax_y, argmax_x, grad_input, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool 
aligned); void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y, Tensor argmax_x, Tensor grad_input, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned); REGISTER_DEVICE_IMPL(roi_align_forward_impl, CPU, roi_align_forward_cpu); REGISTER_DEVICE_IMPL(roi_align_backward_impl, CPU, roi_align_backward_cpu); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/roi_align_rotated.cpp ================================================ // Modified from // https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlignRotated // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved #include #include #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" // implementation taken from Caffe2 template struct PreCalc { int pos1; int pos2; int pos3; int pos4; T w1; T w2; T w3; T w4; }; template void pre_calc_for_bilinear_interpolate( const int height, const int width, const int pooled_height, const int pooled_width, const int iy_upper, const int ix_upper, T roi_start_h, T roi_start_w, T bin_size_h, T bin_size_w, int roi_bin_grid_h, int roi_bin_grid_w, T roi_center_h, T roi_center_w, T cos_theta, T sin_theta, std::vector>& pre_calc) { int pre_calc_index = 0; for (int ph = 0; ph < pooled_height; ph++) { for (int pw = 0; pw < pooled_width; pw++) { for (int iy = 0; iy < iy_upper; iy++) { const T yy = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 for (int ix = 0; ix < ix_upper; ix++) { const T xx = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); // Rotate by theta around the center and translate // In image space, (y, x) is the order for Right Handed System, // and this is essentially multiplying the point by a rotation matrix // to rotate it counterclockwise through angle 
theta. T y = yy * cos_theta - xx * sin_theta + roi_center_h; T x = yy * sin_theta + xx * cos_theta + roi_center_w; // deal with: inverse elements are out of feature map boundary if (y < -1.0 || y > height || x < -1.0 || x > width) { // empty PreCalc pc; pc.pos1 = 0; pc.pos2 = 0; pc.pos3 = 0; pc.pos4 = 0; pc.w1 = 0; pc.w2 = 0; pc.w3 = 0; pc.w4 = 0; pre_calc[pre_calc_index] = pc; pre_calc_index += 1; continue; } if (y < 0) { y = 0; } if (x < 0) { x = 0; } int y_low = (int)y; int x_low = (int)x; int y_high; int x_high; if (y_low >= height - 1) { y_high = y_low = height - 1; y = (T)y_low; } else { y_high = y_low + 1; } if (x_low >= width - 1) { x_high = x_low = width - 1; x = (T)x_low; } else { x_high = x_low + 1; } T ly = y - y_low; T lx = x - x_low; T hy = 1. - ly, hx = 1. - lx; T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; // save weights and indices PreCalc pc; pc.pos1 = y_low * width + x_low; pc.pos2 = y_low * width + x_high; pc.pos3 = y_high * width + x_low; pc.pos4 = y_high * width + x_high; pc.w1 = w1; pc.w2 = w2; pc.w3 = w3; pc.w4 = w4; pre_calc[pre_calc_index] = pc; pre_calc_index += 1; } } } } } template void ROIAlignRotatedForward(const int nthreads, const T* input, const T& spatial_scale, const bool aligned, const bool clockwise, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int sampling_ratio, const T* rois, T* output) { int n_rois = nthreads / channels / pooled_width / pooled_height; // (n, c, ph, pw) is an element in the pooled output // can be parallelized using omp // #pragma omp parallel for num_threads(32) for (int n = 0; n < n_rois; n++) { int index_n = n * channels * pooled_width * pooled_height; const T* current_roi = rois + n * 6; int roi_batch_ind = current_roi[0]; // Do not use rounding; this implementation detail is critical T offset = aligned ? 
(T)0.5 : (T)0.0; T roi_center_w = current_roi[1] * spatial_scale - offset; T roi_center_h = current_roi[2] * spatial_scale - offset; T roi_width = current_roi[3] * spatial_scale; T roi_height = current_roi[4] * spatial_scale; T theta = current_roi[5]; if (clockwise) { theta = -theta; // If clockwise, the angle needs to be reversed. } T cos_theta = cos(theta); T sin_theta = sin(theta); if (aligned) { AT_ASSERTM(roi_width >= 0 && roi_height >= 0, "ROIs in ROIAlignRotated do not have non-negative size!"); } else { // for backward-compatibility only roi_width = std::max(roi_width, (T)1.); roi_height = std::max(roi_height, (T)1.); } T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); // We use roi_bin_grid to sample the grid and mimic integral int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceilf(roi_height / pooled_height); // e.g., = 2 int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceilf(roi_width / pooled_width); // We do average (integral) pooling inside a bin const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 // we want to precalculate indices and weights shared by all channels, // this is the key point of optimization std::vector> pre_calc(roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). // Appropriate translation needs to be applied after. 
T roi_start_h = -roi_height / 2.0; T roi_start_w = -roi_width / 2.0; pre_calc_for_bilinear_interpolate( height, width, pooled_height, pooled_width, roi_bin_grid_h, roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w, roi_bin_grid_h, roi_bin_grid_w, roi_center_h, roi_center_w, cos_theta, sin_theta, pre_calc); for (int c = 0; c < channels; c++) { int index_n_c = index_n + c * pooled_width * pooled_height; const T* offset_input = input + (roi_batch_ind * channels + c) * height * width; int pre_calc_index = 0; for (int ph = 0; ph < pooled_height; ph++) { for (int pw = 0; pw < pooled_width; pw++) { int index = index_n_c + ph * pooled_width + pw; T output_val = 0.; for (int iy = 0; iy < roi_bin_grid_h; iy++) { for (int ix = 0; ix < roi_bin_grid_w; ix++) { PreCalc pc = pre_calc[pre_calc_index]; output_val += pc.w1 * offset_input[pc.pos1] + pc.w2 * offset_input[pc.pos2] + pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; pre_calc_index += 1; } } output_val /= count; output[index] = output_val; } // for pw } // for ph } // for c } // for n } template void bilinear_interpolate_gradient(const int height, const int width, T y, T x, T& w1, T& w2, T& w3, T& w4, int& x_low, int& x_high, int& y_low, int& y_high) { // deal with cases that inverse elements are out of feature map boundary if (y < -1.0 || y > height || x < -1.0 || x > width) { // empty w1 = w2 = w3 = w4 = 0.; x_low = x_high = y_low = y_high = -1; return; } if (y < 0) { y = 0; } if (x < 0) { x = 0; } y_low = (int)y; x_low = (int)x; if (y_low >= height - 1) { y_high = y_low = height - 1; y = (T)y_low; } else { y_high = y_low + 1; } if (x_low >= width - 1) { x_high = x_low = width - 1; x = (T)x_low; } else { x_high = x_low + 1; } T ly = y - y_low; T lx = x - x_low; T hy = 1. - ly, hx = 1. 
- lx; // reference in forward // T v1 = input[y_low * width + x_low]; // T v2 = input[y_low * width + x_high]; // T v3 = input[y_high * width + x_low]; // T v4 = input[y_high * width + x_high]; // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; return; } template inline void add(T* address, const T& val) { *address += val; } template void ROIAlignRotatedBackward( const int nthreads, // may not be contiguous. should index using n_stride, etc const T* grad_output, const T& spatial_scale, const bool aligned, const bool clockwise, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int sampling_ratio, T* grad_input, const T* rois, const int n_stride, const int c_stride, const int h_stride, const int w_stride) { for (int index = 0; index < nthreads; index++) { // (n, c, ph, pw) is an element in the pooled output int pw = index % pooled_width; int ph = (index / pooled_width) % pooled_height; int c = (index / pooled_width / pooled_height) % channels; int n = index / pooled_width / pooled_height / channels; const T* current_roi = rois + n * 6; int roi_batch_ind = current_roi[0]; // Do not use rounding; this implementation detail is critical T offset = aligned ? (T)0.5 : (T)0.0; T roi_center_w = current_roi[1] * spatial_scale - offset; T roi_center_h = current_roi[2] * spatial_scale - offset; T roi_width = current_roi[3] * spatial_scale; T roi_height = current_roi[4] * spatial_scale; T theta = current_roi[5]; if (clockwise) { theta = -theta; // If clockwise, the angle needs to be reversed. 
}
    // NOTE(review): the statements from here down to the
    // "// ROIAlignRotatedBackward" marker are the tail of a function whose
    // beginning precedes this chunk; they are reproduced unchanged.
    // NOTE(review): the bare `static_cast(...)` calls (and, further down, the
    // bare `data_ptr()` calls) are not valid C++ as written; their template
    // arguments appear to have been stripped from this copy - restore them
    // from the upstream file.
    T cos_theta = cos(theta);
    T sin_theta = sin(theta);

    if (aligned) {
      AT_ASSERTM(roi_width >= 0 && roi_height >= 0,
                 "ROIs in ROIAlignRotated do not have non-negative size!");
    } else {  // for backward-compatibility only
      roi_width = std::max(roi_width, (T)1.);
      roi_height = std::max(roi_height, (T)1.);
    }

    T bin_size_h = static_cast(roi_height) / static_cast(pooled_height);
    T bin_size_w = static_cast(roi_width) / static_cast(pooled_width);

    T* offset_grad_input =
        grad_input + ((roi_batch_ind * channels + c) * height * width);

    int output_offset = n * n_stride + c * c_stride;
    const T* offset_grad_output = grad_output + output_offset;
    const T grad_output_this_bin =
        offset_grad_output[ph * h_stride + pw * w_stride];

    // We use roi_bin_grid to sample the grid and mimic integral
    int roi_bin_grid_h = (sampling_ratio > 0)
                             ? sampling_ratio
                             : ceilf(roi_height / pooled_height);  // e.g., = 2
    int roi_bin_grid_w =
        (sampling_ratio > 0) ? sampling_ratio : ceilf(roi_width / pooled_width);

    // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
    // Appropriate translation needs to be applied after.
    T roi_start_h = -roi_height / 2.0;
    T roi_start_w = -roi_width / 2.0;

    // We do average (integral) pooling inside a bin
    const T count = roi_bin_grid_h * roi_bin_grid_w;  // e.g. = 4

    for (int iy = 0; iy < roi_bin_grid_h; iy++) {
      const T yy = roi_start_h + ph * bin_size_h +
                   static_cast(iy + .5f) * bin_size_h /
                       static_cast(roi_bin_grid_h);  // e.g., 0.5, 1.5
      for (int ix = 0; ix < roi_bin_grid_w; ix++) {
        const T xx = roi_start_w + pw * bin_size_w +
                     static_cast(ix + .5f) * bin_size_w /
                         static_cast(roi_bin_grid_w);

        // Rotate by theta around the center and translate
        T y = yy * cos_theta - xx * sin_theta + roi_center_h;
        T x = yy * sin_theta + xx * cos_theta + roi_center_w;

        T w1, w2, w3, w4;
        int x_low, x_high, y_low, y_high;

        bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4,
                                      x_low, x_high, y_low, y_high);

        // Each sample contributes an equal 1/count share of the bin gradient,
        // split over the four neighbours by the weights w1..w4.
        T g1 = grad_output_this_bin * w1 / count;
        T g2 = grad_output_this_bin * w2 / count;
        T g3 = grad_output_this_bin * w3 / count;
        T g4 = grad_output_this_bin * w4 / count;

        if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
          // atomic add is not needed for now since it is single threaded
          add(offset_grad_input + y_low * width + x_low, static_cast(g1));
          add(offset_grad_input + y_low * width + x_high, static_cast(g2));
          add(offset_grad_input + y_high * width + x_low, static_cast(g3));
          add(offset_grad_input + y_high * width + x_high, static_cast(g4));
        }  // if
      }    // ix
    }      // iy
  }        // for
}  // ROIAlignRotatedBackward

// CPU launcher for the rotated RoI-align forward pass.
// Reads channels/height/width from dimensions 1..3 of `input`, takes the
// element count of `output` as the amount of work, and dispatches
// ROIAlignRotatedForward on the scalar type of `input` (floating types and
// half).
void ROIAlignRotatedForwardCPULauncher(Tensor input, Tensor rois, Tensor output,
                                       int aligned_height, int aligned_width,
                                       float spatial_scale, int sampling_ratio,
                                       bool aligned, bool clockwise) {
  int output_size = output.numel();
  int channels = input.size(1);
  int height = input.size(2);
  int width = input.size(3);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "ROIAlignRotated_forward", [&] {
        ROIAlignRotatedForward(
            output_size, input.data_ptr(),
            static_cast(spatial_scale), aligned, clockwise, channels, height,
            width, aligned_height, aligned_width, sampling_ratio,
            rois.data_ptr(), output.data_ptr());
      });
}

// CPU launcher for the rotated RoI-align backward pass.
// Reads channels/height/width from dimensions 1..3 of `grad_input` and passes
// the four strides of `grad_output` to the kernel, so `grad_output` is indexed
// through its strides rather than assumed contiguous. Dispatches on the scalar
// type of `grad_output`.
void ROIAlignRotatedBackwardCPULauncher(Tensor grad_output, Tensor rois,
                                        Tensor grad_input, int aligned_height,
                                        int aligned_width, float spatial_scale,
                                        int sampling_ratio, bool aligned,
                                        bool clockwise) {
  int channels = grad_input.size(1);
  int height = grad_input.size(2);
  int width = grad_input.size(3);

  // get stride values to ensure indexing into gradients is correct.
  int n_stride = grad_output.stride(0);
  int c_stride = grad_output.stride(1);
  int h_stride = grad_output.stride(2);
  int w_stride = grad_output.stride(3);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.scalar_type(), "ROIAlignRotated_backward", [&] {
        ROIAlignRotatedBackward(
            grad_output.numel(), grad_output.data_ptr(),
            static_cast(spatial_scale), aligned, clockwise, channels, height,
            width, aligned_height, aligned_width, sampling_ratio,
            grad_input.data_ptr(), rois.data_ptr(),
            n_stride, c_stride, h_stride, w_stride);
      });
}

// CPU implementation registered for roi_align_rotated_forward_impl; forwards
// every argument unchanged to the CPU launcher.
// NOTE(review): unlike roi_align_rotated_backward_cpu below, this wrapper does
// not check rois.size(1) - confirm whether the omission is intentional.
void roi_align_rotated_forward_cpu(Tensor input, Tensor rois, Tensor output,
                                   int aligned_height, int aligned_width,
                                   float spatial_scale, int sampling_ratio,
                                   bool aligned, bool clockwise) {
  ROIAlignRotatedForwardCPULauncher(input, rois, output, aligned_height,
                                    aligned_width, spatial_scale,
                                    sampling_ratio, aligned, clockwise);
}

// CPU implementation registered for roi_align_rotated_backward_impl.
// Raises via AT_ERROR("wrong roi size") unless `rois` has exactly 6 columns,
// then forwards every argument to the CPU launcher.
void roi_align_rotated_backward_cpu(Tensor top_grad, Tensor rois,
                                    Tensor bottom_grad, int aligned_height,
                                    int aligned_width, float spatial_scale,
                                    int sampling_ratio, bool aligned,
                                    bool clockwise) {
  int size_rois = rois.size(1);
  if (size_rois != 6) {
    AT_ERROR("wrong roi size");
  }
  ROIAlignRotatedBackwardCPULauncher(
      top_grad, rois, bottom_grad, aligned_height, aligned_width, spatial_scale,
      sampling_ratio, aligned, clockwise);
}

// Declarations of the device-dispatched entry points that the registrations
// below attach the CPU implementations to.
void roi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output,
                                    int aligned_height, int aligned_width,
                                    float spatial_scale, int sample_ratio,
                                    bool aligned, bool clockwise);

void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int aligned_height,
                                     int aligned_width, float spatial_scale,
                                     int sample_ratio, bool aligned,
                                     bool clockwise);
REGISTER_DEVICE_IMPL(roi_align_rotated_forward_impl, CPU,
roi_align_rotated_forward_cpu); REGISTER_DEVICE_IMPL(roi_align_rotated_backward_impl, CPU, roi_align_rotated_backward_cpu); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/voxelization.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved. #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" template void dynamic_voxelize_forward_cpu_kernel( const torch::TensorAccessor points, torch::TensorAccessor coors, const std::vector voxel_size, const std::vector coors_range, const std::vector grid_size, const int num_points, const int num_features, const int NDim) { const int ndim_minus_1 = NDim - 1; bool failed = false; // int coor[NDim]; int* coor = new int[NDim](); int c; for (int i = 0; i < num_points; ++i) { failed = false; for (int j = 0; j < NDim; ++j) { c = floor((points[i][j] - coors_range[j]) / voxel_size[j]); // necessary to rm points out of range if ((c < 0 || c >= grid_size[j])) { failed = true; break; } coor[ndim_minus_1 - j] = c; } if (failed) memset(&coors[i][0], -1, NDim * sizeof(T_int)); else memcpy(&coors[i][0], &coor[0], NDim * sizeof(T_int)); } delete[] coor; } template void hard_voxelize_forward_cpu_kernel( const torch::TensorAccessor points, torch::TensorAccessor voxels, torch::TensorAccessor coors, torch::TensorAccessor num_points_per_voxel, torch::TensorAccessor coor_to_voxelidx, int& voxel_num, const std::vector voxel_size, const std::vector coors_range, const std::vector grid_size, const int max_points, const int max_voxels, const int num_points, const int num_features, const int NDim) { // declare a temp coors at::Tensor temp_coors = at::zeros( {num_points, NDim}, at::TensorOptions().dtype(at::kInt).device(at::kCPU)); // First use dynamic voxelization to get coors, // then check max points/voxels constraints dynamic_voxelize_forward_cpu_kernel( points, temp_coors.accessor(), voxel_size, coors_range, grid_size, 
num_points, num_features, NDim); int voxelidx, num; auto coor = temp_coors.accessor(); for (int i = 0; i < num_points; ++i) { // T_int* coor = temp_coors.data_ptr() + i * NDim; if (coor[i][0] == -1) continue; voxelidx = coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]]; // record voxel if (voxelidx == -1) { voxelidx = voxel_num; if (max_voxels != -1 && voxel_num >= max_voxels) continue; voxel_num += 1; coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]] = voxelidx; memcpy(&coors[voxelidx][0], &coor[i][0], NDim * sizeof(T_int)); } // put points into voxel num = num_points_per_voxel[voxelidx]; if (max_points == -1 || num < max_points) { memcpy(&voxels[voxelidx][num][0], &points[i][0], num_features * sizeof(T)); num_points_per_voxel[voxelidx] += 1; } } return; } void dynamic_voxelize_forward_cpu(const at::Tensor& points, at::Tensor& coors, const std::vector voxel_size, const std::vector coors_range, const int NDim = 3) { // check device AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor"); std::vector grid_size(NDim); const int num_points = points.size(0); const int num_features = points.size(1); for (int i = 0; i < NDim; ++i) { grid_size[i] = round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]); } // coors, num_points_per_voxel, coor_to_voxelidx are int Tensor AT_DISPATCH_FLOATING_TYPES_AND_HALF( points.scalar_type(), "dynamic_voxelize_forward_cpu_kernel", [&] { dynamic_voxelize_forward_cpu_kernel( points.accessor(), coors.accessor(), voxel_size, coors_range, grid_size, num_points, num_features, NDim); }); } int hard_voxelize_forward_cpu(const at::Tensor& points, at::Tensor& voxels, at::Tensor& coors, at::Tensor& num_points_per_voxel, const std::vector voxel_size, const std::vector coors_range, const int max_points, const int max_voxels, const int NDim = 3) { // current version tooks about 0.02s_0.03s for one frame on cpu // check device AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor"); std::vector grid_size(NDim); 
const int num_points = points.size(0); const int num_features = points.size(1); for (int i = 0; i < NDim; ++i) { grid_size[i] = round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]); } // coors, num_points_per_voxel, coor_to_voxelidx are int Tensor // printf("cpu coor_to_voxelidx size: [%d, %d, %d]\n", grid_size[2], // grid_size[1], grid_size[0]); at::Tensor coor_to_voxelidx = -at::ones({grid_size[2], grid_size[1], grid_size[0]}, coors.options()); int voxel_num = 0; AT_DISPATCH_FLOATING_TYPES_AND_HALF( points.scalar_type(), "hard_voxelize_forward_cpu_kernel", [&] { hard_voxelize_forward_cpu_kernel( points.accessor(), voxels.accessor(), coors.accessor(), num_points_per_voxel.accessor(), coor_to_voxelidx.accessor(), voxel_num, voxel_size, coors_range, grid_size, max_points, max_voxels, num_points, num_features, NDim); }); return voxel_num; } int hard_voxelize_forward_impl(const at::Tensor& points, at::Tensor& voxels, at::Tensor& coors, at::Tensor& num_points_per_voxel, const std::vector voxel_size, const std::vector coors_range, const int max_points, const int max_voxels, const int NDim); void dynamic_voxelize_forward_impl(const at::Tensor& points, at::Tensor& coors, const std::vector voxel_size, const std::vector coors_range, const int NDim); REGISTER_DEVICE_IMPL(hard_voxelize_forward_impl, CPU, hard_voxelize_forward_cpu); REGISTER_DEVICE_IMPL(dynamic_voxelize_forward_impl, CPU, dynamic_voxelize_forward_cpu); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/active_rotated_filter_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved. 
// Modified from
// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/cuda/ActiveRotatingFilter_cuda.cu
#include "active_rotated_filter_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// NOTE(review): in this copy every kernel launch reads `<<>>` and every
// `data_ptr()` has no template argument; the launch configuration and template
// arguments appear to have been stripped. Restore them from the upstream file
// before compiling.

// Launches the forward kernel of the active rotated filter.
// Plane/orientation/kernel sizes are read from dimensions 0..4 of `input`,
// the rotation count from dimension 3 of `indices`; the work size passed to
// the kernel is input.numel(). The launch is dispatched on the scalar type of
// `input` under a device guard for `input`'s device, and the last CUDA error
// is checked afterwards.
void ActiveRotatedFilterForwardCUDAKernelLauncher(const Tensor input,
                                                  const Tensor indices,
                                                  Tensor output) {
  int num_output_planes = input.size(0);
  int num_input_planes = input.size(1);
  int num_orientations = input.size(2);
  int kH = input.size(3);
  int kW = input.size(4);
  int num_rotations = indices.size(3);
  int nEntry = num_orientations * kH * kW;
  int output_size = input.numel();

  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "active_rotated_filter_forward_cuda_kernel", [&] {
        active_rotated_filter_forward_cuda_kernel
            <<>>(
                output_size, input.data_ptr(),
                indices.data_ptr(), num_input_planes, num_output_planes,
                num_orientations, num_rotations, nEntry,
                output.data_ptr());
      });
  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches the backward kernel of the active rotated filter.
// Orientation/kernel/rotation sizes are read from dimensions 0..3 of
// `indices`; the plane counts are derived from `grad_out` by dividing its
// first two dimensions by num_rotations and num_orientations. The work size
// is grad_in.numel(). Note that the device guard is taken from `indices`
// here, whereas the forward launcher uses `input`.
void ActiveRotatedFilterBackwardCUDAKernelLauncher(const Tensor grad_out,
                                                   const Tensor indices,
                                                   Tensor grad_in) {
  int num_orientations = indices.size(0);
  int kH = indices.size(1);
  int kW = indices.size(2);
  int num_rotations = indices.size(3);
  int num_output_planes = grad_out.size(0) / num_rotations;
  int num_input_planes = grad_out.size(1) / num_orientations;
  int nEntry = num_orientations * kH * kW;
  int output_size = grad_in.numel();

  at::cuda::CUDAGuard device_guard(indices.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_out.scalar_type(), "active_rotated_filter_backward_cuda_kernel",
      [&] {
        active_rotated_filter_backward_cuda_kernel
            <<>>(
                output_size, grad_out.data_ptr(),
                indices.data_ptr(), num_input_planes, num_output_planes,
                num_orientations, num_rotations, nEntry,
                grad_in.data_ptr());
      });
  AT_CUDA_CHECK(cudaGetLastError());
}

================================================
FILE:
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/assign_score_withk_cuda.cu
================================================
// Modified from
// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/paconv_lib/src/gpu
// NOTE(review): the bare `#include` lines in this chunk carry no header name,
// every kernel launch reads `<<>>` and every `data_ptr()` has no template
// argument; the angle-bracket content appears to have been stripped from this
// copy. Restore it from the upstream files before compiling.
#include
#include

#include "assign_score_withk_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// Launches the assign-score-with-k forward kernel.
// The grid is sized with GET_BLOCKS for B * O * N1 * K work items at
// THREADS_PER_BLOCK threads per block. The launch is dispatched on the scalar
// type of `points` under a device guard for `points`' device, and the last
// CUDA error is checked afterwards. `output` is written by the kernel.
void AssignScoreWithKForwardCUDAKernelLauncher(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& points, const Tensor& centers, const Tensor& scores,
    const Tensor& knn_idx, Tensor& output) {
  at::cuda::CUDAGuard device_guard(points.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  dim3 blocks(GET_BLOCKS(B * O * N1 * K, THREADS_PER_BLOCK));
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      points.scalar_type(), "assign_score_withk_forward_cuda_kernel", [&] {
        assign_score_withk_forward_cuda_kernel
            <<>>(
                B, N0, N1, M, K, O, aggregate, points.data_ptr(),
                centers.data_ptr(), scores.data_ptr(),
                knn_idx.data_ptr(), output.data_ptr());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches the two assign-score-with-k backward kernels.
// The first kernel receives grad_points and grad_centers and is sized for
// B * M * O work items; the second receives grad_scores and is sized for
// B * N1 * K * M work items. Both are dispatched on the scalar type of
// `grad_out`.
// NOTE(review): the CUDA error state is checked once, after the second
// launch; there is no separate check between the two launches.
void AssignScoreWithKBackwardCUDAKernelLauncher(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores) {
  at::cuda::CUDAGuard device_guard(grad_out.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  dim3 blocks1(GET_BLOCKS(B * M * O, THREADS_PER_BLOCK));
  dim3 threads1(THREADS_PER_BLOCK);
  dim3 blocks2(GET_BLOCKS(B * N1 * K * M, THREADS_PER_BLOCK));
  dim3 threads2(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_out.scalar_type(), "assign_score_withk_points_backward_cuda_kernel",
      [&] {
        assign_score_withk_points_backward_cuda_kernel
            <<>>(
                B, N0, N1, M, K, O, aggregate, grad_out.data_ptr(),
                scores.data_ptr(), knn_idx.data_ptr(),
                grad_points.data_ptr(),
                grad_centers.data_ptr());
      });

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_out.scalar_type(), "assign_score_withk_scores_backward_cuda_kernel",
      [&] {
        assign_score_withk_scores_backward_cuda_kernel
            <<>>(
                B, N0, N1, M, K, O, aggregate, grad_out.data_ptr(),
                points.data_ptr(), centers.data_ptr(),
                knn_idx.data_ptr(), grad_scores.data_ptr());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/ball_query_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query_gpu.cu

#include
#include
#include

#include "ball_query_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// Launches the ball-query forward kernel.
// Uses a two-dimensional grid: GET_BLOCKS(m, THREADS_PER_BLOCK) blocks along
// x and `b` blocks along y. The launch is dispatched on the scalar type of
// `new_xyz` under a device guard for its device, and the last CUDA error is
// checked afterwards. `idx` is written by the kernel.
void BallQueryForwardCUDAKernelLauncher(int b, int n, int m, float min_radius,
                                        float max_radius, int nsample,
                                        const Tensor new_xyz, const Tensor xyz,
                                        Tensor idx) {
  // new_xyz: (B, M, 3)
  // xyz: (B, N, 3)
  // output:
  //      idx: (B, M, nsample)

  at::cuda::CUDAGuard device_guard(new_xyz.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // blockIdx.x(col), blockIdx.y(row)
  dim3 blocks(GET_BLOCKS(m, THREADS_PER_BLOCK), b);
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      new_xyz.scalar_type(), "ball_query_forward_cuda_kernel", [&] {
        ball_query_forward_cuda_kernel
            <<>>(
                b, n, m, min_radius, max_radius, nsample,
                new_xyz.data_ptr(), xyz.data_ptr(),
                idx.data_ptr());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/bbox_overlaps_cuda.cu
================================================
// Copyright (c) OpenMMLab.
All rights reserved
#include "bbox_overlaps_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// NOTE(review): in this copy every kernel launch reads `<<>>` and every
// `data_ptr()` has no template argument; the launch configuration and template
// arguments appear to have been stripped. Restore them from the upstream files
// before compiling.

// Launches the bounding-box overlap kernel.
// The box counts are the first dimensions of `bboxes1` and `bboxes2`;
// `mode`, `aligned` and `offset` are forwarded to the kernel unchanged. The
// launch is dispatched on the scalar type of `bboxes1` under a device guard
// for its device, and the last CUDA error is checked afterwards. `ious` is
// written by the kernel.
void BBoxOverlapsCUDAKernelLauncher(const Tensor bboxes1, const Tensor bboxes2,
                                    Tensor ious, const int mode,
                                    const bool aligned, const int offset) {
  int output_size = ious.numel();
  int num_bbox1 = bboxes1.size(0);
  int num_bbox2 = bboxes2.size(0);

  at::cuda::CUDAGuard device_guard(bboxes1.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      bboxes1.scalar_type(), "bbox_overlaps_cuda_kernel", ([&] {
        bbox_overlaps_cuda_kernel
            <<>>(
                bboxes1.data_ptr(), bboxes2.data_ptr(),
                ious.data_ptr(), num_bbox1, num_bbox2, mode, aligned,
                offset);
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/border_align_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "border_align_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// Launches the border-align forward kernel.
// Asserts that `input` is 4-D and `boxes` is 3-D. The channel count handed to
// the kernel is input.size(1) / 4, and the work size is
// batch_size * channels * box_size. `output` and `argmax_idx` are written by
// the kernel.
void BorderAlignForwardCUDAKernelLauncher(const Tensor &input,
                                          const Tensor &boxes, Tensor output,
                                          Tensor argmax_idx,
                                          const int pool_size) {
  // shape assertion
  AT_ASSERTM(input.ndimension() == 4,
             "non-empty 4D(batch mode) tensor expected for input feature");
  AT_ASSERTM(boxes.ndimension() == 3,
             "boxes must be 3D tensor with size of [B, H*W, 4]");

  int batch_size = input.size(0);
  int feat_channels = input.size(1);
  int channels = feat_channels / 4;
  int height = input.size(2);
  int width = input.size(3);
  // shape [N, box_size, 4] for boxes. (x1, y1, x2, y2) format
  int box_size = boxes.size(1);
  // shape [N, channels, box_size, 4] for output
  int nthreads = batch_size * channels * box_size;

  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  dim3 block(128, 4);
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "border_align_forward_cuda_kernel", [&] {
        border_align_forward_cuda_kernel
            <<>>(
                nthreads, input.data_ptr(),
                boxes.data_ptr(), output.data_ptr(),
                argmax_idx.data_ptr(), channels, box_size, height, width,
                pool_size);
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches the border-align backward kernel.
// Sizes are read from `grad_input` (the forward launcher reads them from
// `input`), with the same channels = size(1) / 4 convention and the same work
// size. Unlike the forward launcher, no dimensionality assertions are made
// here. `grad_input` is written by the kernel.
void BorderAlignBackwardCUDAKernelLauncher(const Tensor &grad_output,
                                           const Tensor &boxes,
                                           const Tensor &argmax_idx,
                                           Tensor grad_input,
                                           const int pool_size) {
  int batch_size = grad_input.size(0);
  int feat_channels = grad_input.size(1);
  int channels = feat_channels / 4;
  int height = grad_input.size(2);
  int width = grad_input.size(3);
  int box_size = boxes.size(1);
  int nthreads = batch_size * channels * box_size;

  at::cuda::CUDAGuard device_guard(grad_output.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  dim3 block(128, 4);
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.scalar_type(), "border_align_backward_cuda_kernel", [&] {
        border_align_backward_cuda_kernel
            <<>>(
                nthreads, grad_output.data_ptr(),
                boxes.data_ptr(), argmax_idx.data_ptr(),
                grad_input.data_ptr(), channels, box_size, height,
                width, pool_size);
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/box_iou_rotated_cuda.cu
================================================
// Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu
#include "box_iou_rotated_cuda.cuh"
#include "pytorch_cuda_helper.hpp"

// Launches the rotated-box IoU kernel.
// Asserts that both box tensors are CUDA tensors. There is no type dispatch:
// scalar_t is fixed to float and the data pointer of `ious` is cast to
// scalar_t*. The box counts are the first dimensions of `boxes1` and `boxes2`;
// `mode_flag` and `aligned` are forwarded unchanged. The last CUDA error is
// checked after the launch.
// NOTE(review): the launch reads `<<>>` and the `data_ptr()` calls have no
// template argument; the angle-bracket content appears to have been stripped
// from this copy - restore it from the upstream file before compiling.
void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned) {
  using scalar_t = float;
  AT_ASSERTM(boxes1.is_cuda(), "boxes1 must be a CUDA tensor");
  AT_ASSERTM(boxes2.is_cuda(), "boxes2 must be a CUDA tensor");

  int output_size = ious.numel();
  int num_boxes1 = boxes1.size(0);
  int num_boxes2 = boxes2.size(0);

  at::cuda::CUDAGuard device_guard(boxes1.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  box_iou_rotated_cuda_kernel
      <<>>(
          num_boxes1, num_boxes2, boxes1.data_ptr(),
          boxes2.data_ptr(), (scalar_t*)ious.data_ptr(),
          mode_flag, aligned);
  AT_CUDA_CHECK(cudaGetLastError());
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/carafe_cuda.cu
================================================
// Copyright (c) OpenMMLab.
All rights reserved
#include "carafe_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// NOTE(review): in this copy every kernel launch reads `<<>>` and every
// `data_ptr()` has no template argument; the launch configuration and template
// arguments appear to have been stripped. Restore them from the upstream files
// before compiling.

// Launches the CARAFE forward pass as four kernel stages.
// The scratch tensors `rfeatures`, `routput` and `rmasks` are resized in place
// to channel-last shapes, then:
//   1. "NCHW2NHWC_Feature": transposes `features` into `rfeatures`;
//   2. "NCHW2NHWC_Masks":   transposes `masks` into `rmasks`;
//   3. "CARAFELaucherForward": runs the CARAFE kernel from `rfeatures` and
//      `rmasks` into `routput`;
//   4. "NHWC2NCHW": transposes `routput` into `output`.
// All four stages dispatch on the scalar type of `features`; the last CUDA
// error is checked once, after the final stage.
void CARAFEForwardCUDAKernelLauncher(const Tensor features, const Tensor masks,
                                     Tensor rfeatures, Tensor routput,
                                     Tensor rmasks, Tensor output,
                                     const int kernel_size,
                                     const int group_size,
                                     const int scale_factor) {
  const int batch_size = output.size(0);
  const int channels = output.size(1);
  const int output_height = output.size(2);
  const int output_width = output.size(3);

  const int input_height = features.size(2);
  const int input_width = features.size(3);

  const int mask_channels = masks.size(1);

  rfeatures.resize_({batch_size, input_height, input_width, channels});
  routput.resize_({batch_size, output_height, output_width, channels});
  rmasks.resize_({batch_size, output_height, output_width, mask_channels});

  // one warp per pixel
  at::cuda::CUDAGuard device_guard(features.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.scalar_type(), "NCHW2NHWC_Feature", ([&] {
        const scalar_t *bottom_data = features.data_ptr();
        scalar_t *top_data = rfeatures.data_ptr();
        const int dh = divideUP(channels, kTileDim);
        const int dw = divideUP(input_height * input_width, kTileDim);
        BatchTranspose2DCUDAKernel
            <<>>(
                batch_size, channels, input_height * input_width, dh, dw,
                bottom_data, top_data);
      }));
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.scalar_type(), "NCHW2NHWC_Masks", ([&] {
        const scalar_t *bottom_data = masks.data_ptr();
        scalar_t *top_data = rmasks.data_ptr();
        const int dh = divideUP(mask_channels, kTileDim);
        const int dw = divideUP(output_height * output_width, kTileDim);
        BatchTranspose2DCUDAKernel
            <<>>(
                batch_size, mask_channels, output_height * output_width, dh, dw,
                bottom_data, top_data);
      }));
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.scalar_type(), "CARAFELaucherForward", ([&] {
        const int num_kernels =
            batch_size * output_height * output_width * THREADS_PER_PIXEL;
        const scalar_t *bottom_data = rfeatures.data_ptr();
        const scalar_t *bottom_masks = rmasks.data_ptr();
        scalar_t *top_data = routput.data_ptr();

        CARAFEForward<<>>(
            num_kernels, bottom_data, bottom_masks, kernel_size, group_size,
            scale_factor, channels, input_height, input_width, output_height,
            output_width, mask_channels, top_data);
      }));
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.scalar_type(), "NHWC2NCHW", ([&] {
        const scalar_t *bottom_data = routput.data_ptr();
        scalar_t *top_data = output.data_ptr();
        const int dh = divideUP(output_height * output_width, kTileDim);
        const int dw = divideUP(channels, kTileDim);
        BatchTranspose2DCUDAKernel
            <<>>(
                batch_size, output_height * output_width, channels, dh, dw,
                bottom_data, top_data);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches the CARAFE backward pass as six kernel stages.
// The scratch tensors `rtop_grad`, `rbottom_grad`, `rbottom_grad_hs` and
// `rmask_grad` are resized in place to channel-last shapes, then:
//   1. "NCHW2NHWC_Top_Grad": transposes `top_grad` into `rtop_grad`;
//   2. "CARAFELaucherBackward_Feature": computes `rbottom_grad_hs` from
//      `rtop_grad` and `masks`;
//   3. "FeatureSum": reduces `rbottom_grad_hs` into `rbottom_grad`;
//   4. "NHWC2NCHW_Bottom_Grad": transposes `rbottom_grad` into `bottom_grad`;
//   5. "CARAFELaucherBackward_Mask": computes `rmask_grad` from `rtop_grad`
//      and `rfeatures`;
//   6. "NHWC2NCHW_Mask_Grad": transposes `rmask_grad` into `mask_grad`.
// All stages dispatch on the scalar type of `top_grad`; the last CUDA error is
// checked once, after the final stage.
// NOTE(review): stage 2 reads `masks` directly, while the forward pass feeds
// its kernel the transposed `rmasks` - presumably the caller passes masks
// already in the layout the kernel expects; verify against the caller.
void CARAFEBackwardCUDAKernelLauncher(
    const Tensor top_grad, const Tensor rfeatures, const Tensor masks,
    Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad,
    Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad,
    const int kernel_size, const int group_size, const int scale_factor) {
  const int batch_size = top_grad.size(0);
  const int channels = top_grad.size(1);
  const int output_height = top_grad.size(2);
  const int output_width = top_grad.size(3);

  const int input_height = bottom_grad.size(2);
  const int input_width = bottom_grad.size(3);

  const int mask_channels = masks.size(1);

  rtop_grad.resize_({batch_size, output_height, output_width, channels});
  rbottom_grad.resize_({batch_size, input_height, input_width, channels});
  rbottom_grad_hs.resize_({batch_size, output_height, output_width, channels});
  rmask_grad.resize_({batch_size, output_height, output_width, mask_channels});

  at::cuda::CUDAGuard device_guard(top_grad.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "NCHW2NHWC_Top_Grad", ([&] {
        const scalar_t *bottom_data = top_grad.data_ptr();
        scalar_t *top_data = rtop_grad.data_ptr();
        const int dh = divideUP(channels, kTileDim);
        const int dw = divideUP(output_height * output_width, kTileDim);
        BatchTranspose2DCUDAKernel
            <<>>(
                batch_size, channels, output_height * output_width, dh, dw,
                bottom_data, top_data);
      }));

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "CARAFELaucherBackward_Feature", ([&] {
        const int num_kernels =
            batch_size * output_height * output_width * THREADS_PER_PIXEL;
        const scalar_t *top_diff = rtop_grad.data_ptr();
        const scalar_t *bottom_masks = masks.data_ptr();
        scalar_t *bottom_diff = rbottom_grad_hs.data_ptr();

        CARAFEBackward_Feature
            <<>>(num_kernels, top_diff, bottom_masks, kernel_size,
                 group_size, scale_factor, channels, input_height,
                 input_width, output_height, output_width,
                 mask_channels, bottom_diff);
      }));
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "FeatureSum", ([&] {
        const int num_kernels =
            batch_size * input_height * input_width * THREADS_PER_PIXEL;
        const scalar_t *bottom_diff_hs = rbottom_grad_hs.data_ptr();
        scalar_t *bottom_diff = rbottom_grad.data_ptr();

        FeatureSum
            <<>>(num_kernels, bottom_diff_hs, scale_factor, channels,
                 input_height, input_width, bottom_diff);
      }));
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "NHWC2NCHW_Bottom_Grad", ([&] {
        const scalar_t *bottom_data = rbottom_grad.data_ptr();
        scalar_t *top_data = bottom_grad.data_ptr();
        const int dh = divideUP(input_height * input_width, kTileDim);
        const int dw = divideUP(channels, kTileDim);
        BatchTranspose2DCUDAKernel
            <<>>(
                batch_size, input_height * input_width, channels, dh, dw,
                bottom_data, top_data);
      }));

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "CARAFELaucherBackward_Mask", ([&] {
        const int num_kernels = batch_size * output_height * output_width *
                                mask_channels * WARP_SIZE;
        const scalar_t *top_diff = rtop_grad.data_ptr();
        const scalar_t *bottom_data = rfeatures.data_ptr();
        scalar_t *mask_diff = rmask_grad.data_ptr();

        CARAFEBackward_Mask
            <<>>(num_kernels, top_diff, bottom_data, kernel_size,
                 group_size, scale_factor, channels, input_height,
                 input_width, output_height, output_width,
                 mask_channels, mask_diff);
      }));
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "NHWC2NCHW_Mask_Grad", ([&] {
        const scalar_t *bottom_data = rmask_grad.data_ptr();
        scalar_t *top_data = mask_grad.data_ptr();
        const int dh = divideUP(output_height * output_width, kTileDim);
        const int dw = divideUP(mask_channels, kTileDim);
        BatchTranspose2DCUDAKernel
            <<>>(
                batch_size, output_height * output_width, mask_channels, dh, dw,
                bottom_data, top_data);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/carafe_naive_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "carafe_naive_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// Launches the naive CARAFE forward kernel in a single stage.
// The work size is output.numel() and channels/height/width are read from
// dimensions 1..3 of `output`. Dispatches on the scalar type of `features`
// and checks the last CUDA error afterwards.
void CARAFENAIVEForwardCUDAKernelLauncher(const Tensor features,
                                          const Tensor masks, Tensor output,
                                          const int kernel_size,
                                          const int group_size,
                                          const int scale_factor) {
  int output_size = output.numel();
  int channels = output.size(1);
  int height = output.size(2);
  int width = output.size(3);

  at::cuda::CUDAGuard device_guard(features.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.scalar_type(), "CARAFENAIVEForward", ([&] {
        carafe_naive_forward_cuda_kernel
            <<>>(
                output_size, features.data_ptr(),
                masks.data_ptr(), output.data_ptr(),
                kernel_size, group_size, scale_factor, channels, height, width);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches the naive CARAFE backward kernel in a single stage.
// The work size is top_grad.numel() and channels/height/width are read from
// dimensions 1..3 of `top_grad`. `bottom_grad` and `mask_grad` are written by
// the kernel.
void CARAFENAIVEBackwardCUDAKernelLauncher(
    const Tensor top_grad, const Tensor features, const Tensor masks,
    Tensor bottom_grad, Tensor mask_grad, const int kernel_size,
    const int group_size, const int scale_factor) {
  int output_size = top_grad.numel();
  int channels = top_grad.size(1);
  int height = top_grad.size(2);
  int width = top_grad.size(3);

  at::cuda::CUDAGuard device_guard(top_grad.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "CARAFENAIVEBackward", ([&] {
        carafe_naive_backward_cuda_kernel
            <<>>(
                output_size, top_grad.data_ptr(),
                features.data_ptr(), masks.data_ptr(),
                bottom_grad.data_ptr(),
                mask_grad.data_ptr(), kernel_size, group_size,
                scale_factor, channels, height, width);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/convex_iou.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/SDL-GuoZonghao/BeyondBoundingBox/blob/main/mmdet/ops/iou/src/convex_iou_kernel.cu
#include "convex_iou_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// Launches the convex IoU kernel.
// The point-set and polygon counts are the first dimensions of `pointsets`
// and `polygons`. Dispatches on the scalar type of `pointsets` and checks the
// last CUDA error afterwards. `ious` is written by the kernel.
void ConvexIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons,
                                 Tensor ious) {
  int output_size = ious.numel();
  int num_pointsets = pointsets.size(0);
  int num_polygons = polygons.size(0);

  at::cuda::CUDAGuard device_guard(pointsets.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      pointsets.scalar_type(), "convex_iou_cuda_kernel", ([&] {
        convex_iou_cuda_kernel
            <<>>(
                num_pointsets, num_polygons, pointsets.data_ptr(),
                polygons.data_ptr(), ious.data_ptr());
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches the convex GIoU kernel; same structure as
// ConvexIoUCUDAKernelLauncher, with the result written to `output`.
void ConvexGIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons,
                                  Tensor output) {
  int output_size = output.numel();
  int num_pointsets = pointsets.size(0);
  int num_polygons = polygons.size(0);

  at::cuda::CUDAGuard device_guard(pointsets.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      pointsets.scalar_type(), "convex_giou_cuda_kernel", ([&] {
        convex_giou_cuda_kernel
            <<>>(
                num_pointsets, num_polygons, pointsets.data_ptr(),
                polygons.data_ptr(), output.data_ptr());
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}

================================================
FILE:
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/correlation_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved. // Modified from // https://github.com/ClementPinard/Pytorch-Correlation-extension/blob/master/Correlation_Module/correlation_cuda_kernel.cu // Original licence: Under MIT License #include "correlation_cuda.cuh" #include "pytorch_cuda_helper.hpp" void CorrelationForwardCUDAKernelLauncher(Tensor input1, Tensor input2, Tensor output, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW) { const int batch_size = input1.size(0); const int iH = input1.size(2); const int iW = input1.size(3); const int dilatedKH = (kH - 1) * dilationH + 1; const int dilatedKW = (kW - 1) * dilationW + 1; const auto oH = (iH + 2 * padH - dilatedKH) / dH + 1; const auto oW = (iW + 2 * padW - dilatedKW) / dW + 1; auto trInput1 = input1.permute({0, 2, 3, 1}).contiguous(); auto trInput2 = input2.permute({0, 2, 3, 1}).contiguous(); const int threads = THREADS_FORWARD; const dim3 blocks(batch_size, oH, oW); at::cuda::CUDAGuard device_guard(input1.device()); AT_DISPATCH_FLOATING_TYPES_AND_HALF( input1.scalar_type(), "correlation_forward_cuda", ([&] { TensorAcc4R trInput1_acc = trInput1.packed_accessor32(); TensorAcc4R trInput2_acc = trInput2.packed_accessor32(); TensorAcc5R output_acc = output.packed_accessor32(); correlation_forward_cuda_kernel <<>>( trInput1_acc, trInput2_acc, output_acc, kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW); })); } void CorrelationBackwardCUDAKernelLauncher( Tensor grad_output, Tensor input1, Tensor input2, Tensor grad_input1, Tensor grad_input2, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW) { const int batch_size = input1.size(0); const int iH = 
input1.size(2); const int iW = input1.size(3); const int C = input1.size(1); const dim3 blocks(C, iH, iW); const dim3 threads(THREADS_BACKWARD, THREADS_BACKWARD); at::cuda::CUDAGuard device_guard(input1.device()); AT_DISPATCH_FLOATING_TYPES_AND_HALF( input1.scalar_type(), "correlation_backward_cuda", ([&] { TensorAcc4R input1_acc = input1.packed_accessor32(); TensorAcc4R input2_acc = input2.packed_accessor32(); TensorAcc4R grad_input1_acc = grad_input1.packed_accessor32(); TensorAcc4R grad_input2_acc = grad_input2.packed_accessor32(); TensorAcc5R grad_output_acc = grad_output.packed_accessor32(); for (int n = 0; n < batch_size; ++n) { correlation_backward_cuda_kernel_input1 <<>>( grad_output_acc, input2_acc, grad_input1_acc, kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW, n); } for (int n = 0; n < batch_size; ++n) { correlation_backward_cuda_kernel_input2 <<>>( grad_output_acc, input1_acc, grad_input2_acc, kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, dilation_patchW, dH, dW, n); } })); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/cudabind.cpp ================================================ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void AssignScoreWithKForwardCUDAKernelLauncher( int B, int N0, int N1, int M, int K, int O, int aggregate, const Tensor& points, const Tensor& centers, const Tensor& scores, const Tensor& knn_idx, Tensor& output); void AssignScoreWithKBackwardCUDAKernelLauncher( int B, int N0, int N1, int M, int K, int O, int aggregate, const Tensor& grad_out, const Tensor& points, const Tensor& centers, const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points, Tensor& grad_centers, Tensor& grad_scores); void assign_score_withk_forward_cuda(int B, int N0, int N1, int M, int K, int O, int aggregate, const Tensor& points, const Tensor& centers, const Tensor& scores, 
                                     const Tensor& knn_idx, Tensor& output) {
  // Forwards every argument, in order, to the forward kernel launcher.
  AssignScoreWithKForwardCUDAKernelLauncher(
      B, N0, N1, M, K, O, aggregate, points, centers, scores, knn_idx, output);
};

// CUDA entry point for the assign-score-with-k backward pass: forwards every
// argument, in order, to AssignScoreWithKBackwardCUDAKernelLauncher.
void assign_score_withk_backward_cuda(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores) {
  AssignScoreWithKBackwardCUDAKernelLauncher(
      B, N0, N1, M, K, O, aggregate, grad_out, points, centers, scores, knn_idx,
      grad_points, grad_centers, grad_scores);
};

// Declarations of the device-generic entry points (no bodies here).
void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O,
                                     int aggregate, const Tensor& points,
                                     const Tensor& centers,
                                     const Tensor& scores,
                                     const Tensor& knn_idx, Tensor& output);

void assign_score_withk_backward_impl(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores);

// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(assign_score_withk_forward_impl, CUDA,
                     assign_score_withk_forward_cuda);
REGISTER_DEVICE_IMPL(assign_score_withk_backward_impl, CUDA,
                     assign_score_withk_backward_cuda);

// Ball query: launcher declaration, forwarding CUDA wrapper, generic entry
// point declaration and registration.
void BallQueryForwardCUDAKernelLauncher(int b, int n, int m, float min_radius,
                                        float max_radius, int nsample,
                                        const Tensor new_xyz, const Tensor xyz,
                                        Tensor idx);

void ball_query_forward_cuda(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx) {
  // Pure pass-through; argument order is unchanged.
  BallQueryForwardCUDAKernelLauncher(b, n, m, min_radius, max_radius, nsample,
                                     new_xyz, xyz, idx);
};

void ball_query_forward_impl(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx);
REGISTER_DEVICE_IMPL(ball_query_forward_impl, CUDA, ball_query_forward_cuda);

// BBox overlaps launcher declaration (parameter list continues below).
void BBoxOverlapsCUDAKernelLauncher(const Tensor bboxes1, const Tensor bboxes2,
                                    Tensor ious, const int mode,
                                    const bool aligned, const int offset);

// CUDA entry point for bbox overlaps: forwards every argument, in order, to
// BBoxOverlapsCUDAKernelLauncher.
void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                        const int mode, const bool aligned, const int offset) {
  BBoxOverlapsCUDAKernelLauncher(bboxes1, bboxes2, ious, mode, aligned, offset);
}

void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                        const int mode, const bool aligned, const int offset);
// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(bbox_overlaps_impl, CUDA, bbox_overlaps_cuda);

// Border align: launcher declarations (no bodies here).
void BorderAlignForwardCUDAKernelLauncher(const Tensor& input,
                                          const Tensor& boxes, Tensor output,
                                          Tensor argmax_idx,
                                          const int pool_size);

void BorderAlignBackwardCUDAKernelLauncher(const Tensor& grad_output,
                                           const Tensor& boxes,
                                           const Tensor& argmax_idx,
                                           Tensor grad_input,
                                           const int pool_size);

// Forwarding wrapper: arguments are passed through in declaration order.
void border_align_forward_cuda(const Tensor& input, const Tensor& boxes,
                               Tensor output, Tensor argmax_idx,
                               const int pool_size) {
  BorderAlignForwardCUDAKernelLauncher(input, boxes, output, argmax_idx,
                                       pool_size);
}

// Forwarding wrapper: arguments are passed through in declaration order.
void border_align_backward_cuda(const Tensor& grad_output, const Tensor& boxes,
                                const Tensor& argmax_idx, Tensor grad_input,
                                const int pool_size) {
  BorderAlignBackwardCUDAKernelLauncher(grad_output, boxes, argmax_idx,
                                        grad_input, pool_size);
}

void border_align_forward_impl(const Tensor& input, const Tensor& boxes,
                               Tensor output, Tensor argmax_idx,
                               const int pool_size);

void border_align_backward_impl(const Tensor& grad_output, const Tensor& boxes,
                                const Tensor& argmax_idx, Tensor grad_input,
                                const int pool_size);

REGISTER_DEVICE_IMPL(border_align_forward_impl, CUDA,
                     border_align_forward_cuda);
REGISTER_DEVICE_IMPL(border_align_backward_impl, CUDA,
                     border_align_backward_cuda);

// Rotated box IoU: the CUDA function is only declared here (no wrapper body in
// this file view) and is registered directly.
void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned);

void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned);
REGISTER_DEVICE_IMPL(box_iou_rotated_impl, CUDA,
                     box_iou_rotated_cuda);

// CARAFE: launcher declarations (no bodies here).
void CARAFEForwardCUDAKernelLauncher(const Tensor features, const Tensor masks,
                                     Tensor rfeatures, Tensor routput,
                                     Tensor rmasks, Tensor output,
                                     const int kernel_size,
                                     const int group_size,
                                     const int scale_factor);

void CARAFEBackwardCUDAKernelLauncher(
    const Tensor top_grad, const Tensor rfeatures, const Tensor masks,
    Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad,
    Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad,
    const int kernel_size, const int group_size, const int scale_factor);

// CUDA entry point for CARAFE forward: forwards every argument, in order, to
// CARAFEForwardCUDAKernelLauncher.
void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures,
                         Tensor routput, Tensor rmasks, Tensor output,
                         int kernel_size, int group_size, int scale_factor) {
  CARAFEForwardCUDAKernelLauncher(features, masks, rfeatures, routput, rmasks,
                                  output, kernel_size, group_size,
                                  scale_factor);
}

// CUDA entry point for CARAFE backward: forwards every argument, in order, to
// CARAFEBackwardCUDAKernelLauncher.
void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks,
                          Tensor rtop_grad, Tensor rbottom_grad_hs,
                          Tensor rbottom_grad, Tensor rmask_grad,
                          Tensor bottom_grad, Tensor mask_grad, int kernel_size,
                          int group_size, int scale_factor) {
  CARAFEBackwardCUDAKernelLauncher(top_grad, rfeatures, masks, rtop_grad,
                                   rbottom_grad_hs, rbottom_grad, rmask_grad,
                                   bottom_grad, mask_grad, kernel_size,
                                   group_size, scale_factor);
}

// Declarations of the device-generic entry points (no bodies here).
void carafe_forward_impl(Tensor features, Tensor masks, Tensor rfeatures,
                         Tensor routput, Tensor rmasks, Tensor output,
                         int kernel_size, int group_size, int scale_factor);

void carafe_backward_impl(Tensor top_grad, Tensor rfeatures, Tensor masks,
                          Tensor rtop_grad, Tensor rbottom_grad_hs,
                          Tensor rbottom_grad, Tensor rmask_grad,
                          Tensor bottom_grad, Tensor mask_grad, int kernel_size,
                          int group_size, int scale_factor);

// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(carafe_forward_impl, CUDA, carafe_forward_cuda);
REGISTER_DEVICE_IMPL(carafe_backward_impl, CUDA, carafe_backward_cuda);

// CARAFE naive: launcher declarations (the second one continues below).
void CARAFENAIVEForwardCUDAKernelLauncher(const Tensor features,
                                          const Tensor masks, Tensor output,
                                          const int kernel_size,
                                          const int group_size,
                                          const int scale_factor);

void
CARAFENAIVEBackwardCUDAKernelLauncher(
    const Tensor top_grad, const Tensor features, const Tensor masks,
    Tensor bottom_grad, Tensor mask_grad, const int kernel_size,
    const int group_size, const int scale_factor);

// CUDA entry point for naive CARAFE forward: forwards every argument, in
// order, to CARAFENAIVEForwardCUDAKernelLauncher.
void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output,
                               int kernel_size, int group_size,
                               int scale_factor) {
  CARAFENAIVEForwardCUDAKernelLauncher(features, masks, output, kernel_size,
                                       group_size, scale_factor);
}

// CUDA entry point for naive CARAFE backward: forwards every argument, in
// order, to CARAFENAIVEBackwardCUDAKernelLauncher.
void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor masks,
                                Tensor bottom_grad, Tensor mask_grad,
                                int kernel_size, int group_size,
                                int scale_factor) {
  CARAFENAIVEBackwardCUDAKernelLauncher(top_grad, features, masks, bottom_grad,
                                        mask_grad, kernel_size, group_size,
                                        scale_factor);
}

// Declarations of the device-generic entry points (no bodies here).
void carafe_naive_forward_impl(Tensor features, Tensor masks, Tensor output,
                               int kernel_size, int group_size,
                               int scale_factor);

void carafe_naive_backward_impl(Tensor top_grad, Tensor features, Tensor masks,
                                Tensor bottom_grad, Tensor mask_grad,
                                int kernel_size, int group_size,
                                int scale_factor);

// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(carafe_naive_forward_impl, CUDA,
                     carafe_naive_forward_cuda);
REGISTER_DEVICE_IMPL(carafe_naive_backward_impl, CUDA,
                     carafe_naive_backward_cuda);

// Correlation: launcher declarations matching the definitions in
// correlation_cuda.cu.
void CorrelationForwardCUDAKernelLauncher(Tensor input1, Tensor input2,
                                          Tensor output, int kH, int kW,
                                          int patchH, int patchW, int padH,
                                          int padW, int dilationH,
                                          int dilationW, int dilation_patchH,
                                          int dilation_patchW, int dH, int dW);

void CorrelationBackwardCUDAKernelLauncher(Tensor grad_output, Tensor input1,
                                           Tensor input2, Tensor grad_input1,
                                           Tensor grad_input2, int kH, int kW,
                                           int patchH, int patchW, int padH,
                                           int padW, int dilationH,
                                           int dilationW, int dilation_patchH,
                                           int dilation_patchW, int dH,
                                           int dW);

// CUDA entry point for correlation forward; the launcher call's argument list
// continues below.
void correlation_forward_cuda(Tensor input1, Tensor input2, Tensor output,
                              int kH, int kW, int patchH, int patchW, int padH,
                              int padW, int dilationH, int dilationW,
                              int dilation_patchH, int dilation_patchW, int dH,
                              int dW) {
  CorrelationForwardCUDAKernelLauncher(
      input1, input2, output, kH, kW, patchH, patchW, padH, padW, dilationH,
      dilationW, dilation_patchH, dilation_patchW, dH, dW);
}

// CUDA entry point for correlation backward: forwards every argument, in
// order, to CorrelationBackwardCUDAKernelLauncher.
void correlation_backward_cuda(Tensor grad_output, Tensor input1, Tensor input2,
                               Tensor grad_input1, Tensor grad_input2, int kH,
                               int kW, int patchH, int patchW, int padH,
                               int padW, int dilationH, int dilationW,
                               int dilation_patchH, int dilation_patchW, int dH,
                               int dW) {
  CorrelationBackwardCUDAKernelLauncher(
      grad_output, input1, input2, grad_input1, grad_input2, kH, kW, patchH,
      patchW, padH, padW, dilationH, dilationW, dilation_patchH,
      dilation_patchW, dH, dW);
}

// Declarations of the device-generic entry points (no bodies here).
void correlation_forward_impl(Tensor input1, Tensor input2, Tensor output,
                              int kH, int kW, int patchH, int patchW, int padH,
                              int padW, int dilationH, int dilationW,
                              int dilation_patchH, int dilation_patchW, int dH,
                              int dW);

void correlation_backward_impl(Tensor grad_output, Tensor input1, Tensor input2,
                               Tensor grad_input1, Tensor grad_input2, int kH,
                               int kW, int patchH, int patchW, int padH,
                               int padW, int dilationH, int dilationW,
                               int dilation_patchH, int dilation_patchW, int dH,
                               int dW);

// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(correlation_forward_impl, CUDA, correlation_forward_cuda);
REGISTER_DEVICE_IMPL(correlation_backward_impl, CUDA,
                     correlation_backward_cuda);

// Deformable convolution helpers: the *_cuda functions are only declared in
// this file view (no wrapper bodies) and are registered directly further down.
void deformable_im2col_cuda(Tensor data_im, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor data_col);

void deformable_col2im_cuda(Tensor data_col, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor grad_im);

void deformable_col2im_coord_cuda(
    Tensor data_col,
    Tensor data_im, Tensor data_offset, const int channels, const int height,
    const int width, const int ksize_h, const int ksize_w, const int pad_h,
    const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int parallel_imgs,
    const int deformable_group, Tensor grad_offset);

// Device-generic entry points for the deformable im2col/col2im helpers. Each
// signature mirrors the corresponding *_cuda declaration parameter for
// parameter.
void deformable_im2col_impl(Tensor data_im, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor data_col);

void deformable_col2im_impl(Tensor data_col, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor grad_im);

void deformable_col2im_coord_impl(
    Tensor data_col, Tensor data_im, Tensor data_offset, const int channels,
    const int height, const int width, const int ksize_h, const int ksize_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int parallel_imgs,
    const int deformable_group, Tensor grad_offset);

// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(deformable_im2col_impl, CUDA, deformable_im2col_cuda);
REGISTER_DEVICE_IMPL(deformable_col2im_impl, CUDA, deformable_col2im_cuda);
REGISTER_DEVICE_IMPL(deformable_col2im_coord_impl, CUDA,
                     deformable_col2im_coord_cuda);

// Deformable RoI pooling: launcher declarations (the second one continues
// below).
void DeformRoIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois,
                                            Tensor offset, Tensor output,
                                            int pooled_height, int pooled_width,
                                            float spatial_scale,
                                            int sampling_ratio, float gamma);

void DeformRoIPoolBackwardCUDAKernelLauncher(
    Tensor grad_output, Tensor input, Tensor rois, Tensor offset,
    Tensor grad_input, Tensor grad_offset, int pooled_height, int
    pooled_width, float spatial_scale, int sampling_ratio, float gamma);

// CUDA entry point for deformable RoI pooling forward: forwards every
// argument, in order, to DeformRoIPoolForwardCUDAKernelLauncher.
void deform_roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor offset,
                                  Tensor output, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int sampling_ratio, float gamma) {
  DeformRoIPoolForwardCUDAKernelLauncher(input, rois, offset, output,
                                         pooled_height, pooled_width,
                                         spatial_scale, sampling_ratio, gamma);
}

// CUDA entry point for deformable RoI pooling backward: forwards every
// argument, in order, to DeformRoIPoolBackwardCUDAKernelLauncher.
void deform_roi_pool_backward_cuda(Tensor grad_output, Tensor input,
                                   Tensor rois, Tensor offset,
                                   Tensor grad_input, Tensor grad_offset,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sampling_ratio,
                                   float gamma) {
  DeformRoIPoolBackwardCUDAKernelLauncher(
      grad_output, input, rois, offset, grad_input, grad_offset, pooled_height,
      pooled_width, spatial_scale, sampling_ratio, gamma);
}

// Declarations of the device-generic entry points (no bodies here).
void deform_roi_pool_forward_impl(Tensor input, Tensor rois, Tensor offset,
                                  Tensor output, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int sampling_ratio, float gamma);

void deform_roi_pool_backward_impl(Tensor grad_output, Tensor input,
                                   Tensor rois, Tensor offset,
                                   Tensor grad_input, Tensor grad_offset,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sampling_ratio,
                                   float gamma);

// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(deform_roi_pool_forward_impl, CUDA,
                     deform_roi_pool_forward_cuda);
REGISTER_DEVICE_IMPL(deform_roi_pool_backward_impl, CUDA,
                     deform_roi_pool_backward_cuda);

// Focal loss: launcher declarations (no bodies here). The softmax launchers
// name their first parameter `softmax`.
void SigmoidFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target,
                                               Tensor weight, Tensor output,
                                               const float gamma,
                                               const float alpha);

void SigmoidFocalLossBackwardCUDAKernelLauncher(Tensor input, Tensor target,
                                                Tensor weight,
                                                Tensor grad_input,
                                                const float gamma,
                                                const float alpha);

void SoftmaxFocalLossForwardCUDAKernelLauncher(Tensor softmax, Tensor target,
                                               Tensor weight, Tensor output,
                                               const float gamma,
                                               const float alpha);

void SoftmaxFocalLossBackwardCUDAKernelLauncher(Tensor softmax, Tensor target,
                                                Tensor weight, Tensor buff,
                                                Tensor grad_input,
                                                const float gamma,
                                                const float alpha);

void
sigmoid_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight,
                                Tensor output, float gamma, float alpha) {
  // Pure pass-through to the sigmoid focal loss forward launcher.
  SigmoidFocalLossForwardCUDAKernelLauncher(input, target, weight, output,
                                            gamma, alpha);
}

// Pure pass-through to the sigmoid focal loss backward launcher.
void sigmoid_focal_loss_backward_cuda(Tensor input, Tensor target,
                                      Tensor weight, Tensor grad_input,
                                      float gamma, float alpha) {
  SigmoidFocalLossBackwardCUDAKernelLauncher(input, target, weight, grad_input,
                                             gamma, alpha);
}

// Pure pass-through to the softmax focal loss forward launcher. `input` is
// passed in the position the launcher declares as `softmax`.
void softmax_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha) {
  SoftmaxFocalLossForwardCUDAKernelLauncher(input, target, weight, output,
                                            gamma, alpha);
}

// Pure pass-through to the softmax focal loss backward launcher. `input` is
// passed in the position the launcher declares as `softmax`.
void softmax_focal_loss_backward_cuda(Tensor input, Tensor target,
                                      Tensor weight, Tensor buff,
                                      Tensor grad_input, float gamma,
                                      float alpha) {
  SoftmaxFocalLossBackwardCUDAKernelLauncher(input, target, weight, buff,
                                             grad_input, gamma, alpha);
}

// Declarations of the device-generic entry points (no bodies here).
void sigmoid_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha);

void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor grad_input,
                                      float gamma, float alpha);

void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha);

void softmax_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor buff,
                                      Tensor grad_input, float gamma,
                                      float alpha);

// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(sigmoid_focal_loss_forward_impl, CUDA,
                     sigmoid_focal_loss_forward_cuda);
REGISTER_DEVICE_IMPL(sigmoid_focal_loss_backward_impl, CUDA,
                     sigmoid_focal_loss_backward_cuda);
REGISTER_DEVICE_IMPL(softmax_focal_loss_forward_impl, CUDA,
                     softmax_focal_loss_forward_cuda);
REGISTER_DEVICE_IMPL(softmax_focal_loss_backward_impl, CUDA,
                     softmax_focal_loss_backward_cuda);

// Furthest point sampling: launchers take raw pointers rather than Tensors
// (the second declaration continues below).
void FurthestPointSamplingForwardCUDAKernelLauncher(int b, int n, int m,
                                                    const float* dataset,
                                                    float* temp, int* idxs);

void FurthestPointSamplingWithDistForwardCUDAKernelLauncher(
    int b, int n, int m, const float*
dataset, float* temp, int* idxs); void furthest_point_sampling_forward_cuda(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m) { const float* dataset = points_tensor.data_ptr(); float* temp = temp_tensor.data_ptr(); int* idxs = idx_tensor.data_ptr(); FurthestPointSamplingForwardCUDAKernelLauncher(b, n, m, dataset, temp, idxs); } void furthest_point_sampling_with_dist_forward_cuda(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m) { const float* dataset = points_tensor.data_ptr(); float* temp = temp_tensor.data_ptr(); int* idxs = idx_tensor.data_ptr(); FurthestPointSamplingWithDistForwardCUDAKernelLauncher(b, n, m, dataset, temp, idxs); } void furthest_point_sampling_forward_impl(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m); void furthest_point_sampling_with_dist_forward_impl(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m); REGISTER_DEVICE_IMPL(furthest_point_sampling_forward_impl, CUDA, furthest_point_sampling_forward_cuda); REGISTER_DEVICE_IMPL(furthest_point_sampling_with_dist_forward_impl, CUDA, furthest_point_sampling_with_dist_forward_cuda); torch::Tensor fused_bias_leakyrelu_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, int act, int grad, float alpha, float scale); torch::Tensor fused_bias_leakyrelu_op_impl(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, int act, int grad, float alpha, float scale); REGISTER_DEVICE_IMPL(fused_bias_leakyrelu_op_impl, CUDA, fused_bias_leakyrelu_op); void GatherPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints, const Tensor points, const Tensor idx, Tensor out); void GatherPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints, const Tensor grad_out, const Tensor idx, Tensor grad_points); void gather_points_forward_cuda(int b, int c, int n, int npoints, const Tensor points, const Tensor idx, Tensor 
                                out) {
  // Pure pass-through to the gather points forward launcher.
  GatherPointsForwardCUDAKernelLauncher(b, c, n, npoints, points, idx, out);
};

// CUDA entry point for gather points backward: forwards every argument, in
// order, to GatherPointsBackwardCUDAKernelLauncher.
void gather_points_backward_cuda(int b, int c, int n, int npoints,
                                 const Tensor grad_out, const Tensor idx,
                                 Tensor grad_points) {
  GatherPointsBackwardCUDAKernelLauncher(b, c, n, npoints, grad_out, idx,
                                         grad_points);
};

// Declarations of the device-generic entry points (no bodies here).
void gather_points_forward_impl(int b, int c, int n, int npoints,
                                const Tensor points, const Tensor idx,
                                Tensor out);

void gather_points_backward_impl(int b, int c, int n, int npoints,
                                 const Tensor grad_out, const Tensor idx,
                                 Tensor grad_points);

// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(gather_points_forward_impl, CUDA,
                     gather_points_forward_cuda);
REGISTER_DEVICE_IMPL(gather_points_backward_impl, CUDA,
                     gather_points_backward_cuda);

// Group points: launcher declarations (no bodies here).
void GroupPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                          int nsample, const Tensor points,
                                          const Tensor idx, Tensor out);

void GroupPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                           int nsample, const Tensor grad_out,
                                           const Tensor idx,
                                           Tensor grad_points);

// Pure pass-through to the group points forward launcher.
void group_points_forward_cuda(int b, int c, int n, int npoints, int nsample,
                               const Tensor points, const Tensor idx,
                               Tensor out) {
  GroupPointsForwardCUDAKernelLauncher(b, c, n, npoints, nsample, points, idx,
                                       out);
};

// Pure pass-through to the group points backward launcher.
void group_points_backward_cuda(int b, int c, int n, int npoints, int nsample,
                                const Tensor grad_out, const Tensor idx,
                                Tensor grad_points) {
  GroupPointsBackwardCUDAKernelLauncher(b, c, n, npoints, nsample, grad_out,
                                        idx, grad_points);
};

void group_points_forward_impl(int b, int c, int n, int npoints, int nsample,
                               const Tensor points, const Tensor idx,
                               Tensor out);

void group_points_backward_impl(int b, int c, int n, int npoints, int nsample,
                                const Tensor grad_out, const Tensor idx,
                                Tensor grad_points);

REGISTER_DEVICE_IMPL(group_points_forward_impl, CUDA,
                     group_points_forward_cuda);
REGISTER_DEVICE_IMPL(group_points_backward_impl, CUDA,
                     group_points_backward_cuda);

// IoU3D: first launcher declaration (parameter list continues below).
void IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(const int num_a,
                                                   const Tensor boxes_a,
                                                   const int num_b,
                                                   const Tensor boxes_b,
                                                   Tensor ans_overlap);

// Remaining IoU3D launcher declarations (no bodies here). The two NMS
// launchers take the mask as a raw unsigned long long pointer.
void IoU3DBoxesIoUBevForwardCUDAKernelLauncher(const int num_a,
                                               const Tensor boxes_a,
                                               const int num_b,
                                               const Tensor boxes_b,
                                               Tensor ans_iou);

void IoU3DNMSForwardCUDAKernelLauncher(const Tensor boxes,
                                       unsigned long long* mask, int boxes_num,
                                       float nms_overlap_thresh);

void IoU3DNMSNormalForwardCUDAKernelLauncher(const Tensor boxes,
                                             unsigned long long* mask,
                                             int boxes_num,
                                             float nms_overlap_thresh);

// Pure pass-through to IoU3DBoxesOverlapBevForwardCUDAKernelLauncher.
void iou3d_boxes_overlap_bev_forward_cuda(const int num_a, const Tensor boxes_a,
                                          const int num_b, const Tensor boxes_b,
                                          Tensor ans_overlap) {
  IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(num_a, boxes_a, num_b, boxes_b,
                                                ans_overlap);
};

// Pure pass-through to IoU3DBoxesIoUBevForwardCUDAKernelLauncher.
void iou3d_boxes_iou_bev_forward_cuda(const int num_a, const Tensor boxes_a,
                                      const int num_b, const Tensor boxes_b,
                                      Tensor ans_iou) {
  IoU3DBoxesIoUBevForwardCUDAKernelLauncher(num_a, boxes_a, num_b, boxes_b,
                                            ans_iou);
};

// Pure pass-through to IoU3DNMSForwardCUDAKernelLauncher.
void iou3d_nms_forward_cuda(const Tensor boxes, unsigned long long* mask,
                            int boxes_num, float nms_overlap_thresh) {
  IoU3DNMSForwardCUDAKernelLauncher(boxes, mask, boxes_num, nms_overlap_thresh);
};

// Pure pass-through to IoU3DNMSNormalForwardCUDAKernelLauncher.
void iou3d_nms_normal_forward_cuda(const Tensor boxes, unsigned long long* mask,
                                   int boxes_num, float nms_overlap_thresh) {
  IoU3DNMSNormalForwardCUDAKernelLauncher(boxes, mask, boxes_num,
                                          nms_overlap_thresh);
};

// Declarations of the device-generic entry points (no bodies here).
void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a,
                                          const int num_b, const Tensor boxes_b,
                                          Tensor ans_overlap);

void iou3d_boxes_iou_bev_forward_impl(const int num_a, const Tensor boxes_a,
                                      const int num_b, const Tensor boxes_b,
                                      Tensor ans_iou);

void iou3d_nms_forward_impl(const Tensor boxes, unsigned long long* mask,
                            int boxes_num, float nms_overlap_thresh);

void iou3d_nms_normal_forward_impl(const Tensor boxes, unsigned long long* mask,
                                   int boxes_num, float nms_overlap_thresh);

// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(iou3d_boxes_overlap_bev_forward_impl, CUDA,
                     iou3d_boxes_overlap_bev_forward_cuda);
// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(iou3d_boxes_iou_bev_forward_impl, CUDA,
                     iou3d_boxes_iou_bev_forward_cuda);
REGISTER_DEVICE_IMPL(iou3d_nms_forward_impl, CUDA, iou3d_nms_forward_cuda);
REGISTER_DEVICE_IMPL(iou3d_nms_normal_forward_impl, CUDA,
                     iou3d_nms_normal_forward_cuda);

// KNN: launcher declaration, forwarding CUDA wrapper, generic entry point
// declaration and registration.
void KNNForwardCUDAKernelLauncher(int b, int n, int m, int nsample,
                                  const Tensor xyz, const Tensor new_xyz,
                                  Tensor idx, Tensor dist2);

void knn_forward_cuda(int b, int n, int m, int nsample, const Tensor xyz,
                      const Tensor new_xyz, Tensor idx, Tensor dist2) {
  // Pure pass-through; argument order is unchanged.
  KNNForwardCUDAKernelLauncher(b, n, m, nsample, xyz, new_xyz, idx, dist2);
}

void knn_forward_impl(int b, int n, int m, int nsample, const Tensor xyz,
                      const Tensor new_xyz, Tensor idx, Tensor dist2);
REGISTER_DEVICE_IMPL(knn_forward_impl, CUDA, knn_forward_cuda);

// Masked im2col / col2im: launcher declarations (no bodies here). The
// launchers name the tensors bottom_data/top_data.
void MaskedIm2colForwardCUDAKernelLauncher(const Tensor bottom_data,
                                           const Tensor mask_h_idx,
                                           const Tensor mask_w_idx,
                                           Tensor top_data, const int kernel_h,
                                           const int kernel_w, const int pad_h,
                                           const int pad_w);

void MaskedCol2imForwardCUDAKernelLauncher(const Tensor bottom_data,
                                           const Tensor mask_h_idx,
                                           const Tensor mask_w_idx,
                                           Tensor top_data, const int height,
                                           const int width, const int channels);

// Passes `im` as the launcher's bottom_data and `col` as its top_data.
void masked_im2col_forward_cuda(const Tensor im, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor col,
                                const int kernel_h, const int kernel_w,
                                const int pad_h, const int pad_w) {
  // im: (n, ic, h, w), kernel size (kh, kw)
  // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)
  MaskedIm2colForwardCUDAKernelLauncher(im, mask_h_idx, mask_w_idx, col,
                                        kernel_h, kernel_w, pad_h, pad_w);
}

// Passes `col` as the launcher's bottom_data and `im` as its top_data.
void masked_col2im_forward_cuda(const Tensor col, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor im, int height,
                                int width, int channels) {
  // im: (n, ic, h, w), kernel size (kh, kw)
  // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)
  MaskedCol2imForwardCUDAKernelLauncher(col, mask_h_idx, mask_w_idx, im, height,
                                        width, channels);
}

// Device-generic entry point declaration (parameter list continues below).
void masked_im2col_forward_impl(const Tensor im, const Tensor
                                mask_h_idx, const Tensor mask_w_idx, Tensor col,
                                const int kernel_h, const int kernel_w,
                                const int pad_h, const int pad_w);

// Device-generic entry point for masked col2im (declaration only).
void masked_col2im_forward_impl(const Tensor col, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor im, int height,
                                int width, int channels);

// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(masked_im2col_forward_impl, CUDA,
                     masked_im2col_forward_cuda);
REGISTER_DEVICE_IMPL(masked_col2im_forward_impl, CUDA,
                     masked_col2im_forward_cuda);

// Modulated deformable convolution helpers: the *_cuda functions are only
// declared in this file view (no wrapper bodies).
void modulated_deformable_im2col_cuda(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor data_col);

void modulated_deformable_col2im_cuda(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im);

// The coord variant additionally takes data_im and writes two outputs,
// grad_offset and grad_mask.
void modulated_deformable_col2im_coord_cuda(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask);

// Device-generic entry point declaration (parameter list continues below).
void modulated_deformable_im2col_impl(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im, const int
    width_im, const int height_col, const int width_col, const int kernel_h,
    const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    const int deformable_group, Tensor data_col);

// Device-generic entry points for modulated deformable col2im (declarations
// only). Signatures mirror the corresponding *_cuda declarations.
void modulated_deformable_col2im_impl(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im);

void modulated_deformable_col2im_coord_impl(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask);

// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(modulated_deformable_im2col_impl, CUDA,
                     modulated_deformable_im2col_cuda);
REGISTER_DEVICE_IMPL(modulated_deformable_col2im_impl, CUDA,
                     modulated_deformable_col2im_cuda);
REGISTER_DEVICE_IMPL(modulated_deformable_col2im_coord_impl, CUDA,
                     modulated_deformable_col2im_coord_cuda);

// Multi-scale deformable attention: CUDA functions are only declared here.
// The forward returns a Tensor; the backward writes its three gradients
// through non-const Tensor references.
Tensor ms_deform_attn_cuda_forward(const Tensor& value,
                                   const Tensor& spatial_shapes,
                                   const Tensor& level_start_index,
                                   const Tensor& sampling_loc,
                                   const Tensor& attn_weight,
                                   const int im2col_step);

void ms_deform_attn_cuda_backward(
    const Tensor& value, const Tensor& spatial_shapes,
    const Tensor& level_start_index, const Tensor& sampling_loc,
    const Tensor& attn_weight, const Tensor& grad_output, Tensor& grad_value,
    Tensor& grad_sampling_loc, Tensor& grad_attn_weight,
    const int im2col_step);

Tensor
ms_deform_attn_impl_forward(const Tensor& value, const Tensor& spatial_shapes,
                            const Tensor& level_start_index,
                            const Tensor& sampling_loc,
                            const Tensor& attn_weight, const int im2col_step);

// Device-generic entry point for the attention backward pass (declaration
// only).
void ms_deform_attn_impl_backward(
    const Tensor& value, const Tensor& spatial_shapes,
    const Tensor& level_start_index, const Tensor& sampling_loc,
    const Tensor& attn_weight, const Tensor& grad_output, Tensor& grad_value,
    Tensor& grad_sampling_loc, Tensor& grad_attn_weight,
    const int im2col_step);

// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the CUDA function as the backend of the matching *_impl
// entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(ms_deform_attn_impl_forward, CUDA,
                     ms_deform_attn_cuda_forward);
REGISTER_DEVICE_IMPL(ms_deform_attn_impl_backward, CUDA,
                     ms_deform_attn_cuda_backward);

// NMS: launcher declaration, forwarding CUDA wrapper, generic entry point
// declaration and registration.
Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold,
                             int offset);

Tensor nms_cuda(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  // Returns the launcher's Tensor result unchanged.
  return NMSCUDAKernelLauncher(boxes, scores, iou_threshold, offset);
}

Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset);
REGISTER_DEVICE_IMPL(nms_impl, CUDA, nms_cuda);

// Points in boxes: launcher declarations (no bodies here).
void PointsInBoxesPartForwardCUDAKernelLauncher(int batch_size, int boxes_num,
                                                int pts_num, const Tensor boxes,
                                                const Tensor pts,
                                                Tensor box_idx_of_points);

void PointsInBoxesAllForwardCUDAKernelLauncher(int batch_size, int boxes_num,
                                               int pts_num, const Tensor boxes,
                                               const Tensor pts,
                                               Tensor box_idx_of_points);

// Pure pass-through to PointsInBoxesPartForwardCUDAKernelLauncher.
void points_in_boxes_part_forward_cuda(int batch_size, int boxes_num,
                                       int pts_num, const Tensor boxes,
                                       const Tensor pts,
                                       Tensor box_idx_of_points) {
  PointsInBoxesPartForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num,
                                             boxes, pts, box_idx_of_points);
};

// Pure pass-through to PointsInBoxesAllForwardCUDAKernelLauncher.
void points_in_boxes_all_forward_cuda(int batch_size, int boxes_num,
                                      int pts_num, const Tensor boxes,
                                      const Tensor pts,
                                      Tensor box_idx_of_points) {
  PointsInBoxesAllForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num,
                                            boxes, pts, box_idx_of_points);
};

// Device-generic entry point declaration (parameter list continues below).
void points_in_boxes_part_forward_impl(int batch_size, int boxes_num,
                                       int pts_num, const Tensor boxes,
                                       const Tensor pts, Tensor
                                       box_idx_of_points);

// Device-generic entry point for the "all" variant (declaration only).
void points_in_boxes_all_forward_impl(int batch_size, int boxes_num,
                                      int pts_num, const Tensor boxes,
                                      const Tensor pts,
                                      Tensor box_idx_of_points);

// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(points_in_boxes_part_forward_impl, CUDA,
                     points_in_boxes_part_forward_cuda);
REGISTER_DEVICE_IMPL(points_in_boxes_all_forward_impl, CUDA,
                     points_in_boxes_all_forward_cuda);

// PSA mask: launcher declarations (no bodies here).
void PSAMaskForwardCUDAKernelLauncher(const int psa_type, const Tensor input,
                                      Tensor output, const int num_,
                                      const int h_feature, const int w_feature,
                                      const int h_mask, const int w_mask,
                                      const int half_h_mask,
                                      const int half_w_mask);

void PSAMaskBackwardCUDAKernelLauncher(
    const int psa_type, const Tensor grad_output, Tensor grad_input,
    const int num_, const int h_feature, const int w_feature, const int h_mask,
    const int w_mask, const int half_h_mask, const int half_w_mask);

// Pure pass-through to PSAMaskForwardCUDAKernelLauncher.
void psamask_forward_cuda(const int psa_type, const Tensor input, Tensor output,
                          const int num_, const int h_feature,
                          const int w_feature, const int h_mask,
                          const int w_mask, const int half_h_mask,
                          const int half_w_mask) {
  PSAMaskForwardCUDAKernelLauncher(psa_type, input, output, num_, h_feature,
                                   w_feature, h_mask, w_mask, half_h_mask,
                                   half_w_mask);
}

// Pure pass-through to PSAMaskBackwardCUDAKernelLauncher.
void psamask_backward_cuda(const int psa_type, const Tensor grad_output,
                           Tensor grad_input, const int num_,
                           const int h_feature, const int w_feature,
                           const int h_mask, const int w_mask,
                           const int half_h_mask, const int half_w_mask) {
  PSAMaskBackwardCUDAKernelLauncher(psa_type, grad_output, grad_input, num_,
                                    h_feature, w_feature, h_mask, w_mask,
                                    half_h_mask, half_w_mask);
}

// Declarations of the device-generic entry points (the second one continues
// below).
void psamask_forward_impl(const int psa_type, const Tensor input, Tensor output,
                          const int num_, const int h_feature,
                          const int w_feature, const int h_mask,
                          const int w_mask, const int half_h_mask,
                          const int half_w_mask);

void psamask_backward_impl(const int psa_type, const Tensor grad_output,
                           Tensor grad_input, const int num_,
                           const int h_feature, const int w_feature,
                           const int h_mask, const int w_mask, const int
                           half_h_mask, const int half_w_mask);
// NOTE(review): REGISTER_DEVICE_IMPL is not defined in this listing;
// presumably it binds the *_cuda function as the CUDA backend of the matching
// *_impl entry point - confirm in pytorch_device_registry.hpp.
REGISTER_DEVICE_IMPL(psamask_forward_impl, CUDA, psamask_forward_cuda);
REGISTER_DEVICE_IMPL(psamask_backward_impl, CUDA, psamask_backward_cuda);

// RoI align: launcher declarations (no bodies here).
void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
                                       Tensor argmax_y, Tensor argmax_x,
                                       int aligned_height, int aligned_width,
                                       float spatial_scale, int sampling_ratio,
                                       int pool_mode, bool aligned);

void ROIAlignBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois,
                                        Tensor argmax_y, Tensor argmax_x,
                                        Tensor grad_input, int aligned_height,
                                        int aligned_width, float spatial_scale,
                                        int sampling_ratio, int pool_mode,
                                        bool aligned);

// CUDA entry point for RoI align forward: forwards every argument, in order,
// to ROIAlignForwardCUDAKernelLauncher.
void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  ROIAlignForwardCUDAKernelLauncher(
      input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width,
      spatial_scale, sampling_ratio, pool_mode, aligned);
}

// CUDA entry point for RoI align backward: forwards every argument, in order,
// to ROIAlignBackwardCUDAKernelLauncher.
void roi_align_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax_y,
                             Tensor argmax_x, Tensor grad_input,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned) {
  ROIAlignBackwardCUDAKernelLauncher(
      grad_output, rois, argmax_y, argmax_x, grad_input, aligned_height,
      aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned);
}

// Declarations of the device-generic entry points (no bodies here).
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned);

void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y,
                             Tensor argmax_x, Tensor grad_input,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned);

REGISTER_DEVICE_IMPL(roi_align_forward_impl, CUDA, roi_align_forward_cuda);
REGISTER_DEVICE_IMPL(roi_align_backward_impl, CUDA,
roi_align_backward_cuda); void ROIAlignRotatedForwardCUDAKernelLauncher( const at::Tensor features, const at::Tensor rois, const float spatial_scale, const int sample_num, const bool aligned, const bool clockwise, const int channels, const int height, const int width, const int num_rois, const int pooled_height, const int pooled_width, at::Tensor output); void ROIAlignRotatedBackwardCUDAKernelLauncher( const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale, const int sample_num, const bool aligned, const bool clockwise, const int channels, const int height, const int width, const int num_rois, const int pooled_height, const int pooled_width, at::Tensor bottom_grad); void roi_align_rotated_forward_cuda(Tensor features, Tensor rois, Tensor output, int aligned_height, int aligned_width, float spatial_scale, int sample_ratio, bool aligned, bool clockwise) { // Number of ROIs int num_rois = rois.size(0); int size_rois = rois.size(1); if (size_rois != 6) { AT_ERROR("wrong roi size"); } int num_channels = features.size(1); int data_height = features.size(2); int data_width = features.size(3); ROIAlignRotatedForwardCUDAKernelLauncher( features, rois, spatial_scale, sample_ratio, aligned, clockwise, num_channels, data_height, data_width, num_rois, aligned_height, aligned_width, output); } void roi_align_rotated_backward_cuda(Tensor top_grad, Tensor rois, Tensor bottom_grad, int aligned_height, int aligned_width, float spatial_scale, int sample_ratio, bool aligned, bool clockwise) { // Number of ROIs int num_rois = rois.size(0); int size_rois = rois.size(1); if (size_rois != 6) { AT_ERROR("wrong roi size"); } int num_channels = bottom_grad.size(1); int data_height = bottom_grad.size(2); int data_width = bottom_grad.size(3); ROIAlignRotatedBackwardCUDAKernelLauncher( top_grad, rois, spatial_scale, sample_ratio, aligned, clockwise, num_channels, data_height, data_width, num_rois, aligned_height, aligned_width, bottom_grad); } void 
roi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output, int aligned_height, int aligned_width, float spatial_scale, int sample_ratio, bool aligned, bool clockwise); void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois, Tensor bottom_grad, int aligned_height, int aligned_width, float spatial_scale, int sample_ratio, bool aligned, bool clockwise); REGISTER_DEVICE_IMPL(roi_align_rotated_forward_impl, CUDA, roi_align_rotated_forward_cuda); REGISTER_DEVICE_IMPL(roi_align_rotated_backward_impl, CUDA, roi_align_rotated_backward_cuda); void RiROIAlignRotatedForwardCUDAKernelLauncher( const at::Tensor features, const at::Tensor rois, const float spatial_scale, const int num_samples, const bool clockwise, const int channels, const int height, const int width, const int num_rois, const int pooled_height, const int pooled_width, const int num_orientations, at::Tensor output); void RiROIAlignRotatedBackwardCUDAKernelLauncher( const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale, const int num_samples, const bool clockwise, const int channels, const int height, const int width, const int num_rois, const int pooled_height, const int pooled_width, const int num_orientations, at::Tensor bottom_grad); void riroi_align_rotated_forward_cuda(Tensor features, Tensor rois, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int num_samples, int num_orientations, bool clockwise) { // Number of ROIs int num_rois = rois.size(0); int size_rois = rois.size(1); if (size_rois != 6) { AT_ERROR("wrong roi size"); } CHECK_CONTIGUOUS(features); CHECK_CONTIGUOUS(rois); int num_channels = features.size(1) / num_orientations; int data_height = features.size(2); int data_width = features.size(3); RiROIAlignRotatedForwardCUDAKernelLauncher( features, rois, spatial_scale, num_samples, clockwise, num_channels, data_height, data_width, num_rois, pooled_height, pooled_width, num_orientations, output); } void 
riroi_align_rotated_backward_cuda(Tensor top_grad, Tensor rois, Tensor bottom_grad, int pooled_height, int pooled_width, float spatial_scale, int num_samples, int num_orientations, bool clockwise) { // Number of ROIs int num_rois = rois.size(0); int size_rois = rois.size(1); if (size_rois != 6) { AT_ERROR("wrong roi size"); } CHECK_CONTIGUOUS(top_grad); CHECK_CONTIGUOUS(rois); int num_channels = bottom_grad.size(1) / num_orientations; int data_height = bottom_grad.size(2); int data_width = bottom_grad.size(3); RiROIAlignRotatedBackwardCUDAKernelLauncher( top_grad, rois, spatial_scale, num_samples, clockwise, num_channels, data_height, data_width, num_rois, pooled_height, pooled_width, num_orientations, bottom_grad); } void riroi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int num_samples, int num_orientations, bool clockwise); void riroi_align_rotated_backward_impl(Tensor top_grad, Tensor rois, Tensor bottom_grad, int pooled_height, int pooled_width, float spatial_scale, int num_samples, int num_orientations, bool clockwise); REGISTER_DEVICE_IMPL(riroi_align_rotated_forward_impl, CUDA, riroi_align_rotated_forward_cuda); REGISTER_DEVICE_IMPL(riroi_align_rotated_backward_impl, CUDA, riroi_align_rotated_backward_cuda); void RoiawarePool3dForwardCUDAKernelLauncher( int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int out_y, int out_z, const Tensor rois, const Tensor pts, const Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels, Tensor pooled_features, int pool_method); void RoiawarePool3dBackwardCUDAKernelLauncher( int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel, const Tensor pts_idx_of_voxels, const Tensor argmax, const Tensor grad_out, Tensor grad_in, int pool_method); void roiaware_pool3d_forward_cuda(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int out_y, int out_z, const 
Tensor rois, const Tensor pts, const Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels, Tensor pooled_features, int pool_method) { RoiawarePool3dForwardCUDAKernelLauncher( boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features, pool_method); }; void roiaware_pool3d_backward_cuda(int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel, const Tensor pts_idx_of_voxels, const Tensor argmax, const Tensor grad_out, Tensor grad_in, int pool_method) { RoiawarePool3dBackwardCUDAKernelLauncher( boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel, pts_idx_of_voxels, argmax, grad_out, grad_in, pool_method); }; void roiaware_pool3d_forward_impl(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int out_y, int out_z, const Tensor rois, const Tensor pts, const Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels, Tensor pooled_features, int pool_method); void roiaware_pool3d_backward_impl(int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel, const Tensor pts_idx_of_voxels, const Tensor argmax, const Tensor grad_out, Tensor grad_in, int pool_method); REGISTER_DEVICE_IMPL(roiaware_pool3d_forward_impl, CUDA, roiaware_pool3d_forward_cuda); REGISTER_DEVICE_IMPL(roiaware_pool3d_backward_impl, CUDA, roiaware_pool3d_backward_cuda); void RoIPointPool3dForwardCUDAKernelLauncher( int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num, const Tensor xyz, const Tensor boxes3d, const Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag); void roipoint_pool3d_forward_cuda(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num, const Tensor xyz, const Tensor boxes3d, const Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag) { RoIPointPool3dForwardCUDAKernelLauncher( batch_size, pts_num, boxes_num, feature_in_len, 
sampled_pts_num, xyz, boxes3d, pts_feature, pooled_features, pooled_empty_flag); }; void roipoint_pool3d_forward_impl(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num, const Tensor xyz, const Tensor boxes3d, const Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag); REGISTER_DEVICE_IMPL(roipoint_pool3d_forward_impl, CUDA, roipoint_pool3d_forward_cuda); void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, Tensor argmax, int pooled_height, int pooled_width, float spatial_scale); void ROIPoolBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois, Tensor argmax, Tensor grad_input, int pooled_height, int pooled_width, float spatial_scale); void roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor output, Tensor argmax, int pooled_height, int pooled_width, float spatial_scale) { ROIPoolForwardCUDAKernelLauncher(input, rois, output, argmax, pooled_height, pooled_width, spatial_scale); } void roi_pool_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax, Tensor grad_input, int pooled_height, int pooled_width, float spatial_scale) { ROIPoolBackwardCUDAKernelLauncher(grad_output, rois, argmax, grad_input, pooled_height, pooled_width, spatial_scale); } void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output, Tensor argmax, int pooled_height, int pooled_width, float spatial_scale); void roi_pool_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax, Tensor grad_input, int pooled_height, int pooled_width, float spatial_scale); REGISTER_DEVICE_IMPL(roi_pool_forward_impl, CUDA, roi_pool_forward_cuda); REGISTER_DEVICE_IMPL(roi_pool_backward_impl, CUDA, roi_pool_backward_cuda); typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t; std::vector DynamicPointToVoxelForwardCUDAKernelLauncher( const at::Tensor& feats, const at::Tensor& coors, const reduce_t reduce_type); void DynamicPointToVoxelBackwardCUDAKernelLauncher( at::Tensor& grad_feats, const at::Tensor& 
grad_reduced_feats, const at::Tensor& feats, const at::Tensor& reduced_feats, const at::Tensor& coors_map, const at::Tensor& reduce_count, const reduce_t reduce_type); std::vector dynamic_point_to_voxel_forward_cuda( const torch::Tensor& feats, const torch::Tensor& coors, const reduce_t reduce_type) { return DynamicPointToVoxelForwardCUDAKernelLauncher(feats, coors, reduce_type); }; void dynamic_point_to_voxel_backward_cuda( torch::Tensor& grad_feats, const torch::Tensor& grad_reduced_feats, const torch::Tensor& feats, const torch::Tensor& reduced_feats, const torch::Tensor& coors_idx, const torch::Tensor& reduce_count, const reduce_t reduce_type) { DynamicPointToVoxelBackwardCUDAKernelLauncher(grad_feats, grad_reduced_feats, feats, reduced_feats, coors_idx, reduce_count, reduce_type); }; std::vector dynamic_point_to_voxel_forward_impl( const torch::Tensor& feats, const torch::Tensor& coors, const reduce_t reduce_type); void dynamic_point_to_voxel_backward_impl( torch::Tensor& grad_feats, const torch::Tensor& grad_reduced_feats, const torch::Tensor& feats, const torch::Tensor& reduced_feats, const torch::Tensor& coors_idx, const torch::Tensor& reduce_count, const reduce_t reduce_type); REGISTER_DEVICE_IMPL(dynamic_point_to_voxel_forward_impl, CUDA, dynamic_point_to_voxel_forward_cuda); REGISTER_DEVICE_IMPL(dynamic_point_to_voxel_backward_impl, CUDA, dynamic_point_to_voxel_backward_cuda); void SyncBNForwardMeanCUDAKernelLauncher(const Tensor input, Tensor mean); void SyncBNForwardVarCUDAKernelLauncher(const Tensor input, const Tensor mean, Tensor var); void SyncBNForwardOutputCUDAKernelLauncher( const Tensor input, const Tensor mean, const Tensor var, Tensor running_mean, Tensor running_var, const Tensor weight, const Tensor bias, Tensor norm, Tensor std, Tensor output, float eps, float momentum, int group_size); void SyncBNBackwardParamCUDAKernelLauncher(const Tensor grad_output, const Tensor norm, Tensor grad_weight, Tensor grad_bias); void 
SyncBNBackwardDataCUDAKernelLauncher(const Tensor grad_output, const Tensor weight, const Tensor grad_weight, const Tensor grad_bias, const Tensor norm, const Tensor std, Tensor grad_input); void sync_bn_forward_mean_cuda(const Tensor input, Tensor mean) { SyncBNForwardMeanCUDAKernelLauncher(input, mean); } void sync_bn_forward_var_cuda(const Tensor input, const Tensor mean, Tensor var) { SyncBNForwardVarCUDAKernelLauncher(input, mean, var); } void sync_bn_forward_output_cuda(const Tensor input, const Tensor mean, const Tensor var, Tensor running_mean, Tensor running_var, const Tensor weight, const Tensor bias, Tensor norm, Tensor std, Tensor output, float eps, float momentum, int group_size) { SyncBNForwardOutputCUDAKernelLauncher(input, mean, var, running_mean, running_var, weight, bias, norm, std, output, eps, momentum, group_size); } void sync_bn_backward_param_cuda(const Tensor grad_output, const Tensor norm, Tensor grad_weight, Tensor grad_bias) { SyncBNBackwardParamCUDAKernelLauncher(grad_output, norm, grad_weight, grad_bias); } void sync_bn_backward_data_cuda(const Tensor grad_output, const Tensor weight, const Tensor grad_weight, const Tensor grad_bias, const Tensor norm, const Tensor std, Tensor grad_input) { SyncBNBackwardDataCUDAKernelLauncher(grad_output, weight, grad_weight, grad_bias, norm, std, grad_input); } void sync_bn_forward_mean_impl(const Tensor input, Tensor mean); void sync_bn_forward_var_impl(const Tensor input, const Tensor mean, Tensor var); void sync_bn_forward_output_impl(const Tensor input, const Tensor mean, const Tensor var, Tensor running_mean, Tensor running_var, const Tensor weight, const Tensor bias, Tensor norm, Tensor std, Tensor output, float eps, float momentum, int group_size); void sync_bn_backward_param_impl(const Tensor grad_output, const Tensor norm, Tensor grad_weight, Tensor grad_bias); void sync_bn_backward_data_impl(const Tensor grad_output, const Tensor weight, const Tensor grad_weight, const Tensor grad_bias, 
const Tensor norm, const Tensor std, Tensor grad_input); REGISTER_DEVICE_IMPL(sync_bn_forward_mean_impl, CUDA, sync_bn_forward_mean_cuda); REGISTER_DEVICE_IMPL(sync_bn_forward_var_impl, CUDA, sync_bn_forward_var_cuda); REGISTER_DEVICE_IMPL(sync_bn_forward_output_impl, CUDA, sync_bn_forward_output_cuda); REGISTER_DEVICE_IMPL(sync_bn_backward_param_impl, CUDA, sync_bn_backward_param_cuda); REGISTER_DEVICE_IMPL(sync_bn_backward_data_impl, CUDA, sync_bn_backward_data_cuda); void ThreeInterpolateForwardCUDAKernelLauncher(int b, int c, int m, int n, const Tensor points, const Tensor idx, const Tensor weight, Tensor out); void ThreeInterpolateBackwardCUDAKernelLauncher(int b, int c, int n, int m, const Tensor grad_out, const Tensor idx, const Tensor weight, Tensor grad_points); void three_interpolate_forward_cuda(int b, int c, int m, int n, const Tensor points, const Tensor idx, const Tensor weight, Tensor out) { ThreeInterpolateForwardCUDAKernelLauncher(b, c, m, n, points, idx, weight, out); }; void three_interpolate_backward_cuda(int b, int c, int n, int m, const Tensor grad_out, const Tensor idx, const Tensor weight, Tensor grad_points) { ThreeInterpolateBackwardCUDAKernelLauncher(b, c, n, m, grad_out, idx, weight, grad_points); }; void three_interpolate_forward_impl(int b, int c, int m, int n, const Tensor points, const Tensor idx, const Tensor weight, Tensor out); void three_interpolate_backward_impl(int b, int c, int n, int m, const Tensor grad_out, const Tensor idx, const Tensor weight, Tensor grad_points); REGISTER_DEVICE_IMPL(three_interpolate_forward_impl, CUDA, three_interpolate_forward_cuda); REGISTER_DEVICE_IMPL(three_interpolate_backward_impl, CUDA, three_interpolate_backward_cuda); void ThreeNNForwardCUDAKernelLauncher(int b, int n, int m, const Tensor unknown, const Tensor known, Tensor dist2, Tensor idx); void three_nn_forward_cuda(int b, int n, int m, const Tensor unknown, const Tensor known, Tensor dist2, Tensor idx) { 
ThreeNNForwardCUDAKernelLauncher(b, n, m, unknown, known, dist2, idx); }; void three_nn_forward_impl(int b, int n, int m, const Tensor unknown, const Tensor known, Tensor dist2, Tensor idx); REGISTER_DEVICE_IMPL(three_nn_forward_impl, CUDA, three_nn_forward_cuda); void TINShiftForwardCUDAKernelLauncher(Tensor input, Tensor shift, Tensor output); void TINShiftBackwardCUDAKernelLauncher(Tensor grad_output, Tensor shift, Tensor grad_input); void tin_shift_forward_cuda(Tensor input, Tensor shift, Tensor output) { TINShiftForwardCUDAKernelLauncher(input, shift, output); } void tin_shift_backward_cuda(Tensor grad_output, Tensor shift, Tensor grad_input) { TINShiftBackwardCUDAKernelLauncher(grad_output, shift, grad_input); } void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output); void tin_shift_backward_impl(Tensor grad_output, Tensor shift, Tensor grad_input); REGISTER_DEVICE_IMPL(tin_shift_forward_impl, CUDA, tin_shift_forward_cuda); REGISTER_DEVICE_IMPL(tin_shift_backward_impl, CUDA, tin_shift_backward_cuda); torch::Tensor upfirdn2d_op(const torch::Tensor& input, const torch::Tensor& kernel, int up_x, int up_y, int down_x, int down_y, int pad_x0, int pad_x1, int pad_y0, int pad_y1); torch::Tensor upfirdn2d_op_impl(const torch::Tensor& input, const torch::Tensor& kernel, int up_x, int up_y, int down_x, int down_y, int pad_x0, int pad_x1, int pad_y0, int pad_y1); REGISTER_DEVICE_IMPL(upfirdn2d_op_impl, CUDA, upfirdn2d_op); int HardVoxelizeForwardCUDAKernelLauncher( const at::Tensor& points, at::Tensor& voxels, at::Tensor& coors, at::Tensor& num_points_per_voxel, const std::vector voxel_size, const std::vector coors_range, const int max_points, const int max_voxels, const int NDim = 3); void DynamicVoxelizeForwardCUDAKernelLauncher( const at::Tensor& points, at::Tensor& coors, const std::vector voxel_size, const std::vector coors_range, const int NDim = 3); int hard_voxelize_forward_cuda(const at::Tensor& points, at::Tensor& voxels, at::Tensor& coors, 
at::Tensor& num_points_per_voxel, const std::vector voxel_size, const std::vector coors_range, const int max_points, const int max_voxels, const int NDim) { return HardVoxelizeForwardCUDAKernelLauncher( points, voxels, coors, num_points_per_voxel, voxel_size, coors_range, max_points, max_voxels, NDim); }; void dynamic_voxelize_forward_cuda(const at::Tensor& points, at::Tensor& coors, const std::vector voxel_size, const std::vector coors_range, const int NDim) { DynamicVoxelizeForwardCUDAKernelLauncher(points, coors, voxel_size, coors_range, NDim); }; int hard_voxelize_forward_impl(const at::Tensor& points, at::Tensor& voxels, at::Tensor& coors, at::Tensor& num_points_per_voxel, const std::vector voxel_size, const std::vector coors_range, const int max_points, const int max_voxels, const int NDim); void dynamic_voxelize_forward_impl(const at::Tensor& points, at::Tensor& coors, const std::vector voxel_size, const std::vector coors_range, const int NDim); REGISTER_DEVICE_IMPL(hard_voxelize_forward_impl, CUDA, hard_voxelize_forward_cuda); REGISTER_DEVICE_IMPL(dynamic_voxelize_forward_impl, CUDA, dynamic_voxelize_forward_cuda); void RotatedFeatureAlignForwardCUDAKernelLauncher(const Tensor features, const Tensor best_bboxes, const float spatial_scale, const int points, Tensor output); void RotatedFeatureAlignBackwardCUDAKernelLauncher(const Tensor top_grad, const Tensor best_bboxes, const float spatial_scale, const int points, Tensor bottom_grad); void rotated_feature_align_forward_cuda(const Tensor features, const Tensor best_bboxes, const float spatial_scale, const int points, Tensor output) { RotatedFeatureAlignForwardCUDAKernelLauncher(features, best_bboxes, spatial_scale, points, output); }; void rotated_feature_align_backward_cuda(const Tensor top_grad, const Tensor best_bboxes, const float spatial_scale, const int points, Tensor bottom_grad) { RotatedFeatureAlignBackwardCUDAKernelLauncher( top_grad, best_bboxes, spatial_scale, points, bottom_grad); }; void 
rotated_feature_align_forward_impl(const Tensor features, const Tensor best_bboxes, const float spatial_scale, const int points, Tensor output); void rotated_feature_align_backward_impl(const Tensor top_grad, const Tensor best_bboxes, const float spatial_scale, const int points, Tensor bottom_grad); REGISTER_DEVICE_IMPL(rotated_feature_align_forward_impl, CUDA, rotated_feature_align_forward_cuda); REGISTER_DEVICE_IMPL(rotated_feature_align_backward_impl, CUDA, rotated_feature_align_backward_cuda); void PointsInPolygonsForwardCUDAKernelLauncher(const at::Tensor points, const at::Tensor polygons, const int rows, const int cols, at::Tensor output); void points_in_polygons_forward_cuda(const Tensor points, const Tensor polygons, Tensor output, const int rows, const int cols) { PointsInPolygonsForwardCUDAKernelLauncher(points, polygons, rows, cols, output); }; void points_in_polygons_forward_impl(const Tensor points, const Tensor polygons, Tensor output, const int rows, const int cols); REGISTER_DEVICE_IMPL(points_in_polygons_forward_impl, CUDA, points_in_polygons_forward_cuda); void MinAreaPolygonsCUDAKernelLauncher(const Tensor pointsets, Tensor polygons); void min_area_polygons_cuda(const Tensor pointsets, Tensor polygons) { MinAreaPolygonsCUDAKernelLauncher(pointsets, polygons); } void min_area_polygons_impl(const Tensor pointsets, Tensor polygons); REGISTER_DEVICE_IMPL(min_area_polygons_impl, CUDA, min_area_polygons_cuda); void ActiveRotatedFilterForwardCUDAKernelLauncher(const Tensor input, const Tensor indices, Tensor output); void ActiveRotatedFilterBackwardCUDAKernelLauncher(const Tensor grad_out, const Tensor indices, Tensor grad_in); void active_rotated_filter_forward_cuda(const Tensor input, const Tensor indices, Tensor output) { ActiveRotatedFilterForwardCUDAKernelLauncher(input, indices, output); }; void active_rotated_filter_backward_cuda(const Tensor grad_out, const Tensor indices, Tensor grad_in) { ActiveRotatedFilterBackwardCUDAKernelLauncher(grad_out, 
indices, grad_in); }; void active_rotated_filter_forward_impl(const Tensor input, const Tensor indices, Tensor output); void active_rotated_filter_backward_impl(const Tensor grad_out, const Tensor indices, Tensor grad_in); REGISTER_DEVICE_IMPL(active_rotated_filter_forward_impl, CUDA, active_rotated_filter_forward_cuda); REGISTER_DEVICE_IMPL(active_rotated_filter_backward_impl, CUDA, active_rotated_filter_backward_cuda); void ConvexIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons, Tensor ious); void ConvexGIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons, Tensor output); void convex_iou_cuda(const Tensor pointsets, const Tensor polygons, Tensor ious) { ConvexIoUCUDAKernelLauncher(pointsets, polygons, ious); } void convex_giou_cuda(const Tensor pointsets, const Tensor polygons, Tensor output) { ConvexGIoUCUDAKernelLauncher(pointsets, polygons, output); } void convex_iou_impl(const Tensor pointsets, const Tensor polygons, Tensor ious); void convex_giou_impl(const Tensor pointsets, const Tensor polygons, Tensor output); REGISTER_DEVICE_IMPL(convex_iou_impl, CUDA, convex_iou_cuda); REGISTER_DEVICE_IMPL(convex_giou_impl, CUDA, convex_giou_cuda); ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/deform_conv_cuda.cu ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "deform_conv_cuda_kernel.cuh" #include "pytorch_cuda_helper.hpp" void deformable_im2col_cuda(Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor data_col) { // num_axes should be smaller than block size // todo: check parallel_imgs is correctly passed in int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; int num_kernels = channels * height_col * width_col * parallel_imgs; int channel_per_deformable_group = channels / deformable_group; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_im.scalar_type(), "deformable_im2col_gpu", ([&] { const scalar_t *data_im_ = data_im.data_ptr(); const scalar_t *data_offset_ = data_offset.data_ptr(); scalar_t *data_col_ = data_col.data_ptr(); deformable_im2col_gpu_kernel<<>>( num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, parallel_imgs, channels, deformable_group, height_col, width_col, data_col_); })); AT_CUDA_CHECK(cudaGetLastError()); } void deformable_col2im_cuda(Tensor data_col, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_im) { // todo: make sure parallel_imgs is passed in correctly int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; int num_kernels = 
channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; int channel_per_deformable_group = channels / deformable_group; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "deformable_col2im_gpu", ([&] { const scalar_t *data_col_ = data_col.data_ptr(); const scalar_t *data_offset_ = data_offset.data_ptr(); scalar_t *grad_im_ = grad_im.data_ptr(); deformable_col2im_gpu_kernel<<>>( num_kernels, data_col_, data_offset_, channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, parallel_imgs, deformable_group, height_col, width_col, grad_im_); })); AT_CUDA_CHECK(cudaGetLastError()); } void deformable_col2im_coord_cuda( Tensor data_col, Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_offset) { int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * deformable_group * parallel_imgs; int channel_per_deformable_group = channels * ksize_h * ksize_w / deformable_group; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] { const scalar_t *data_col_ = data_col.data_ptr(); const scalar_t *data_im_ = data_im.data_ptr(); const scalar_t *data_offset_ = data_offset.data_ptr(); scalar_t *grad_offset_ = grad_offset.data_ptr(); deformable_col2im_coord_gpu_kernel<<< GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, at::cuda::getCurrentCUDAStream()>>>( num_kernels, data_col_, data_im_, data_offset_, channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, 
channel_per_deformable_group, parallel_imgs, 2 * ksize_h * ksize_w * deformable_group, deformable_group, height_col, width_col, grad_offset_); })); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/deform_roi_pool_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "deform_roi_pool_cuda_kernel.cuh" #include "pytorch_cuda_helper.hpp" void DeformRoIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor offset, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma) { int output_size = output.numel(); int channels = input.size(1); int height = input.size(2); int width = input.size(3); at::cuda::CUDAGuard device_guard(input.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( input.scalar_type(), "deform_roi_pool_forward_cuda_kernel", [&] { deform_roi_pool_forward_cuda_kernel <<>>( output_size, input.data_ptr(), rois.data_ptr(), offset.data_ptr(), output.data_ptr(), pooled_height, pooled_width, static_cast(spatial_scale), sampling_ratio, static_cast(gamma), channels, height, width); }); AT_CUDA_CHECK(cudaGetLastError()); } void DeformRoIPoolBackwardCUDAKernelLauncher( Tensor grad_output, Tensor input, Tensor rois, Tensor offset, Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma) { int output_size = grad_output.numel(); int channels = grad_input.size(1); int height = grad_input.size(2); int width = grad_input.size(3); at::cuda::CUDAGuard device_guard(grad_output.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( grad_output.scalar_type(), "deform_roi_pool_backward_cuda_kernel", [&] { deform_roi_pool_backward_cuda_kernel <<>>( output_size, grad_output.data_ptr(), 
input.data_ptr(), rois.data_ptr(), offset.data_ptr(), grad_input.data_ptr(), grad_offset.data_ptr(), pooled_height, pooled_width, static_cast(spatial_scale), sampling_ratio, static_cast(gamma), channels, height, width); }); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/focal_loss_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cuda_helper.hpp" #include "sigmoid_focal_loss_cuda_kernel.cuh" #include "softmax_focal_loss_cuda_kernel.cuh" void SigmoidFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target, Tensor weight, Tensor output, const float gamma, const float alpha) { int output_size = output.numel(); int num_classes = input.size(1); AT_ASSERTM(target.max().item() <= (int64_t)num_classes, "target label should smaller or equal than num classes"); at::cuda::CUDAGuard device_guard(input.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( input.scalar_type(), "sigmoid_focal_loss_forward_cuda_kernel", [&] { sigmoid_focal_loss_forward_cuda_kernel <<>>( output_size, input.data_ptr(), target.data_ptr(), weight.data_ptr(), output.data_ptr(), gamma, alpha, num_classes); }); AT_CUDA_CHECK(cudaGetLastError()); } void SigmoidFocalLossBackwardCUDAKernelLauncher(Tensor input, Tensor target, Tensor weight, Tensor grad_input, const float gamma, const float alpha) { int output_size = grad_input.numel(); int num_classes = input.size(1); at::cuda::CUDAGuard device_guard(grad_input.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( input.scalar_type(), "sigmoid_focal_loss_backward_cuda_kernel", [&] { sigmoid_focal_loss_backward_cuda_kernel <<>>( output_size, input.data_ptr(), target.data_ptr(), weight.data_ptr(), grad_input.data_ptr(), gamma, alpha, num_classes); }); 
AT_CUDA_CHECK(cudaGetLastError()); } void SoftmaxFocalLossForwardCUDAKernelLauncher(Tensor softmax, Tensor target, Tensor weight, Tensor output, const float gamma, const float alpha) { int output_size = output.numel(); int num_classes = softmax.size(1); AT_ASSERTM(target.max().item() <= (int64_t)num_classes, "target label should smaller or equal than num classes"); at::cuda::CUDAGuard device_guard(softmax.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( softmax.scalar_type(), "softmax_focal_loss_forward_cuda_kernel", [&] { softmax_focal_loss_forward_cuda_kernel <<>>( output_size, softmax.data_ptr(), target.data_ptr(), weight.data_ptr(), output.data_ptr(), gamma, alpha, num_classes); }); AT_CUDA_CHECK(cudaGetLastError()); } void SoftmaxFocalLossBackwardCUDAKernelLauncher(Tensor softmax, Tensor target, Tensor weight, Tensor buff, Tensor grad_input, const float gamma, const float alpha) { int num_classes = softmax.size(1); int output_size = buff.numel(); at::cuda::CUDAGuard device_guard(grad_input.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( grad_input.scalar_type(), "softmax_focal_loss_backward_cuda1_" "kernel", [&] { softmax_focal_loss_backward_cuda1_kernel <<>>( output_size, softmax.data_ptr(), target.data_ptr(), weight.data_ptr(), buff.data_ptr(), gamma, alpha, num_classes); }); AT_CUDA_CHECK(cudaGetLastError()); output_size = grad_input.numel(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( grad_input.scalar_type(), "softmax_focal_loss_backward_cuda2_" "kernel", [&] { softmax_focal_loss_backward_cuda2_kernel <<>>( output_size, softmax.data_ptr(), target.data_ptr(), buff.data_ptr(), grad_input.data_ptr(), num_classes); }); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/furthest_point_sample_cuda.cu ================================================ // Modified 
from // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/sampling_gpu.cu #include #include #include "furthest_point_sample_cuda_kernel.cuh" #include "pytorch_cuda_helper.hpp" inline int opt_n_threads(int work_size) { const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); return max(min(1 << pow_2, 1024), 1); } void FurthestPointSamplingForwardCUDAKernelLauncher(int b, int n, int m, const float* dataset, float* temp, int* idxs) { // dataset: (B, N, 3) // tmp: (B, N) // output: // idx: (B, M) cudaStream_t stream = at::cuda::getCurrentCUDAStream(); unsigned int n_threads = opt_n_threads(n); switch (n_threads) { case 1024: furthest_point_sampling_forward_cuda_kernel<1024> <<>>(b, n, m, dataset, temp, idxs); break; case 512: furthest_point_sampling_forward_cuda_kernel<512> <<>>(b, n, m, dataset, temp, idxs); break; case 256: furthest_point_sampling_forward_cuda_kernel<256> <<>>(b, n, m, dataset, temp, idxs); break; case 128: furthest_point_sampling_forward_cuda_kernel<128> <<>>(b, n, m, dataset, temp, idxs); break; case 64: furthest_point_sampling_forward_cuda_kernel<64> <<>>(b, n, m, dataset, temp, idxs); break; case 32: furthest_point_sampling_forward_cuda_kernel<32> <<>>(b, n, m, dataset, temp, idxs); break; case 16: furthest_point_sampling_forward_cuda_kernel<16> <<>>(b, n, m, dataset, temp, idxs); break; case 8: furthest_point_sampling_forward_cuda_kernel<8> <<>>(b, n, m, dataset, temp, idxs); break; case 4: furthest_point_sampling_forward_cuda_kernel<4> <<>>(b, n, m, dataset, temp, idxs); break; case 2: furthest_point_sampling_forward_cuda_kernel<2> <<>>(b, n, m, dataset, temp, idxs); break; case 1: furthest_point_sampling_forward_cuda_kernel<1> <<>>(b, n, m, dataset, temp, idxs); break; default: furthest_point_sampling_forward_cuda_kernel<512> <<>>(b, n, m, dataset, temp, idxs); } AT_CUDA_CHECK(cudaGetLastError()); } void FurthestPointSamplingWithDistForwardCUDAKernelLauncher( int b, int n, int m, const float* dataset, float* 
temp, int* idxs) { // dataset: (B, N, N) // temp: (B, N) // output: // idx: (B, M) cudaStream_t stream = at::cuda::getCurrentCUDAStream(); unsigned int n_threads = opt_n_threads(n); switch (n_threads) { case 1024: furthest_point_sampling_with_dist_forward_cuda_kernel<1024> <<>>(b, n, m, dataset, temp, idxs); break; case 512: furthest_point_sampling_with_dist_forward_cuda_kernel<512> <<>>(b, n, m, dataset, temp, idxs); break; case 256: furthest_point_sampling_with_dist_forward_cuda_kernel<256> <<>>(b, n, m, dataset, temp, idxs); break; case 128: furthest_point_sampling_with_dist_forward_cuda_kernel<128> <<>>(b, n, m, dataset, temp, idxs); break; case 64: furthest_point_sampling_with_dist_forward_cuda_kernel<64> <<>>(b, n, m, dataset, temp, idxs); break; case 32: furthest_point_sampling_with_dist_forward_cuda_kernel<32> <<>>(b, n, m, dataset, temp, idxs); break; case 16: furthest_point_sampling_with_dist_forward_cuda_kernel<16> <<>>(b, n, m, dataset, temp, idxs); break; case 8: furthest_point_sampling_with_dist_forward_cuda_kernel<8> <<>>(b, n, m, dataset, temp, idxs); break; case 4: furthest_point_sampling_with_dist_forward_cuda_kernel<4> <<>>(b, n, m, dataset, temp, idxs); break; case 2: furthest_point_sampling_with_dist_forward_cuda_kernel<2> <<>>(b, n, m, dataset, temp, idxs); break; case 1: furthest_point_sampling_with_dist_forward_cuda_kernel<1> <<>>(b, n, m, dataset, temp, idxs); break; default: furthest_point_sampling_with_dist_forward_cuda_kernel<512> <<>>(b, n, m, dataset, temp, idxs); } AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.cu ================================================ // Modified from // https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_bias_act_kernel.cu // Copyright (c) 2019, NVIDIA Corporation. All rights reserved. // // This work is made available under the Nvidia Source Code License-NC. 
// To view a copy of this license, visit // https://nvlabs.github.io/stylegan2/license.html #include #include #include #include #include #include #include template static __global__ void fused_bias_act_kernel( scalar_t* out, const scalar_t* p_x, const scalar_t* p_b, const scalar_t* p_ref, int act, int grad, scalar_t alpha, scalar_t scale, int loop_x, int size_x, int step_b, int size_b, int use_bias, int use_ref) { int xi = blockIdx.x * loop_x * blockDim.x + threadIdx.x; scalar_t zero = 0.0; for (int loop_idx = 0; loop_idx < loop_x && xi < size_x; loop_idx++, xi += blockDim.x) { scalar_t x = p_x[xi]; if (use_bias) { x += p_b[(xi / step_b) % size_b]; } scalar_t ref = use_ref ? p_ref[xi] : zero; scalar_t y; // act = 1: linear layer // act = 3: leaky relu layer // grad = 0: direct forward path // grad = 1: first order deviation // grad = 2: second order deviation switch (act * 10 + grad) { default: case 10: y = x; break; case 11: y = x; break; case 12: y = 0.0; break; case 30: y = (x > 0.0) ? x : x * alpha; break; case 31: y = (ref > 0.0) ? x : x * alpha; break; case 32: y = 0.0; break; } out[xi] = y * scale; } } torch::Tensor fused_bias_leakyrelu_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, int act, int grad, float alpha, float scale) { int curDevice = -1; cudaGetDevice(&curDevice); cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice); auto x = input.contiguous(); auto b = bias.contiguous(); auto ref = refer.contiguous(); int use_bias = b.numel() ? 1 : 0; int use_ref = ref.numel() ? 
1 : 0; int size_x = x.numel(); int size_b = b.numel(); int step_b = 1; for (int i = 1 + 1; i < x.dim(); i++) { step_b *= x.size(i); } int loop_x = 4; int block_size = 4 * 32; int grid_size = (size_x - 1) / (loop_x * block_size) + 1; auto y = torch::empty_like(x); AT_DISPATCH_FLOATING_TYPES_AND_HALF( x.scalar_type(), "fused_bias_act_kernel", [&] { fused_bias_act_kernel<<>>( y.data_ptr(), x.data_ptr(), b.data_ptr(), ref.data_ptr(), act, grad, alpha, scale, loop_x, size_x, step_b, size_b, use_bias, use_ref); }); return y; } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/gather_points_cuda.cu ================================================ #include #include #include "gather_points_cuda_kernel.cuh" #include "pytorch_cuda_helper.hpp" void GatherPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints, const Tensor points, const Tensor idx, Tensor out) { // points: (B, C, N) // idx: (B, npoints) // output: // out: (B, C, npoints) at::cuda::CUDAGuard device_guard(points.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); // blockIdx.x(col), blockIdx.y(row) dim3 blocks(GET_BLOCKS(npoints, THREADS_PER_BLOCK), c, b); dim3 threads(THREADS_PER_BLOCK); AT_DISPATCH_FLOATING_TYPES_AND_HALF( points.scalar_type(), "gather_points_forward_cuda_kernel", [&] { gather_points_forward_cuda_kernel <<>>( b, c, n, npoints, points.data_ptr(), idx.data_ptr(), out.data_ptr()); }); AT_CUDA_CHECK(cudaGetLastError()); } void GatherPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints, const Tensor grad_out, const Tensor idx, Tensor grad_points) { // grad_out: (B, C, npoints) // idx: (B, npoints) // output: // grad_points: (B, C, N) at::cuda::CUDAGuard device_guard(grad_out.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); // blockIdx.x(col), blockIdx.y(row) dim3 blocks(GET_BLOCKS(npoints, THREADS_PER_BLOCK), c, b); dim3 threads(THREADS_PER_BLOCK); 
AT_DISPATCH_FLOATING_TYPES_AND_HALF( grad_out.scalar_type(), "gather_points_backward_cuda_kernel", [&] { gather_points_backward_cuda_kernel <<>>( b, c, n, npoints, grad_out.data_ptr(), idx.data_ptr(), grad_points.data_ptr()); }); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/group_points_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved. // Modified from // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points_gpu.cu #include #include #include "group_points_cuda_kernel.cuh" #include "pytorch_cuda_helper.hpp" void GroupPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints, int nsample, const Tensor points, const Tensor idx, Tensor out) { // points: (B, C, N) // idx: (B, npoints, nsample) // output: // out: (B, C, npoints, nsample) at::cuda::CUDAGuard device_guard(points.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); // blockIdx.x(col), blockIdx.y(row) dim3 blocks(GET_BLOCKS(npoints * nsample, THREADS_PER_BLOCK), c, b); dim3 threads(THREADS_PER_BLOCK); AT_DISPATCH_FLOATING_TYPES_AND_HALF( points.scalar_type(), "group_points_forward_cuda_kernel", [&] { group_points_forward_cuda_kernel <<>>( b, c, n, npoints, nsample, points.data_ptr(), idx.data_ptr(), out.data_ptr()); }); AT_CUDA_CHECK(cudaGetLastError()); } void GroupPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints, int nsample, const Tensor grad_out, const Tensor idx, Tensor grad_points) { // grad_out: (B, C, npoints, nsample) // idx: (B, npoints, nsample) // output: // grad_points: (B, C, N) at::cuda::CUDAGuard device_guard(grad_out.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); // blockIdx.x(col), blockIdx.y(row) dim3 blocks(GET_BLOCKS(npoints * nsample, THREADS_PER_BLOCK), c, b); dim3 threads(THREADS_PER_BLOCK); AT_DISPATCH_FLOATING_TYPES_AND_HALF( 
grad_out.scalar_type(), "group_points_backward_cuda_kernel", [&] { group_points_backward_cuda_kernel <<>>( b, c, n, npoints, nsample, grad_out.data_ptr(), idx.data_ptr(), grad_points.data_ptr()); }); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/iou3d_cuda.cu ================================================ // Modified from // https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu /* 3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others) Written by Shaoshuai Shi All Rights Reserved 2019-2020. */ #include #include "iou3d_cuda_kernel.cuh" #include "pytorch_cuda_helper.hpp" void IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(const int num_a, const Tensor boxes_a, const int num_b, const Tensor boxes_b, Tensor ans_overlap) { at::cuda::CUDAGuard device_guard(boxes_a.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); // blockIdx.x(col), blockIdx.y(row) dim3 blocks(GET_BLOCKS(num_b, THREADS_PER_BLOCK_IOU3D), GET_BLOCKS(num_a, THREADS_PER_BLOCK_IOU3D)); dim3 threads(THREADS_PER_BLOCK_IOU3D, THREADS_PER_BLOCK_IOU3D); iou3d_boxes_overlap_bev_forward_cuda_kernel<<>>( num_a, boxes_a.data_ptr(), num_b, boxes_b.data_ptr(), ans_overlap.data_ptr()); AT_CUDA_CHECK(cudaGetLastError()); } void IoU3DBoxesIoUBevForwardCUDAKernelLauncher(const int num_a, const Tensor boxes_a, const int num_b, const Tensor boxes_b, Tensor ans_iou) { at::cuda::CUDAGuard device_guard(boxes_a.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); // blockIdx.x(col), blockIdx.y(row) dim3 blocks(GET_BLOCKS(num_b, THREADS_PER_BLOCK_IOU3D), GET_BLOCKS(num_a, THREADS_PER_BLOCK_IOU3D)); dim3 threads(THREADS_PER_BLOCK_IOU3D, THREADS_PER_BLOCK_IOU3D); iou3d_boxes_iou_bev_forward_cuda_kernel<<>>( num_a, boxes_a.data_ptr(), num_b, boxes_b.data_ptr(), ans_iou.data_ptr()); AT_CUDA_CHECK(cudaGetLastError()); } void 
IoU3DNMSForwardCUDAKernelLauncher(const Tensor boxes, unsigned long long *mask, int boxes_num, float nms_overlap_thresh) { at::cuda::CUDAGuard device_guard(boxes.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 blocks(GET_BLOCKS(boxes_num, THREADS_PER_BLOCK_NMS), GET_BLOCKS(boxes_num, THREADS_PER_BLOCK_NMS)); dim3 threads(THREADS_PER_BLOCK_NMS); nms_forward_cuda_kernel<<>>( boxes_num, nms_overlap_thresh, boxes.data_ptr(), mask); AT_CUDA_CHECK(cudaGetLastError()); } void IoU3DNMSNormalForwardCUDAKernelLauncher(const Tensor boxes, unsigned long long *mask, int boxes_num, float nms_overlap_thresh) { at::cuda::CUDAGuard device_guard(boxes.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 blocks(GET_BLOCKS(boxes_num, THREADS_PER_BLOCK_NMS), GET_BLOCKS(boxes_num, THREADS_PER_BLOCK_NMS)); dim3 threads(THREADS_PER_BLOCK_NMS); nms_normal_forward_cuda_kernel<<>>( boxes_num, nms_overlap_thresh, boxes.data_ptr(), mask); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/knn_cuda.cu ================================================ // Copyright (c) OpenMMLab. 
All rights reserved
// Modified from
// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/pointops/src/knnquery_heap

#include
#include

#include "knn_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// NOTE(review): in this copy the header names after `#include`, the
// kernel-launch configuration (`<<>>`) and the `data_ptr()` template arguments
// are empty; the text looks stripped during extraction -- restore from
// upstream before compiling.

// Launches knn_forward_cuda_kernel on the device of `new_xyz`, dispatching on
// new_xyz.scalar_type() (floating types and half). The grid is sized from `m`
// in x and `b` in y; `idx` and `dist2` are passed to the kernel as outputs.
// Raises through AT_CUDA_CHECK if the launch reports an error.
void KNNForwardCUDAKernelLauncher(int b, int n, int m, int nsample,
                                  const Tensor xyz, const Tensor new_xyz,
                                  Tensor idx, Tensor dist2) {
  // param new_xyz: (B, m, 3)
  // param xyz: (B, n, 3)
  // param idx: (B, m, nsample)

  at::cuda::CUDAGuard device_guard(new_xyz.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // blockIdx.x(col), blockIdx.y(row)
  dim3 blocks(GET_BLOCKS(m, THREADS_PER_BLOCK), b);
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      new_xyz.scalar_type(), "knn_forward_cuda_kernel", [&] {
        knn_forward_cuda_kernel<<>>(
            b, n, m, nsample, xyz.data_ptr(),
            new_xyz.data_ptr(), idx.data_ptr(),
            dist2.data_ptr());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/masked_conv2d_cuda.cu
================================================
// Copyright (c) OpenMMLab.
All rights reserved
#include "masked_conv2d_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// NOTE(review): in this copy the kernel-launch configurations (`<<>>`) and the
// `data_ptr()` template arguments are empty; the text looks stripped during
// extraction -- restore from upstream before compiling.

// Launches the MaskedIm2colForward kernel on the device of `bottom_data`.
// Channel count, height and width are read from dimensions 1, 2 and 3 of
// `bottom_data`; the mask count is mask_h_idx.size(0) and the work size is
// mask_cnt * channels. The mask index tensors are read as int64_t.
void MaskedIm2colForwardCUDAKernelLauncher(const Tensor bottom_data,
                                           const Tensor mask_h_idx,
                                           const Tensor mask_w_idx,
                                           Tensor top_data, const int kernel_h,
                                           const int kernel_w, const int pad_h,
                                           const int pad_w) {
  int channels = bottom_data.size(1);
  int height = bottom_data.size(2);
  int width = bottom_data.size(3);
  int mask_cnt = mask_h_idx.size(0);
  int output_size = mask_cnt * channels;

  at::cuda::CUDAGuard device_guard(bottom_data.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      bottom_data.scalar_type(), "MaskedIm2colLaucherForward", ([&] {
        const scalar_t *bottom_data_ = bottom_data.data_ptr();
        const int64_t *mask_h_idx_ = mask_h_idx.data_ptr();
        const int64_t *mask_w_idx_ = mask_w_idx.data_ptr();
        scalar_t *top_data_ = top_data.data_ptr();
        MaskedIm2colForward
            <<>>(
                output_size, bottom_data_, height, width, kernel_h, kernel_w,
                pad_h, pad_w, mask_h_idx_, mask_w_idx_, mask_cnt, top_data_);
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches the MaskedCol2imForward kernel on the device of `bottom_data`.
// Unlike the im2col launcher, height, width and channel count are supplied by
// the caller; the work size is again mask_h_idx.size(0) * channels.
void MaskedCol2imForwardCUDAKernelLauncher(
    const Tensor bottom_data, const Tensor mask_h_idx, const Tensor mask_w_idx,
    Tensor top_data, const int height, const int width, const int channels) {
  int mask_cnt = mask_h_idx.size(0);
  int output_size = mask_cnt * channels;

  at::cuda::CUDAGuard device_guard(bottom_data.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      bottom_data.scalar_type(), "MaskedCol2imLaucherForward", ([&] {
        const scalar_t *bottom_data_ = bottom_data.data_ptr();
        const int64_t *mask_h_idx_ = mask_h_idx.data_ptr();
        const int64_t *mask_w_idx_ = mask_w_idx.data_ptr();
        scalar_t *top_data_ = top_data.data_ptr();

        MaskedCol2imForward
            <<>>(
                output_size, bottom_data_, height, width, channels, mask_h_idx_,
                mask_w_idx_, mask_cnt, top_data_);
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}

================================================
FILE: 
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/min_area_polygons.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/SDL-GuoZonghao/BeyondBoundingBox/blob/main/mmdet/ops/minareabbox/src/minareabbox_kernel.cu
#include "min_area_polygons_cuda.cuh"
#include "pytorch_cuda_helper.hpp"

// NOTE(review): in this copy the kernel-launch configuration (`<<>>`) and the
// `data_ptr()` template arguments are empty; the text looks stripped during
// extraction -- restore from upstream before compiling.

// Launches min_area_polygons_cuda_kernel on the device of `pointsets`,
// dispatching on pointsets.scalar_type() (floating types and half). The kernel
// receives pointsets.size(0) as the number of point sets and writes into
// `polygons`. Raises through AT_CUDA_CHECK if the launch reports an error.
void MinAreaPolygonsCUDAKernelLauncher(const Tensor pointsets,
                                       Tensor polygons) {
  int num_pointsets = pointsets.size(0);
  // Not passed to the kernel below; presumably consumed by the stripped launch
  // configuration -- confirm against upstream.
  const int output_size = polygons.numel();
  at::cuda::CUDAGuard device_guard(pointsets.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      pointsets.scalar_type(), "min_area_polygons_cuda_kernel", ([&] {
        min_area_polygons_cuda_kernel
            <<>>(
                num_pointsets, pointsets.data_ptr(),
                polygons.data_ptr());
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/modulated_deform_conv_cuda.cu
================================================
// Copyright (c) OpenMMLab.
All rights reserved #include "modulated_deform_conv_cuda_kernel.cuh" #include "pytorch_cuda_helper.hpp" void modulated_deformable_im2col_cuda( const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor data_col) { // num_axes should be smaller than block size const int channel_per_deformable_group = channels / deformable_group; const int num_kernels = channels * batch_size * height_col * width_col; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] { const scalar_t *data_im_ = data_im.data_ptr(); const scalar_t *data_offset_ = data_offset.data_ptr(); const scalar_t *data_mask_ = data_mask.data_ptr(); scalar_t *data_col_ = data_col.data_ptr(); modulated_deformable_im2col_gpu_kernel<<< GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, at::cuda::getCurrentCUDAStream()>>>( num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, batch_size, channels, deformable_group, height_col, width_col, data_col_); })); AT_CUDA_CHECK(cudaGetLastError()); } void modulated_deformable_col2im_cuda( const Tensor data_col, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor grad_im) { const int channel_per_deformable_group = channels / deformable_group; const int num_kernels = channels * 
kernel_h * kernel_w * batch_size * height_col * width_col; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] { const scalar_t *data_col_ = data_col.data_ptr(); const scalar_t *data_offset_ = data_offset.data_ptr(); const scalar_t *data_mask_ = data_mask.data_ptr(); scalar_t *grad_im_ = grad_im.data_ptr(); modulated_deformable_col2im_gpu_kernel<<< GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, at::cuda::getCurrentCUDAStream()>>>( num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, batch_size, deformable_group, height_col, width_col, grad_im_); })); AT_CUDA_CHECK(cudaGetLastError()); } void modulated_deformable_col2im_coord_cuda( const Tensor data_col, const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor grad_offset, Tensor grad_mask) { const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group; const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] { const scalar_t *data_col_ = data_col.data_ptr(); const scalar_t *data_im_ = data_im.data_ptr(); const scalar_t *data_offset_ = data_offset.data_ptr(); const scalar_t *data_mask_ = data_mask.data_ptr(); scalar_t *grad_offset_ = grad_offset.data_ptr(); scalar_t *grad_mask_ = grad_mask.data_ptr(); modulated_deformable_col2im_coord_gpu_kernel<<< GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, 
at::cuda::getCurrentCUDAStream()>>>( num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col, grad_offset_, grad_mask_); })); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/ms_deform_attn_cuda.cu ================================================ /*! ************************************************************************************************** * Deformable DETR * Copyright (c) 2020 SenseTime. All Rights Reserved. * Licensed under the Apache License, Version 2.0 [see LICENSE for details] ************************************************************************************************** * Modified from *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 ************************************************************************************************** */ #include #include #include #include #include #include #include "ms_deform_attn_cuda_kernel.cuh" template void ms_deformable_im2col_cuda(cudaStream_t stream, const scalar_t *data_value, const int64_t *data_spatial_shapes, const int64_t *data_level_start_index, const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight, const int batch_size, const int spatial_size, const int num_heads, const int channels, const int num_levels, const int num_query, const int num_point, scalar_t *data_col) { const int num_kernels = batch_size * num_query * num_heads * channels; const int num_actual_kernels = batch_size * num_query * num_heads * channels; const int num_threads = CUDA_NUM_THREADS; ms_deformable_im2col_gpu_kernel <<>>( num_kernels, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, 
spatial_size, num_heads, channels, num_levels, num_query, num_point, data_col); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("error in ms_deformable_im2col_cuda: %s\n", cudaGetErrorString(err)); } } template void ms_deformable_col2im_cuda( cudaStream_t stream, const scalar_t *grad_col, const scalar_t *data_value, const int64_t *data_spatial_shapes, const int64_t *data_level_start_index, const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight, const int batch_size, const int spatial_size, const int num_heads, const int channels, const int num_levels, const int num_query, const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) { const int num_threads = (channels > CUDA_NUM_THREADS) ? CUDA_NUM_THREADS : channels; const int num_kernels = batch_size * num_query * num_heads * channels; const int num_actual_kernels = batch_size * num_query * num_heads * channels; if (channels > 1024) { if ((channels & 1023) == 0) { ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks <<>>( num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); } else { ms_deformable_col2im_gpu_kernel_gm <<>>(num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); } } else { switch (channels) { case 1: ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 <<>>(num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); break; case 2: 
ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 <<>>(num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); break; case 4: ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 <<>>(num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); break; case 8: ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 <<>>(num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); break; case 16: ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 <<>>(num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); break; case 32: ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 <<>>(num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); break; case 64: ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 <<>>(num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); break; case 128: 
ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 <<>>(num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); break; case 256: ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 <<>>(num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); break; case 512: ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 <<>>(num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); break; case 1024: ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 <<>>(num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); break; default: if (channels < 64) { ms_deformable_col2im_gpu_kernel_shm_reduce_v1 <<>>( num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); } else { ms_deformable_col2im_gpu_kernel_shm_reduce_v2 <<>>( num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight); } } } cudaError_t err = 
cudaGetLastError(); if (err != cudaSuccess) { printf("error in ms_deformable_col2im_cuda: %s\n", cudaGetErrorString(err)); } } at::Tensor ms_deform_attn_cuda_forward(const at::Tensor &value, const at::Tensor &spatial_shapes, const at::Tensor &level_start_index, const at::Tensor &sampling_loc, const at::Tensor &attn_weight, const int im2col_step) { AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous"); AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous"); AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous"); AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous"); AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous"); AT_ASSERTM(value.is_cuda(), "value must be a CUDA tensor"); AT_ASSERTM(spatial_shapes.is_cuda(), "spatial_shapes must be a CUDA tensor"); AT_ASSERTM(level_start_index.is_cuda(), "level_start_index must be a CUDA tensor"); AT_ASSERTM(sampling_loc.is_cuda(), "sampling_loc must be a CUDA tensor"); AT_ASSERTM(attn_weight.is_cuda(), "attn_weight must be a CUDA tensor"); const int batch = value.size(0); const int spatial_size = value.size(1); const int num_heads = value.size(2); const int channels = value.size(3); const int num_levels = spatial_shapes.size(0); const int num_query = sampling_loc.size(1); const int num_point = sampling_loc.size(4); const int im2col_step_ = std::min(batch, im2col_step); AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_); auto output = at::zeros({batch, num_query, num_heads, channels}, value.options()); const int batch_n = im2col_step_; auto output_n = output.view( {batch / im2col_step_, batch_n, num_query, num_heads, channels}); auto per_value_size = spatial_size * num_heads * channels; auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2; auto per_attn_weight_size = num_query * num_heads * num_levels * 
num_point; for (int n = 0; n < batch / im2col_step_; ++n) { auto columns = output_n.select(0, n); AT_DISPATCH_FLOATING_TYPES( value.scalar_type(), "ms_deform_attn_forward_cuda", ([&] { ms_deformable_im2col_cuda( at::cuda::getCurrentCUDAStream(), value.data_ptr() + n * im2col_step_ * per_value_size, spatial_shapes.data_ptr(), level_start_index.data_ptr(), sampling_loc.data_ptr() + n * im2col_step_ * per_sample_loc_size, attn_weight.data_ptr() + n * im2col_step_ * per_attn_weight_size, batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point, columns.data_ptr()); })); } output = output.view({batch, num_query, num_heads * channels}); return output; } void ms_deform_attn_cuda_backward( const at::Tensor &value, const at::Tensor &spatial_shapes, const at::Tensor &level_start_index, const at::Tensor &sampling_loc, const at::Tensor &attn_weight, const at::Tensor &grad_output, at::Tensor &grad_value, at::Tensor &grad_sampling_loc, at::Tensor &grad_attn_weight, const int im2col_step) { AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous"); AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous"); AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous"); AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous"); AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous"); AT_ASSERTM(grad_output.is_contiguous(), "grad_output tensor has to be contiguous"); AT_ASSERTM(value.is_cuda(), "value must be a CUDA tensor"); AT_ASSERTM(spatial_shapes.is_cuda(), "spatial_shapes must be a CUDA tensor"); AT_ASSERTM(level_start_index.is_cuda(), "level_start_index must be a CUDA tensor"); AT_ASSERTM(sampling_loc.is_cuda(), "sampling_loc must be a CUDA tensor"); AT_ASSERTM(attn_weight.is_cuda(), "attn_weight must be a CUDA tensor"); AT_ASSERTM(grad_output.is_cuda(), "grad_output must be a CUDA tensor"); const int batch = value.size(0); 
const int spatial_size = value.size(1); const int num_heads = value.size(2); const int channels = value.size(3); const int num_levels = spatial_shapes.size(0); const int num_query = sampling_loc.size(1); const int num_point = sampling_loc.size(4); const int im2col_step_ = std::min(batch, im2col_step); AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_); const int batch_n = im2col_step_; auto per_value_size = spatial_size * num_heads * channels; auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2; auto per_attn_weight_size = num_query * num_heads * num_levels * num_point; auto grad_output_n = grad_output.view( {batch / im2col_step_, batch_n, num_query, num_heads, channels}); for (int n = 0; n < batch / im2col_step_; ++n) { auto grad_output_g = grad_output_n.select(0, n); AT_DISPATCH_FLOATING_TYPES( value.scalar_type(), "ms_deform_attn_backward_cuda", ([&] { ms_deformable_col2im_cuda( at::cuda::getCurrentCUDAStream(), grad_output_g.data_ptr(), value.data_ptr() + n * im2col_step_ * per_value_size, spatial_shapes.data_ptr(), level_start_index.data_ptr(), sampling_loc.data_ptr() + n * im2col_step_ * per_sample_loc_size, attn_weight.data_ptr() + n * im2col_step_ * per_attn_weight_size, batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point, grad_value.data_ptr() + n * im2col_step_ * per_value_size, grad_sampling_loc.data_ptr() + n * im2col_step_ * per_sample_loc_size, grad_attn_weight.data_ptr() + n * im2col_step_ * per_attn_weight_size); })); } } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/nms_cuda.cu ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "nms_cuda_kernel.cuh" #include "pytorch_cuda_helper.hpp" Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold, int offset) { at::cuda::CUDAGuard device_guard(boxes.device()); if (boxes.numel() == 0) { return at::empty({0}, boxes.options().dtype(at::kLong)); } auto order_t = std::get<1>(scores.sort(0, /*descending=*/true)); auto boxes_sorted = boxes.index_select(0, order_t); int boxes_num = boxes.size(0); const int col_blocks = (boxes_num + threadsPerBlock - 1) / threadsPerBlock; const int col_blocks_alloc = GET_BLOCKS(boxes_num, threadsPerBlock); Tensor mask = at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong)); dim3 blocks(col_blocks_alloc, col_blocks_alloc); dim3 threads(threadsPerBlock); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); nms_cuda<<>>( boxes_num, iou_threshold, offset, boxes_sorted.data_ptr(), (unsigned long long*)mask.data_ptr()); at::Tensor mask_cpu = mask.to(at::kCPU); unsigned long long* mask_host = (unsigned long long*)mask_cpu.data_ptr(); std::vector remv(col_blocks); memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); at::Tensor keep_t = at::zeros({boxes_num}, boxes.options().dtype(at::kBool).device(at::kCPU)); bool* keep = keep_t.data_ptr(); for (int i = 0; i < boxes_num; i++) { int nblock = i / threadsPerBlock; int inblock = i % threadsPerBlock; if (!(remv[nblock] & (1ULL << inblock))) { keep[i] = true; // set every overlap box with bit 1 in remv unsigned long long* p = mask_host + i * col_blocks; for (int j = nblock; j < col_blocks; j++) { remv[j] |= p[j]; } } } AT_CUDA_CHECK(cudaGetLastError()); return order_t.masked_select(keep_t.to(at::kCUDA)); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/nms_rotated_cuda.cu ================================================ // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved // modified from // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu #include "nms_rotated_cuda.cuh" #include "pytorch_cuda_helper.hpp" Tensor nms_rotated_cuda(const Tensor dets, const Tensor scores, const Tensor order_t, const Tensor dets_sorted, float iou_threshold, const int multi_label) { // using scalar_t = float; AT_ASSERTM(dets.is_cuda(), "dets must be a CUDA tensor"); AT_ASSERTM(scores.is_cuda(), "scores must be a CUDA tensor"); at::cuda::CUDAGuard device_guard(dets.device()); int dets_num = dets.size(0); const int col_blocks = at::cuda::ATenCeilDiv(dets_num, threadsPerBlock); Tensor mask = at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong)); dim3 blocks(col_blocks, col_blocks); dim3 threads(threadsPerBlock); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( dets_sorted.scalar_type(), "nms_rotated_kernel_cuda", [&] { nms_rotated_cuda_kernel<<>>( dets_num, iou_threshold, dets_sorted.data_ptr(), (unsigned long long*)mask.data_ptr(), multi_label); }); Tensor mask_cpu = mask.to(at::kCPU); unsigned long long* mask_host = (unsigned long long*)mask_cpu.data_ptr(); std::vector remv(col_blocks); memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); Tensor keep = at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU)); int64_t* keep_out = keep.data_ptr(); int num_to_keep = 0; for (int i = 0; i < dets_num; i++) { int nblock = i / threadsPerBlock; int inblock = i % threadsPerBlock; if (!(remv[nblock] & (1ULL << inblock))) { keep_out[num_to_keep++] = i; unsigned long long* p = mask_host + i * col_blocks; for (int j = nblock; j < col_blocks; j++) { remv[j] |= p[j]; } } } AT_CUDA_CHECK(cudaGetLastError()); return order_t.index( {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep) .to(order_t.device(), keep.scalar_type())}); } ================================================ FILE: 
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/points_in_boxes_cuda.cu ================================================ // Modified from // https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu // Written by Shaoshuai Shi // All Rights Reserved 2019. #include #include "points_in_boxes_cuda_kernel.cuh" #include "pytorch_cuda_helper.hpp" void PointsInBoxesPartForwardCUDAKernelLauncher(int batch_size, int boxes_num, int pts_num, const Tensor boxes, const Tensor pts, Tensor box_idx_of_points) { // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR // coordinate, z is // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x, // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default // -1 at::cuda::CUDAGuard device_guard(boxes.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 blocks(GET_BLOCKS(pts_num, THREADS_PER_BLOCK), batch_size); dim3 threads(THREADS_PER_BLOCK); AT_DISPATCH_FLOATING_TYPES_AND_HALF( boxes.scalar_type(), "points_in_boxes_part_forward_cuda_kernel", [&] { points_in_boxes_part_forward_cuda_kernel <<>>( batch_size, boxes_num, pts_num, boxes.data_ptr(), pts.data_ptr(), box_idx_of_points.data_ptr()); }); AT_CUDA_CHECK(cudaGetLastError()); } void PointsInBoxesAllForwardCUDAKernelLauncher(int batch_size, int boxes_num, int pts_num, const Tensor boxes, const Tensor pts, Tensor box_idx_of_points) { // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR // coordinate, z is the bottom center, each box params pts: (B, npoints, 3) // [x, y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), // default -1 at::cuda::CUDAGuard device_guard(boxes.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 blocks(GET_BLOCKS(pts_num, THREADS_PER_BLOCK), batch_size); dim3 threads(THREADS_PER_BLOCK); AT_DISPATCH_FLOATING_TYPES_AND_HALF( boxes.scalar_type(), 
"points_in_boxes_all_forward_cuda_kernel", [&] { points_in_boxes_all_forward_cuda_kernel <<>>( batch_size, boxes_num, pts_num, boxes.data_ptr(), pts.data_ptr(), box_idx_of_points.data_ptr()); }); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/points_in_polygons_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved // Modified from // https://github.com/ming71/CUDA/blob/master/point_justify/points_justify_kernel.cu #include #include "points_in_polygons_cuda_kernel.cuh" #include "pytorch_cuda_helper.hpp" void PointsInPolygonsForwardCUDAKernelLauncher(const at::Tensor points, const at::Tensor polygons, const int rows, const int cols, at::Tensor output) { const int output_size = rows * cols; at::cuda::CUDAGuard device_guard(points.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( points.scalar_type(), "points_in_polygons_forward_cuda_kernel", ([&] { const scalar_t *vertex1 = points.data_ptr(); const scalar_t *vertex2 = polygons.data_ptr(); scalar_t *inside_flag = output.data_ptr(); points_in_polygons_forward_cuda_kernel <<>>( output_size, vertex1, vertex2, rows, cols, inside_flag); })); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/psamask_cuda.cu ================================================ // Copyright (c) OpenMMLab. 
All rights reserved // Modified from // https://github.com/hszhao/semseg/blob/master/lib/psa/src #include #include #include #include "psamask_cuda_kernel.cuh" #include "pytorch_cuda_helper.hpp" void PSAMaskForwardCUDAKernelLauncher(const int psa_type, const Tensor input, Tensor output, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { int nthreads = num_ * h_feature * w_feature; cudaStream_t stream = at::cuda::getCurrentCUDAStream(); if (psa_type == 0) AT_DISPATCH_FLOATING_TYPES( input.scalar_type(), "psamask_collect_forward_cuda", [&] { psamask_collect_forward_cuda<<>>( nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask, input.data_ptr(), output.data_ptr()); }); else AT_DISPATCH_FLOATING_TYPES( input.scalar_type(), "psamask_distribute_forward_cuda", [&] { psamask_distribute_forward_cuda <<>>( nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask, input.data_ptr(), output.data_ptr()); }); } void PSAMaskBackwardCUDAKernelLauncher( const int psa_type, const Tensor grad_output, Tensor grad_input, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { int nthreads = num_ * h_feature * w_feature; cudaStream_t stream = at::cuda::getCurrentCUDAStream(); if (psa_type == 0) AT_DISPATCH_FLOATING_TYPES( grad_input.scalar_type(), "psamask_collect_backward_cuda", [&] { psamask_collect_backward_cuda<<>>( nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask, grad_output.data_ptr(), grad_input.data_ptr()); }); else AT_DISPATCH_FLOATING_TYPES( grad_input.scalar_type(), "psamask_distribute_backward_cuda", [&] { psamask_distribute_backward_cuda <<>>( nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask, grad_output.data_ptr(), grad_input.data_ptr()); }); } ================================================ FILE: 
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/riroi_align_rotated_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cuda_helper.hpp" #include "riroi_align_rotated_cuda_kernel.cuh" void RiROIAlignRotatedForwardCUDAKernelLauncher( const at::Tensor features, const at::Tensor rois, const float spatial_scale, const int num_samples, const bool clockwise, const int channels, const int height, const int width, const int num_rois, const int pooled_height, const int pooled_width, const int num_orientations, at::Tensor output) { const int output_size = num_rois * pooled_height * pooled_width * channels * num_orientations; at::cuda::CUDAGuard device_guard(features.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( features.scalar_type(), "riroi_align_rotated_forward_cuda_kernel", ([&] { const scalar_t *bottom_data = features.data_ptr(); const scalar_t *rois_data = rois.data_ptr(); scalar_t *top_data = output.data_ptr(); riroi_align_rotated_forward_cuda_kernel <<>>( output_size, bottom_data, rois_data, scalar_t(spatial_scale), num_samples, clockwise, channels, height, width, pooled_height, pooled_width, num_orientations, top_data); })); AT_CUDA_CHECK(cudaGetLastError()); } void RiROIAlignRotatedBackwardCUDAKernelLauncher( const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale, const int num_samples, const bool clockwise, const int channels, const int height, const int width, const int num_rois, const int pooled_height, const int pooled_width, const int num_orientations, at::Tensor bottom_grad) { const int output_size = num_rois * pooled_height * pooled_width * channels * num_orientations; at::cuda::CUDAGuard device_guard(top_grad.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( top_grad.scalar_type(), "riroi_align_rotated_backward_cuda_kernel", ([&] { const scalar_t 
*top_diff = top_grad.data_ptr(); const scalar_t *rois_data = rois.data_ptr(); scalar_t *bottom_diff = bottom_grad.data_ptr(); riroi_align_rotated_backward_cuda_kernel <<>>( output_size, top_diff, rois_data, spatial_scale, num_samples, clockwise, channels, height, width, pooled_height, pooled_width, num_orientations, bottom_diff); })); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/roi_align_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cuda_helper.hpp" #include "roi_align_cuda_kernel.cuh" void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { int output_size = output.numel(); int channels = input.size(1); int height = input.size(2); int width = input.size(3); at::cuda::CUDAGuard device_guard(input.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( input.scalar_type(), "roi_align_forward_cuda_kernel", [&] { roi_align_forward_cuda_kernel <<>>( output_size, input.data_ptr(), rois.data_ptr(), output.data_ptr(), argmax_y.data_ptr(), argmax_x.data_ptr(), aligned_height, aligned_width, static_cast(spatial_scale), sampling_ratio, pool_mode, aligned, channels, height, width); }); AT_CUDA_CHECK(cudaGetLastError()); } void ROIAlignBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois, Tensor argmax_y, Tensor argmax_x, Tensor grad_input, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { int output_size = grad_output.numel(); int channels = grad_input.size(1); int height = grad_input.size(2); int width = grad_input.size(3); at::cuda::CUDAGuard device_guard(grad_output.device()); cudaStream_t stream = 
at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( grad_output.scalar_type(), "roi_align_backward_cuda_kernel", [&] { roi_align_backward_cuda_kernel <<>>( output_size, grad_output.data_ptr(), rois.data_ptr(), argmax_y.data_ptr(), argmax_x.data_ptr(), grad_input.data_ptr(), aligned_height, aligned_width, static_cast(spatial_scale), sampling_ratio, pool_mode, aligned, channels, height, width); }); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/roi_align_rotated_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cuda_helper.hpp" #include "roi_align_rotated_cuda_kernel.cuh" void ROIAlignRotatedForwardCUDAKernelLauncher( const at::Tensor features, const at::Tensor rois, const float spatial_scale, const int sample_num, const bool aligned, const bool clockwise, const int channels, const int height, const int width, const int num_rois, const int pooled_height, const int pooled_width, at::Tensor output) { const int output_size = num_rois * pooled_height * pooled_width * channels; AT_DISPATCH_FLOATING_TYPES_AND_HALF( features.scalar_type(), "ROIAlignRotatedLaucherForward", ([&] { const scalar_t *bottom_data = features.data_ptr(); const scalar_t *rois_data = rois.data_ptr(); scalar_t *top_data = output.data_ptr(); roi_align_rotated_forward_cuda_kernel <<>>( output_size, bottom_data, rois_data, scalar_t(spatial_scale), sample_num, aligned, clockwise, channels, height, width, pooled_height, pooled_width, top_data); })); AT_CUDA_CHECK(cudaGetLastError()); } void ROIAlignRotatedBackwardCUDAKernelLauncher( const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale, const int sample_num, const bool aligned, const bool clockwise, const int channels, const int height, const int width, const int num_rois, const int pooled_height, const int pooled_width, at::Tensor 
bottom_grad) { const int output_size = num_rois * pooled_height * pooled_width * channels; AT_DISPATCH_FLOATING_TYPES_AND_HALF( top_grad.scalar_type(), "ROIAlignLaucherBackward", ([&] { const scalar_t *top_diff = top_grad.data_ptr(); const scalar_t *rois_data = rois.data_ptr(); scalar_t *bottom_diff = bottom_grad.data_ptr(); roi_align_rotated_backward_cuda_kernel <<>>( output_size, top_diff, rois_data, spatial_scale, sample_num, aligned, clockwise, channels, height, width, pooled_height, pooled_width, bottom_diff); })); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/roi_pool_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cuda_helper.hpp" #include "roi_pool_cuda_kernel.cuh" void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, Tensor argmax, int pooled_height, int pooled_width, float spatial_scale) { int output_size = output.numel(); int channels = input.size(1); int height = input.size(2); int width = input.size(3); at::cuda::CUDAGuard device_guard(input.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( input.scalar_type(), "roi_pool_forward_cuda_kernel", [&] { roi_pool_forward_cuda_kernel <<>>( output_size, input.data_ptr(), rois.data_ptr(), output.data_ptr(), argmax.data_ptr(), pooled_height, pooled_width, static_cast(spatial_scale), channels, height, width); }); AT_CUDA_CHECK(cudaGetLastError()); } void ROIPoolBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois, Tensor argmax, Tensor grad_input, int pooled_height, int pooled_width, float spatial_scale) { int output_size = grad_output.numel(); int channels = grad_input.size(1); int height = grad_input.size(2); int width = grad_input.size(3); at::cuda::CUDAGuard device_guard(grad_output.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 
AT_DISPATCH_FLOATING_TYPES_AND_HALF( grad_output.scalar_type(), "roi_pool_backward_cuda_kernel", [&] { roi_pool_backward_cuda_kernel <<>>( output_size, grad_output.data_ptr(), rois.data_ptr(), argmax.data_ptr(), grad_input.data_ptr(), pooled_height, pooled_width, channels, height, width); }); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/roiaware_pool3d_cuda.cu ================================================ // Modified from // https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu // Written by Shaoshuai Shi // All Rights Reserved 2019. #include #include "pytorch_cuda_helper.hpp" #include "roiaware_pool3d_cuda_kernel.cuh" void RoiawarePool3dForwardCUDAKernelLauncher( int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int out_y, int out_z, const Tensor rois, const Tensor pts, const Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels, Tensor pooled_features, int pool_method) { // params rois: (N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR // coordinate params pts: (npoints, 3) [x, y, z] in LiDAR coordinate params // pts_feature: (npoints, C) params argmax: (N, out_x, out_y, out_z, C) params // pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) params // pooled_features: (N, out_x, out_y, out_z, C) params pool_method: 0: // max_pool 1: avg_pool at::cuda::CUDAGuard device_guard(pts_feature.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); Tensor pts_mask = -at::ones({boxes_num, pts_num}, pts_feature.options().dtype(at::kInt)); dim3 blocks_mask(GET_BLOCKS(pts_num, THREADS_PER_BLOCK), boxes_num); dim3 threads(THREADS_PER_BLOCK); AT_DISPATCH_FLOATING_TYPES_AND_HALF( rois.scalar_type(), "generate_pts_mask_for_box3d", [&] { generate_pts_mask_for_box3d <<>>( boxes_num, pts_num, out_x, out_y, out_z, rois.data_ptr(), pts.data_ptr(), pts_mask.data_ptr()); 
}); AT_CUDA_CHECK(cudaGetLastError()); // TODO: Merge the collect and pool functions, SS dim3 blocks_collect(GET_BLOCKS(boxes_num, THREADS_PER_BLOCK)); AT_DISPATCH_INTEGRAL_TYPES( pts_idx_of_voxels.scalar_type(), "collect_inside_pts_for_box3d", [&] { collect_inside_pts_for_box3d <<>>( boxes_num, pts_num, max_pts_each_voxel, out_x, out_y, out_z, pts_mask.data_ptr(), pts_idx_of_voxels.data_ptr()); }); AT_CUDA_CHECK(cudaGetLastError()); dim3 blocks_pool(GET_BLOCKS(out_x * out_y * out_z, THREADS_PER_BLOCK), channels, boxes_num); if (pool_method == 0) { AT_DISPATCH_FLOATING_TYPES_AND_HALF( pts_feature.scalar_type(), "roiaware_maxpool3d", [&] { roiaware_maxpool3d<<>>( boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, pts_feature.data_ptr(), pts_idx_of_voxels.data_ptr(), pooled_features.data_ptr(), argmax.data_ptr()); }); } else if (pool_method == 1) { AT_DISPATCH_FLOATING_TYPES_AND_HALF( pts_feature.scalar_type(), "roiaware_avgpool3d", [&] { roiaware_avgpool3d<<>>( boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, pts_feature.data_ptr(), pts_idx_of_voxels.data_ptr(), pooled_features.data_ptr()); }); } AT_CUDA_CHECK(cudaGetLastError()); } void RoiawarePool3dBackwardCUDAKernelLauncher( int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel, const Tensor pts_idx_of_voxels, const Tensor argmax, const Tensor grad_out, Tensor grad_in, int pool_method) { // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) // params argmax: (N, out_x, out_y, out_z, C) // params grad_out: (N, out_x, out_y, out_z, C) // params grad_in: (npoints, C), return value // params pool_method: 0: max_pool, 1: avg_pool at::cuda::CUDAGuard device_guard(grad_out.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 blocks(GET_BLOCKS(out_x * out_y * out_z, THREADS_PER_BLOCK), channels, boxes_num); dim3 threads(THREADS_PER_BLOCK); if (pool_method == 0) { AT_DISPATCH_FLOATING_TYPES_AND_HALF( 
grad_in.scalar_type(), "roiaware_maxpool3d_backward", [&] { roiaware_maxpool3d_backward<<>>( boxes_num, channels, out_x, out_y, out_z, argmax.data_ptr(), grad_out.data_ptr(), grad_in.data_ptr()); }); } else if (pool_method == 1) { AT_DISPATCH_FLOATING_TYPES_AND_HALF( grad_in.scalar_type(), "roiaware_avgpool3d_backward", [&] { roiaware_avgpool3d_backward<<>>( boxes_num, channels, out_x, out_y, out_z, max_pts_each_voxel, pts_idx_of_voxels.data_ptr(), grad_out.data_ptr(), grad_in.data_ptr()); }); } AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/roipoint_pool3d_cuda.cu ================================================ /* Modified from https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/roipoint_pool3d/src/roipoint_pool3d_kernel.cu Point cloud feature pooling Written by Shaoshuai Shi All Rights Reserved 2018. */ #include #include #include "pytorch_cuda_helper.hpp" #include "roipoint_pool3d_cuda_kernel.cuh" void RoIPointPool3dForwardCUDAKernelLauncher( int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num, const Tensor xyz, const Tensor boxes3d, const Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag) { Tensor pts_assign = at::empty({batch_size, pts_num, boxes_num}, boxes3d.options().dtype(at::kInt)); at::cuda::CUDAGuard device_guard(xyz.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); // blockIdx.x(col), blockIdx.y(row) dim3 blocks(GET_BLOCKS(pts_num, THREADS_PER_BLOCK), boxes_num, batch_size); dim3 threads(THREADS_PER_BLOCK); AT_DISPATCH_FLOATING_TYPES_AND_HALF( xyz.scalar_type(), "assign_pts_to_box3d", [&] { assign_pts_to_box3d<<>>( batch_size, pts_num, boxes_num, xyz.data_ptr(), boxes3d.data_ptr(), pts_assign.data_ptr()); }); Tensor pts_idx = at::empty({batch_size, boxes_num, sampled_pts_num}, boxes3d.options().dtype(at::kInt)); // blockIdx.x(col), blockIdx.y(row) dim3 
blocks2(GET_BLOCKS(boxes_num, THREADS_PER_BLOCK), batch_size); get_pooled_idx<<>>( batch_size, pts_num, boxes_num, sampled_pts_num, pts_assign.data_ptr(), pts_idx.data_ptr(), pooled_empty_flag.data_ptr()); dim3 blocks_pool(GET_BLOCKS(sampled_pts_num, THREADS_PER_BLOCK), boxes_num, batch_size); AT_DISPATCH_FLOATING_TYPES_AND_HALF( xyz.scalar_type(), "roipoint_pool3d_forward", [&] { roipoint_pool3d_forward<<>>( batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, xyz.data_ptr(), pts_idx.data_ptr(), pts_feature.data_ptr(), pooled_features.data_ptr(), pooled_empty_flag.data_ptr()); }); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/rotated_feature_align_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved. // Modified from // https://github.com/SJTU-Thinklab-Det/r3det-on-mmdetection/blob/master/mmdet/ops/fr/src/feature_refine_kernel.cu #include "pytorch_cuda_helper.hpp" #include "rotated_feature_align_cuda_kernel.cuh" void RotatedFeatureAlignForwardCUDAKernelLauncher(const Tensor features, const Tensor best_bboxes, const float spatial_scale, const int points, Tensor output) { at::cuda::CUDAGuard device_guard(features.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); const int output_size = features.numel(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( features.scalar_type(), "rotated_feature_align_forward_cuda_kernel", ([&] { const scalar_t* bottom_data = features.data_ptr(); const scalar_t* bboxes_data = best_bboxes.data_ptr(); scalar_t* top_data = output.data_ptr(); rotated_feature_align_forward_kernel <<>>( output_size, points, bottom_data, bboxes_data, scalar_t(spatial_scale), features.size(1), features.size(2), features.size(3), top_data); })); AT_CUDA_CHECK(cudaGetLastError()); } void RotatedFeatureAlignBackwardCUDAKernelLauncher(const Tensor top_grad, const Tensor best_bboxes, const float spatial_scale, const int points, 
Tensor bottom_grad) { at::cuda::CUDAGuard device_guard(top_grad.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); const int output_size = top_grad.numel(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( top_grad.scalar_type(), "rotated_feature_align_backward_cuda_kernel", ([&] { const scalar_t* top_diff = top_grad.data_ptr(); const scalar_t* bboxes_data = best_bboxes.data_ptr(); scalar_t* bottom_diff = bottom_grad.data_ptr(); rotated_feature_align_backward_kernel <<>>( output_size, points, top_diff, bboxes_data, scalar_t(spatial_scale), top_grad.size(1), top_grad.size(2), top_grad.size(3), bottom_diff); })); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/scatter_points_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved. #include #include #include #include "pytorch_cuda_helper.hpp" #include "scatter_points_cuda_kernel.cuh" std::vector DynamicPointToVoxelForwardCUDAKernelLauncher( const at::Tensor &feats, const at::Tensor &coors, const reduce_t reduce_type) { const int num_input = feats.size(0); const int num_feats = feats.size(1); if (num_input == 0) return {feats.clone().detach(), coors.clone().detach(), coors.new_empty({0}, torch::kInt32), coors.new_empty({0}, torch::kInt32)}; at::Tensor out_coors; at::Tensor coors_map; at::Tensor reduce_count; auto coors_clean = coors.masked_fill(coors.lt(0).any(-1, true), -1); std::tie(out_coors, coors_map, reduce_count) = at::unique_dim(coors_clean, 0, true, true, true); // the first element of out_coors is always (-1,-1,-1) and should be removed out_coors = out_coors.slice(0, 1); reduce_count = reduce_count.slice(0, 1).to(torch::kInt32); coors_map = coors_map.to(torch::kInt32) - 1; auto reduced_feats = at::empty({out_coors.size(0), num_feats}, feats.options()); at::cuda::CUDAGuard device_guard(feats.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 
AT_DISPATCH_FLOATING_TYPES( feats.scalar_type(), "feats_reduce_kernel", ([&] { if (reduce_type == reduce_t::MAX) reduced_feats.fill_(-std::numeric_limits::infinity()); else reduced_feats.fill_(static_cast(0)); dim3 blocks(std::min( at::cuda::ATenCeilDiv(num_input, THREADS_PER_BLOCK), maxGridDim)); dim3 threads(THREADS_PER_BLOCK); feats_reduce_kernel<<>>( feats.data_ptr(), coors_map.data_ptr(), reduced_feats.data_ptr(), num_input, num_feats, reduce_type); if (reduce_type == reduce_t::MEAN) reduced_feats /= reduce_count.unsqueeze(-1).to(reduced_feats.dtype()); })); AT_CUDA_CHECK(cudaGetLastError()); return {reduced_feats, out_coors, coors_map, reduce_count}; } void DynamicPointToVoxelBackwardCUDAKernelLauncher( at::Tensor &grad_feats, const at::Tensor &grad_reduced_feats, const at::Tensor &feats, const at::Tensor &reduced_feats, const at::Tensor &coors_map, const at::Tensor &reduce_count, const reduce_t reduce_type) { const int num_input = feats.size(0); const int num_reduced = reduced_feats.size(0); const int num_feats = feats.size(1); grad_feats.fill_(0); // copy voxel grad to points if (num_input == 0 || num_reduced == 0) return; at::cuda::CUDAGuard device_guard(feats.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); if (reduce_type == reduce_t::MEAN || reduce_type == reduce_t::SUM) { AT_DISPATCH_FLOATING_TYPES( grad_reduced_feats.scalar_type(), "add_reduce_traceback_grad_kernel", ([&] { dim3 blocks(std::min( at::cuda::ATenCeilDiv(num_input, THREADS_PER_BLOCK), maxGridDim)); dim3 threads(THREADS_PER_BLOCK); add_reduce_traceback_grad_kernel<<>>( grad_feats.data_ptr(), grad_reduced_feats.data_ptr(), coors_map.data_ptr(), reduce_count.data_ptr(), num_input, num_feats, reduce_type); })); AT_CUDA_CHECK(cudaGetLastError()); } else { auto reduce_from = at::full({num_reduced, num_feats}, num_input, coors_map.options().dtype(torch::kInt32)); AT_DISPATCH_FLOATING_TYPES( grad_reduced_feats.scalar_type(), "max_reduce_traceback_scatter_idx_kernel", ([&] { dim3 
blocks(std::min( at::cuda::ATenCeilDiv(num_input, THREADS_PER_BLOCK), maxGridDim)); dim3 threads(THREADS_PER_BLOCK); max_reduce_traceback_scatter_idx_kernel<<>>( feats.data_ptr(), reduced_feats.data_ptr(), reduce_from.data_ptr(), coors_map.data_ptr(), num_input, num_feats); })); AT_CUDA_CHECK(cudaGetLastError()); AT_DISPATCH_FLOATING_TYPES( grad_reduced_feats.scalar_type(), "max_reduce_traceback_scatter_idx_kernel", ([&] { dim3 blocks( std::min(at::cuda::ATenCeilDiv(num_reduced, THREADS_PER_BLOCK), maxGridDim)); dim3 threads(THREADS_PER_BLOCK); max_reduce_scatter_grad_kernel<<>>( grad_feats.data_ptr(), grad_reduced_feats.data_ptr(), reduce_from.data_ptr(), num_reduced, num_feats); })); AT_CUDA_CHECK(cudaGetLastError()); } } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/sync_bn_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cuda_helper.hpp" #include "sync_bn_cuda_kernel.cuh" void SyncBNForwardMeanCUDAKernelLauncher(const Tensor input, Tensor mean) { int num = input.size(0); int channels = input.size(1); int spatial = input.size(2); at::cuda::CUDAGuard device_guard(input.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( input.scalar_type(), "sync_bn_forward_mean_cuda_kernel", [&] { sync_bn_forward_mean_cuda_kernel <<>>( input.data_ptr(), mean.data_ptr(), num, channels, spatial); }); AT_CUDA_CHECK(cudaGetLastError()); } void SyncBNForwardVarCUDAKernelLauncher(const Tensor input, const Tensor mean, Tensor var) { int num = input.size(0); int channels = input.size(1); int spatial = input.size(2); at::cuda::CUDAGuard device_guard(input.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( input.scalar_type(), "sync_bn_forward_mean_cuda_kernel", [&] { sync_bn_forward_var_cuda_kernel <<>>( input.data_ptr(), mean.data_ptr(), 
var.data_ptr(), num, channels, spatial); }); AT_CUDA_CHECK(cudaGetLastError()); }

// Host-side launcher for sync_bn_forward_output_cuda_kernel.
//
// Reads num / channels / spatial from input.size(0..2), pins the CUDA device
// of `input`, fetches the current stream, and dispatches on
// input.scalar_type() (floating types and half). All tensor arguments are
// handed to the kernel as raw data pointers together with eps, momentum and
// group_size; the last CUDA error is checked after the launch.
// Assumes input is laid out as (N, C, spatial) — TODO confirm against caller.
//
// NOTE(review): in this copy of the file the launch configuration between
// `<<<` and `>>>` and the template arguments of the kernel and of
// `data_ptr` are absent (`<<>>`, `data_ptr()`); they look stripped by
// whatever produced this dump — restore from the upstream source before
// building.
// NOTE(review): the dispatch label below is
// "sync_bn_forward_mean_cuda_kernel" although the kernel launched is the
// *output* kernel — the label appears copy-pasted; it only affects the text
// of dtype-dispatch error messages.
void SyncBNForwardOutputCUDAKernelLauncher(
    const Tensor input, const Tensor mean, const Tensor var,
    Tensor running_mean, Tensor running_var, const Tensor weight,
    const Tensor bias, Tensor norm, Tensor std, Tensor output, float eps,
    float momentum, int group_size) {
  int num = input.size(0);
  int channels = input.size(1);
  int spatial = input.size(2);

  // Make the device that owns `input` current for the launch below.
  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "sync_bn_forward_mean_cuda_kernel", [&] {
        sync_bn_forward_output_cuda_kernel
            <<>>(
                input.data_ptr(), mean.data_ptr(),
                var.data_ptr(), running_mean.data_ptr(),
                running_var.data_ptr(), weight.data_ptr(),
                bias.data_ptr(), norm.data_ptr(),
                std.data_ptr(), output.data_ptr(), num,
                channels, spatial, eps, momentum, group_size);
      });

  // Surface launch-time errors (e.g. an invalid configuration) immediately.
  AT_CUDA_CHECK(cudaGetLastError());
}

// Host-side launcher for sync_bn_backward_param_cuda_kernel.
//
// Reads num / channels / spatial from grad_output.size(0..2), pins the CUDA
// device of `grad_output`, dispatches on grad_output.scalar_type(), and passes
// grad_output, norm, grad_weight and grad_bias to the kernel as raw data
// pointers. The last CUDA error is checked after the launch.
// Presumably grad_weight / grad_bias receive the per-channel parameter
// gradients — verify against the kernel in sync_bn_cuda_kernel.cuh.
//
// NOTE(review): launch configuration and template arguments are absent in
// this copy (`<<>>`, `data_ptr()`); restore from upstream before building.
void SyncBNBackwardParamCUDAKernelLauncher(const Tensor grad_output,
                                           const Tensor norm,
                                           Tensor grad_weight,
                                           Tensor grad_bias) {
  int num = grad_output.size(0);
  int channels = grad_output.size(1);
  int spatial = grad_output.size(2);

  at::cuda::CUDAGuard device_guard(grad_output.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.scalar_type(), "sync_bn_backward_param_cuda_kernel", [&] {
        sync_bn_backward_param_cuda_kernel
            <<>>(
                grad_output.data_ptr(), norm.data_ptr(),
                grad_weight.data_ptr(), grad_bias.data_ptr(), num,
                channels, spatial);
      });
  AT_CUDA_CHECK(cudaGetLastError());
}

void SyncBNBackwardDataCUDAKernelLauncher(const Tensor grad_output, const Tensor weight, const Tensor grad_weight, const Tensor grad_bias, const Tensor norm, const Tensor std, Tensor grad_input) { int output_size = grad_input.numel(); int num = grad_input.size(0); int channels = grad_input.size(1); int spatial = grad_input.size(2); at::cuda::CUDAGuard device_guard(grad_input.device()); 
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( grad_output.scalar_type(), "sync_bn_backward_data_cuda_kernel", [&] { sync_bn_backward_data_cuda_kernel <<>>( output_size, grad_output.data_ptr(), weight.data_ptr(), grad_weight.data_ptr(), grad_bias.data_ptr(), norm.data_ptr(), std.data_ptr(), grad_input.data_ptr(), num, channels, spatial); }); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/three_interpolate_cuda.cu ================================================ // Modified from // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate_gpu.cu #include #include #include #include "pytorch_cuda_helper.hpp" #include "three_interpolate_cuda_kernel.cuh" void ThreeInterpolateForwardCUDAKernelLauncher(int b, int c, int m, int n, const Tensor points, const Tensor idx, const Tensor weight, Tensor out) { // points: (B, C, M) // idx: (B, N, 3) // weight: (B, N, 3) // output: // out: (B, C, N) at::cuda::CUDAGuard device_guard(points.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); // blockIdx.x(col), blockIdx.y(row) dim3 blocks(GET_BLOCKS(n, THREADS_PER_BLOCK), c, b); dim3 threads(THREADS_PER_BLOCK); AT_DISPATCH_FLOATING_TYPES_AND_HALF( points.scalar_type(), "three_interpolate_forward_cuda_kernel", [&] { three_interpolate_forward_cuda_kernel <<>>( b, c, m, n, points.data_ptr(), idx.data_ptr(), weight.data_ptr(), out.data_ptr()); }); AT_CUDA_CHECK(cudaGetLastError()); } void ThreeInterpolateBackwardCUDAKernelLauncher(int b, int c, int n, int m, const Tensor grad_out, const Tensor idx, const Tensor weight, Tensor grad_points) { // grad_out: (B, C, N) // weight: (B, N, 3) // output: // grad_points: (B, C, M) at::cuda::CUDAGuard device_guard(grad_out.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); // blockIdx.x(col), blockIdx.y(row) dim3 blocks(GET_BLOCKS(n, 
THREADS_PER_BLOCK), c, b); dim3 threads(THREADS_PER_BLOCK); AT_DISPATCH_FLOATING_TYPES_AND_HALF( grad_out.scalar_type(), "three_interpolate_backward_cuda_kernel", [&] { three_interpolate_backward_cuda_kernel <<>>( b, c, n, m, grad_out.data_ptr(), idx.data_ptr(), weight.data_ptr(), grad_points.data_ptr()); }); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/three_nn_cuda.cu ================================================ // Modified from // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate_gpu.cu #include #include #include #include "pytorch_cuda_helper.hpp" #include "three_nn_cuda_kernel.cuh" void ThreeNNForwardCUDAKernelLauncher(int b, int n, int m, const Tensor unknown, const Tensor known, Tensor dist2, Tensor idx) { // unknown: (B, N, 3) // known: (B, M, 3) // output: // dist2: (B, N, 3) // idx: (B, N, 3) at::cuda::CUDAGuard device_guard(unknown.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); // blockIdx.x(col), blockIdx.y(row) dim3 blocks(GET_BLOCKS(n, THREADS_PER_BLOCK), b); dim3 threads(THREADS_PER_BLOCK); AT_DISPATCH_FLOATING_TYPES_AND_HALF( unknown.scalar_type(), "three_nn_forward_cuda_kernel", [&] { three_nn_forward_cuda_kernel<<>>( b, n, m, unknown.data_ptr(), known.data_ptr(), dist2.data_ptr(), idx.data_ptr()); }); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/tin_shift_cuda.cu ================================================ // Copyright (c) OpenMMLab. 
All rights reserved
#include "pytorch_cuda_helper.hpp"
#include "pytorch_device_registry.hpp"
#include "tin_shift_cuda_kernel.cuh"

// Host-side launcher for tin_shift_forward_cuda_kernel.
//
// Derives the problem sizes from the tensors (output.numel(), input.size(0..3),
// shift.size(1)), pins the CUDA device of `input`, fetches the current stream
// and dispatches on input.scalar_type() (floating types and half). The last
// CUDA error is checked after the launch.
// Judging by the local names, input is presumably (batch, T, channels, H*W)
// and shift is (batch, groups) — TODO confirm against the Python wrapper.
//
// NOTE(review): the launch configuration between `<<<` and `>>>` and the
// template arguments of the kernel / `data_ptr` are absent in this copy
// (`<<>>`, `data_ptr()`); restore from upstream before building.
void TINShiftForwardCUDAKernelLauncher(Tensor input, Tensor shift,
                                       Tensor output) {
  int output_size = output.numel();
  int batch_size = input.size(0);
  int t_size = input.size(1);
  int channels = input.size(2);
  int hw_size = input.size(3);
  int group_size = shift.size(1);
  // Integer division: channels handled by each shift group.
  int group_channel = channels / group_size;
  // NOTE(review): not referenced by any token visible below; presumably it
  // sized the (now missing) launch configuration — confirm.
  int num_kernels = batch_size * hw_size * channels;

  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "tin_shift_forward_cuda_kernel", [&] {
        tin_shift_forward_cuda_kernel
            <<>>(
                output_size, input.data_ptr(), shift.data_ptr(),
                output.data_ptr(), batch_size, channels, t_size,
                hw_size, group_size, group_channel);
      });

  // Surface launch-time errors immediately.
  AT_CUDA_CHECK(cudaGetLastError());
}

// Host-side launcher for tin_shift_backward_cuda_kernel.
//
// Mirrors the forward launcher, but every size is read from `grad_output`
// and the kernel receives grad_output / shift / grad_input.
//
// NOTE(review): launch configuration and template arguments are absent in
// this copy (`<<>>`, `data_ptr()`); restore from upstream before building.
void TINShiftBackwardCUDAKernelLauncher(Tensor grad_output, Tensor shift,
                                        Tensor grad_input) {
  int output_size = grad_output.numel();
  int batch_size = grad_output.size(0);
  int t_size = grad_output.size(1);
  int channels = grad_output.size(2);
  int hw_size = grad_output.size(3);
  int group_size = shift.size(1);
  int group_channel = channels / group_size;
  // NOTE(review): unused by the visible tokens; see the forward launcher.
  int num_kernels = batch_size * hw_size * channels;

  at::cuda::CUDAGuard device_guard(grad_output.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.scalar_type(), "tin_shift_backward_cuda_kernel", [&] {
        tin_shift_backward_cuda_kernel
            <<>>(
                output_size, grad_output.data_ptr(), shift.data_ptr(),
                grad_input.data_ptr(), batch_size, channels, t_size,
                hw_size, group_size, group_channel);
      });
  AT_CUDA_CHECK(cudaGetLastError());
}

================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu ================================================ // Modified from // 
https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d_kernel.cu // Copyright (c) 2019, NVIDIA Corporation. All rights reserved. // // This work is made available under the Nvidia Source Code License-NC. // To view a copy of this license, visit // https://nvlabs.github.io/stylegan2/license.html #include #include #include #include #include #include #include static __host__ __device__ __forceinline__ int floor_div(int a, int b) { int c = a / b; if (c * b > a) { c--; } return c; } struct UpFirDn2DKernelParams { int up_x; int up_y; int down_x; int down_y; int pad_x0; int pad_x1; int pad_y0; int pad_y1; int major_dim; int in_h; int in_w; int minor_dim; int kernel_h; int kernel_w; int out_h; int out_w; int loop_major; int loop_x; }; template __global__ void upfirdn2d_kernel_large(scalar_t *out, const scalar_t *input, const scalar_t *kernel, const UpFirDn2DKernelParams p) { int minor_idx = blockIdx.x * blockDim.x + threadIdx.x; int out_y = minor_idx / p.minor_dim; minor_idx -= out_y * p.minor_dim; int out_x_base = blockIdx.y * p.loop_x * blockDim.y + threadIdx.y; int major_idx_base = blockIdx.z * p.loop_major; if (out_x_base >= p.out_w || out_y >= p.out_h || major_idx_base >= p.major_dim) { return; } int mid_y = out_y * p.down_y + p.up_y - 1 - p.pad_y0; int in_y = min(max(floor_div(mid_y, p.up_y), 0), p.in_h); int h = min(max(floor_div(mid_y + p.kernel_h, p.up_y), 0), p.in_h) - in_y; int kernel_y = mid_y + p.kernel_h - (in_y + 1) * p.up_y; for (int loop_major = 0, major_idx = major_idx_base; loop_major < p.loop_major && major_idx < p.major_dim; loop_major++, major_idx++) { for (int loop_x = 0, out_x = out_x_base; loop_x < p.loop_x && out_x < p.out_w; loop_x++, out_x += blockDim.y) { int mid_x = out_x * p.down_x + p.up_x - 1 - p.pad_x0; int in_x = min(max(floor_div(mid_x, p.up_x), 0), p.in_w); int w = min(max(floor_div(mid_x + p.kernel_w, p.up_x), 0), p.in_w) - in_x; int kernel_x = mid_x + p.kernel_w - (in_x + 1) * p.up_x; const scalar_t *x_p = 
&input[((major_idx * p.in_h + in_y) * p.in_w + in_x) * p.minor_dim + minor_idx]; const scalar_t *k_p = &kernel[kernel_y * p.kernel_w + kernel_x]; int x_px = p.minor_dim; int k_px = -p.up_x; int x_py = p.in_w * p.minor_dim; int k_py = -p.up_y * p.kernel_w; scalar_t v = 0.0f; for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { v += static_cast(*x_p) * static_cast(*k_p); x_p += x_px; k_p += k_px; } x_p += x_py - w * x_px; k_p += k_py - w * k_px; } out[((major_idx * p.out_h + out_y) * p.out_w + out_x) * p.minor_dim + minor_idx] = v; } } } template __global__ void upfirdn2d_kernel(scalar_t *out, const scalar_t *input, const scalar_t *kernel, const UpFirDn2DKernelParams p) { const int tile_in_h = ((tile_out_h - 1) * down_y + kernel_h - 1) / up_y + 1; const int tile_in_w = ((tile_out_w - 1) * down_x + kernel_w - 1) / up_x + 1; __shared__ volatile float sk[kernel_h][kernel_w]; __shared__ volatile float sx[tile_in_h][tile_in_w]; int minor_idx = blockIdx.x; int tile_out_y = minor_idx / p.minor_dim; minor_idx -= tile_out_y * p.minor_dim; tile_out_y *= tile_out_h; int tile_out_x_base = blockIdx.y * p.loop_x * tile_out_w; int major_idx_base = blockIdx.z * p.loop_major; if (tile_out_x_base >= p.out_w | tile_out_y >= p.out_h | major_idx_base >= p.major_dim) { return; } for (int tap_idx = threadIdx.x; tap_idx < kernel_h * kernel_w; tap_idx += blockDim.x) { int ky = tap_idx / kernel_w; int kx = tap_idx - ky * kernel_w; scalar_t v = 0.0; if (kx < p.kernel_w & ky < p.kernel_h) { v = kernel[(p.kernel_h - 1 - ky) * p.kernel_w + (p.kernel_w - 1 - kx)]; } sk[ky][kx] = v; } for (int loop_major = 0, major_idx = major_idx_base; loop_major < p.loop_major & major_idx < p.major_dim; loop_major++, major_idx++) { for (int loop_x = 0, tile_out_x = tile_out_x_base; loop_x < p.loop_x & tile_out_x < p.out_w; loop_x++, tile_out_x += tile_out_w) { int tile_mid_x = tile_out_x * down_x + up_x - 1 - p.pad_x0; int tile_mid_y = tile_out_y * down_y + up_y - 1 - p.pad_y0; int tile_in_x = 
floor_div(tile_mid_x, up_x); int tile_in_y = floor_div(tile_mid_y, up_y); __syncthreads(); for (int in_idx = threadIdx.x; in_idx < tile_in_h * tile_in_w; in_idx += blockDim.x) { int rel_in_y = in_idx / tile_in_w; int rel_in_x = in_idx - rel_in_y * tile_in_w; int in_x = rel_in_x + tile_in_x; int in_y = rel_in_y + tile_in_y; scalar_t v = 0.0; if (in_x >= 0 & in_y >= 0 & in_x < p.in_w & in_y < p.in_h) { v = input[((major_idx * p.in_h + in_y) * p.in_w + in_x) * p.minor_dim + minor_idx]; } sx[rel_in_y][rel_in_x] = v; } __syncthreads(); for (int out_idx = threadIdx.x; out_idx < tile_out_h * tile_out_w; out_idx += blockDim.x) { int rel_out_y = out_idx / tile_out_w; int rel_out_x = out_idx - rel_out_y * tile_out_w; int out_x = rel_out_x + tile_out_x; int out_y = rel_out_y + tile_out_y; int mid_x = tile_mid_x + rel_out_x * down_x; int mid_y = tile_mid_y + rel_out_y * down_y; int in_x = floor_div(mid_x, up_x); int in_y = floor_div(mid_y, up_y); int rel_in_x = in_x - tile_in_x; int rel_in_y = in_y - tile_in_y; int kernel_x = (in_x + 1) * up_x - mid_x - 1; int kernel_y = (in_y + 1) * up_y - mid_y - 1; scalar_t v = 0.0; #pragma unroll for (int y = 0; y < kernel_h / up_y; y++) #pragma unroll for (int x = 0; x < kernel_w / up_x; x++) v += sx[rel_in_y + y][rel_in_x + x] * sk[kernel_y + y * up_y][kernel_x + x * up_x]; if (out_x < p.out_w & out_y < p.out_h) { out[((major_idx * p.out_h + out_y) * p.out_w + out_x) * p.minor_dim + minor_idx] = v; } } } } } torch::Tensor upfirdn2d_op(const torch::Tensor &input, const torch::Tensor &kernel, int up_x, int up_y, int down_x, int down_y, int pad_x0, int pad_x1, int pad_y0, int pad_y1) { int curDevice = -1; cudaGetDevice(&curDevice); cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice); UpFirDn2DKernelParams p; auto x = input.contiguous(); auto k = kernel.contiguous(); p.major_dim = x.size(0); p.in_h = x.size(1); p.in_w = x.size(2); p.minor_dim = x.size(3); p.kernel_h = k.size(0); p.kernel_w = k.size(1); p.up_x = up_x; p.up_y = 
up_y; p.down_x = down_x; p.down_y = down_y; p.pad_x0 = pad_x0; p.pad_x1 = pad_x1; p.pad_y0 = pad_y0; p.pad_y1 = pad_y1; p.out_h = (p.in_h * p.up_y + p.pad_y0 + p.pad_y1 - p.kernel_h + p.down_y) / p.down_y; p.out_w = (p.in_w * p.up_x + p.pad_x0 + p.pad_x1 - p.kernel_w + p.down_x) / p.down_x; auto out = at::empty({p.major_dim, p.out_h, p.out_w, p.minor_dim}, x.options()); int mode = -1; int tile_out_h = -1; int tile_out_w = -1; if (p.up_x == 1 && p.up_y == 1 && p.down_x == 1 && p.down_y == 1 && p.kernel_h <= 4 && p.kernel_w <= 4) { mode = 1; tile_out_h = 16; tile_out_w = 64; } if (p.up_x == 1 && p.up_y == 1 && p.down_x == 1 && p.down_y == 1 && p.kernel_h <= 3 && p.kernel_w <= 3) { mode = 2; tile_out_h = 16; tile_out_w = 64; } if (p.up_x == 2 && p.up_y == 2 && p.down_x == 1 && p.down_y == 1 && p.kernel_h <= 4 && p.kernel_w <= 4) { mode = 3; tile_out_h = 16; tile_out_w = 64; } if (p.up_x == 2 && p.up_y == 2 && p.down_x == 1 && p.down_y == 1 && p.kernel_h <= 2 && p.kernel_w <= 2) { mode = 4; tile_out_h = 16; tile_out_w = 64; } if (p.up_x == 1 && p.up_y == 1 && p.down_x == 2 && p.down_y == 2 && p.kernel_h <= 4 && p.kernel_w <= 4) { mode = 5; tile_out_h = 8; tile_out_w = 32; } if (p.up_x == 1 && p.up_y == 1 && p.down_x == 2 && p.down_y == 2 && p.kernel_h <= 2 && p.kernel_w <= 2) { mode = 6; tile_out_h = 8; tile_out_w = 32; } dim3 block_size; dim3 grid_size; if (tile_out_h > 0 && tile_out_w > 0) { p.loop_major = (p.major_dim - 1) / 16384 + 1; p.loop_x = 1; block_size = dim3(32 * 8, 1, 1); grid_size = dim3(((p.out_h - 1) / tile_out_h + 1) * p.minor_dim, (p.out_w - 1) / (p.loop_x * tile_out_w) + 1, (p.major_dim - 1) / p.loop_major + 1); } else { p.loop_major = (p.major_dim - 1) / 16384 + 1; p.loop_x = 4; block_size = dim3(4, 32, 1); grid_size = dim3((p.out_h * p.minor_dim - 1) / block_size.x + 1, (p.out_w - 1) / (p.loop_x * block_size.y) + 1, (p.major_dim - 1) / p.loop_major + 1); } AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&] { switch (mode) { 
case 1: upfirdn2d_kernel <<>>(out.data_ptr(), x.data_ptr(), k.data_ptr(), p); break; case 2: upfirdn2d_kernel <<>>(out.data_ptr(), x.data_ptr(), k.data_ptr(), p); break; case 3: upfirdn2d_kernel <<>>(out.data_ptr(), x.data_ptr(), k.data_ptr(), p); break; case 4: upfirdn2d_kernel <<>>(out.data_ptr(), x.data_ptr(), k.data_ptr(), p); break; case 5: upfirdn2d_kernel <<>>(out.data_ptr(), x.data_ptr(), k.data_ptr(), p); break; case 6: upfirdn2d_kernel <<>>(out.data_ptr(), x.data_ptr(), k.data_ptr(), p); break; default: upfirdn2d_kernel_large<<>>( out.data_ptr(), x.data_ptr(), k.data_ptr(), p); } }); return out; } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/voxelization_cuda.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved. #include #include #include "pytorch_cuda_helper.hpp" #include "voxelization_cuda_kernel.cuh" int HardVoxelizeForwardCUDAKernelLauncher( const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors, at::Tensor &num_points_per_voxel, const std::vector voxel_size, const std::vector coors_range, const int max_points, const int max_voxels, const int NDim = 3) { // current version tooks about 0.04s for one frame on cpu // check device at::cuda::CUDAGuard device_guard(points.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); const int num_points = points.size(0); const int num_features = points.size(1); const float voxel_x = voxel_size[0]; const float voxel_y = voxel_size[1]; const float voxel_z = voxel_size[2]; const float coors_x_min = coors_range[0]; const float coors_y_min = coors_range[1]; const float coors_z_min = coors_range[2]; const float coors_x_max = coors_range[3]; const float coors_y_max = coors_range[4]; const float coors_z_max = coors_range[5]; const int grid_x = round((coors_x_max - coors_x_min) / voxel_x); const int grid_y = round((coors_y_max - coors_y_min) / voxel_y); const int grid_z = 
round((coors_z_max - coors_z_min) / voxel_z); // map points to voxel coors at::Tensor temp_coors = at::zeros({num_points, NDim}, points.options().dtype(at::kInt)); dim3 grid(std::min(at::cuda::ATenCeilDiv(num_points, 512), 4096)); dim3 block(512); // 1. link point to corresponding voxel coors AT_DISPATCH_ALL_TYPES( points.scalar_type(), "hard_voxelize_kernel", ([&] { dynamic_voxelize_kernel<<>>( points.contiguous().data_ptr(), temp_coors.contiguous().data_ptr(), voxel_x, voxel_y, voxel_z, coors_x_min, coors_y_min, coors_z_min, coors_x_max, coors_y_max, coors_z_max, grid_x, grid_y, grid_z, num_points, num_features, NDim); })); AT_CUDA_CHECK(cudaGetLastError()); // 2. map point to the idx of the corresponding voxel, find duplicate coor // create some temporary variables auto point_to_pointidx = -at::ones( { num_points, }, points.options().dtype(at::kInt)); auto point_to_voxelidx = -at::ones( { num_points, }, points.options().dtype(at::kInt)); dim3 map_grid(std::min(at::cuda::ATenCeilDiv(num_points, 512), 4096)); dim3 map_block(512); AT_DISPATCH_ALL_TYPES( temp_coors.scalar_type(), "determin_duplicate", ([&] { point_to_voxelidx_kernel<<>>( temp_coors.contiguous().data_ptr(), point_to_voxelidx.contiguous().data_ptr(), point_to_pointidx.contiguous().data_ptr(), max_points, max_voxels, num_points, NDim); })); AT_CUDA_CHECK(cudaGetLastError()); // 3. 
determine voxel num and voxel's coor index // make the logic in the CUDA device could accelerate about 10 times auto coor_to_voxelidx = -at::ones( { num_points, }, points.options().dtype(at::kInt)); auto voxel_num = at::zeros( { 1, }, points.options().dtype(at::kInt)); // must be zero from the beginning AT_DISPATCH_ALL_TYPES(temp_coors.scalar_type(), "determin_duplicate", ([&] { determin_voxel_num<<<1, 1, 0, stream>>>( num_points_per_voxel.contiguous().data_ptr(), point_to_voxelidx.contiguous().data_ptr(), point_to_pointidx.contiguous().data_ptr(), coor_to_voxelidx.contiguous().data_ptr(), voxel_num.contiguous().data_ptr(), max_points, max_voxels, num_points); })); AT_CUDA_CHECK(cudaGetLastError()); // 4. copy point features to voxels // Step 4 & 5 could be parallel auto pts_output_size = num_points * num_features; dim3 cp_grid(std::min(at::cuda::ATenCeilDiv(pts_output_size, 512), 4096)); dim3 cp_block(512); AT_DISPATCH_ALL_TYPES( points.scalar_type(), "assign_point_to_voxel", ([&] { assign_point_to_voxel<<>>( pts_output_size, points.contiguous().data_ptr(), point_to_voxelidx.contiguous().data_ptr(), coor_to_voxelidx.contiguous().data_ptr(), voxels.contiguous().data_ptr(), max_points, num_features, num_points, NDim); })); // cudaDeviceSynchronize(); // AT_CUDA_CHECK(cudaGetLastError()); // 5. 
copy coors of each voxels auto coors_output_size = num_points * NDim; dim3 coors_cp_grid( std::min(at::cuda::ATenCeilDiv(coors_output_size, 512), 4096)); dim3 coors_cp_block(512); AT_DISPATCH_ALL_TYPES( points.scalar_type(), "assign_point_to_voxel", ([&] { assign_voxel_coors <<>>( coors_output_size, temp_coors.contiguous().data_ptr(), point_to_voxelidx.contiguous().data_ptr(), coor_to_voxelidx.contiguous().data_ptr(), coors.contiguous().data_ptr(), num_points, NDim); })); AT_CUDA_CHECK(cudaGetLastError()); auto voxel_num_cpu = voxel_num.to(at::kCPU); int voxel_num_int = voxel_num_cpu.data_ptr()[0]; return voxel_num_int; } void DynamicVoxelizeForwardCUDAKernelLauncher( const at::Tensor &points, at::Tensor &coors, const std::vector voxel_size, const std::vector coors_range, const int NDim = 3) { // current version tooks about 0.04s for one frame on cpu // check device at::cuda::CUDAGuard device_guard(points.device()); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); const int num_points = points.size(0); const int num_features = points.size(1); const float voxel_x = voxel_size[0]; const float voxel_y = voxel_size[1]; const float voxel_z = voxel_size[2]; const float coors_x_min = coors_range[0]; const float coors_y_min = coors_range[1]; const float coors_z_min = coors_range[2]; const float coors_x_max = coors_range[3]; const float coors_y_max = coors_range[4]; const float coors_z_max = coors_range[5]; const int grid_x = round((coors_x_max - coors_x_min) / voxel_x); const int grid_y = round((coors_y_max - coors_y_min) / voxel_y); const int grid_z = round((coors_z_max - coors_z_min) / voxel_z); const int col_blocks = at::cuda::ATenCeilDiv(num_points, THREADS_PER_BLOCK); dim3 blocks(col_blocks); dim3 threads(THREADS_PER_BLOCK); AT_DISPATCH_ALL_TYPES(points.scalar_type(), "dynamic_voxelize_kernel", [&] { dynamic_voxelize_kernel<<>>( points.contiguous().data_ptr(), coors.contiguous().data_ptr(), voxel_x, voxel_y, voxel_z, coors_x_min, coors_y_min, coors_z_min, 
coors_x_max, coors_y_max, coors_z_max, grid_x, grid_y, grid_z, num_points, num_features, NDim); }); AT_CUDA_CHECK(cudaGetLastError()); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/deform_conv.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void deformable_im2col_impl(Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor data_col) { DISPATCH_DEVICE_IMPL(deformable_im2col_impl, data_im, data_offset, channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, parallel_imgs, deformable_group, data_col); } void deformable_col2im_impl(Tensor data_col, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_im) { DISPATCH_DEVICE_IMPL(deformable_col2im_impl, data_col, data_offset, channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, parallel_imgs, deformable_group, grad_im); } void deformable_col2im_coord_impl( Tensor data_col, Tensor data_im, Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, Tensor grad_offset) { DISPATCH_DEVICE_IMPL(deformable_col2im_coord_impl, data_col, data_im, 
data_offset, channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, parallel_imgs, deformable_group, grad_offset); } void deform_conv_shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, at::Tensor weight, int kH, int kW, int dH, int dW, int padH, int padW, int dilationH, int dilationW, int group, int deformable_group) { TORCH_CHECK( weight.ndimension() == 4, "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, but got: %s", weight.ndimension()); TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); TORCH_CHECK(kW > 0 && kH > 0, "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW), "kernel size should be consistent with weight, ", "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH, kW, weight.size(2), weight.size(3)); TORCH_CHECK(dW > 0 && dH > 0, "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); TORCH_CHECK( dilationW > 0 && dilationH > 0, "dilation should be greater than 0, but got dilationH: %d dilationW: %d", dilationH, dilationW); int ndim = input.ndimension(); int dimf = 0; int dimh = 1; int dimw = 2; if (ndim == 4) { dimf++; dimh++; dimw++; } TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s", ndim); long nInputPlane = weight.size(1) * group; long inputHeight = input.size(dimh); long inputWidth = input.size(dimw); long nOutputPlane = weight.size(0); long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; TORCH_CHECK(nInputPlane % deformable_group == 0, "input channels must divide deformable group size"); if (outputWidth < 1 || outputHeight < 1) AT_ERROR( "Given input size: (%ld x %ld x %ld). " "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, outputWidth); TORCH_CHECK(input.size(1) == nInputPlane, "invalid number of input planes, expected: %d, but got: %d", nInputPlane, input.size(1)); TORCH_CHECK((inputHeight >= kH && inputWidth >= kW), "input image is smaller than kernel"); TORCH_CHECK( (offset.size(2) == outputHeight && offset.size(3) == outputWidth), "invalid spatial size of offset, expected height: %d width: %d, but " "got height: %d width: %d", outputHeight, outputWidth, offset.size(2), offset.size(3)); TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), "invalid number of channels of offset"); if (gradOutput != NULL) { TORCH_CHECK( gradOutput->size(dimf) == nOutputPlane, "invalid number of gradOutput planes, expected: %d, but got: %d", nOutputPlane, gradOutput->size(dimf)); TORCH_CHECK( (gradOutput->size(dimh) == outputHeight && gradOutput->size(dimw) == outputWidth), "invalid size of gradOutput, expected height: %d width: %d , but " "got height: %d width: %d", outputHeight, outputWidth, gradOutput->size(dimh), gradOutput->size(dimw)); } } void deform_conv_forward(Tensor input, Tensor weight, Tensor offset, Tensor output, Tensor columns, Tensor ones, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, int im2col_step) { if (input.device().is_cuda()) { #ifdef MMCV_WITH_CUDA CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(offset); CHECK_CUDA_INPUT(weight); CHECK_CUDA_INPUT(output); CHECK_CUDA_INPUT(columns); CHECK_CUDA_INPUT(ones); #else AT_ERROR("DeformConv is not compiled with GPU support"); #endif } else { CHECK_CPU_INPUT(input); CHECK_CPU_INPUT(offset); CHECK_CPU_INPUT(weight); CHECK_CPU_INPUT(output); CHECK_CPU_INPUT(columns); CHECK_CPU_INPUT(ones); } deform_conv_shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW, dilationH, dilationW, group, deformable_group); at::DeviceGuard guard(input.device()); int batch = 1; if 
(input.ndimension() == 3) { // Force batch batch = 0; input.unsqueeze_(0); offset.unsqueeze_(0); } // todo: assert batchsize dividable by im2col_step long batchSize = input.size(0); long nInputPlane = input.size(1); long inputHeight = input.size(2); long inputWidth = input.size(3); long nOutputPlane = weight.size(0); long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth}); columns = at::zeros( {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, input.options()); if (ones.ndimension() != 2 || ones.size(0) * ones.size(1) < outputHeight * outputWidth) { ones = at::ones({outputHeight, outputWidth}, input.options()); } input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth}); offset = offset.view({batchSize / im2col_step, im2col_step, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); Tensor output_buffer = at::zeros({batchSize / im2col_step, nOutputPlane, im2col_step * outputHeight, outputWidth}, output.options()); output_buffer = output_buffer.view( {output_buffer.size(0), group, output_buffer.size(1) / group, output_buffer.size(2), output_buffer.size(3)}); for (int elt = 0; elt < batchSize / im2col_step; elt++) { deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, im2col_step, deformable_group, columns); columns = columns.view({group, columns.size(0) / group, columns.size(1)}); weight = weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); for (int g = 0; g < group; g++) { output_buffer[elt][g] = output_buffer[elt][g] .flatten(1) .addmm_(weight[g].flatten(1), columns[g]) 
.view_as(output_buffer[elt][g]); } columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), weight.size(3), weight.size(4)}); } output_buffer = output_buffer.view( {output_buffer.size(0), output_buffer.size(1) * output_buffer.size(2), output_buffer.size(3), output_buffer.size(4)}); output_buffer = output_buffer.view({batchSize / im2col_step, nOutputPlane, im2col_step, outputHeight, outputWidth}); output_buffer.transpose_(1, 2); output.copy_(output_buffer); output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); offset = offset.view( {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); if (batch == 0) { output = output.view({nOutputPlane, outputHeight, outputWidth}); input = input.view({nInputPlane, inputHeight, inputWidth}); offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); } } void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput, Tensor gradInput, Tensor gradOffset, Tensor weight, Tensor columns, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, int im2col_step) { if (input.device().is_cuda()) { #ifdef MMCV_WITH_CUDA CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(offset); CHECK_CUDA_INPUT(gradOutput); CHECK_CUDA_INPUT(gradInput); CHECK_CUDA_INPUT(gradOffset); CHECK_CUDA_INPUT(weight); CHECK_CUDA_INPUT(columns); #else AT_ERROR("DeformConv is not compiled with GPU support"); #endif } else { CHECK_CPU_INPUT(input); CHECK_CPU_INPUT(offset); CHECK_CPU_INPUT(gradOutput); CHECK_CPU_INPUT(gradInput); CHECK_CPU_INPUT(gradOffset); CHECK_CPU_INPUT(weight); CHECK_CPU_INPUT(columns); } deform_conv_shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH, padW, dilationH, dilationW, group, deformable_group); at::DeviceGuard guard(input.device()); int batch = 1; 
if (input.ndimension() == 3) { // Force batch batch = 0; input = input.view({1, input.size(0), input.size(1), input.size(2)}); offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); gradOutput = gradOutput.view( {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); } long batchSize = input.size(0); long nInputPlane = input.size(1); long inputHeight = input.size(2); long inputWidth = input.size(3); long nOutputPlane = weight.size(0); long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); columns = at::zeros( {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, input.options()); // change order of grad output gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth}); gradOutput.transpose_(1, 2); gradInput = gradInput.view({batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth}); input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth}); gradOffset = gradOffset.view({batchSize / im2col_step, im2col_step, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); offset = offset.view({batchSize / im2col_step, im2col_step, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); for (int elt = 0; elt < batchSize / im2col_step; elt++) { // divide into groups columns = columns.view({group, columns.size(0) / group, columns.size(1)}); weight = weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); gradOutput = gradOutput.view( {gradOutput.size(0), group, gradOutput.size(1) / group, gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)}); for (int g = 0; g < group; g++) { columns[g] = 
columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), gradOutput[elt][g].flatten(1), 0.0f, 1.0f); } columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); gradOutput = gradOutput.view( {gradOutput.size(0), gradOutput.size(1) * gradOutput.size(2), gradOutput.size(3), gradOutput.size(4), gradOutput.size(5)}); deformable_col2im_coord_impl(columns, input[elt], offset[elt], nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, im2col_step, deformable_group, gradOffset[elt]); deformable_col2im_impl(columns, offset[elt], nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, im2col_step, deformable_group, gradInput[elt]); weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), weight.size(3), weight.size(4)}); } gradOutput.transpose_(1, 2); gradOutput = gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); gradOffset = gradOffset.view( {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); offset = offset.view( {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); if (batch == 0) { gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); input = input.view({nInputPlane, inputHeight, inputWidth}); gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); gradOffset = gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); } } void deform_conv_backward_parameters(Tensor input, Tensor offset, Tensor gradOutput, Tensor gradWeight, Tensor columns, Tensor ones, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, float scale, int im2col_step) { if (input.device().is_cuda()) { #ifdef MMCV_WITH_CUDA 
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(offset); CHECK_CUDA_INPUT(gradOutput); CHECK_CUDA_INPUT(gradWeight); CHECK_CUDA_INPUT(columns); CHECK_CUDA_INPUT(ones); #else AT_ERROR("DeformConv is not compiled with GPU support"); #endif } else { CHECK_CPU_INPUT(input); CHECK_CPU_INPUT(offset); CHECK_CPU_INPUT(gradOutput); CHECK_CPU_INPUT(gradWeight); CHECK_CPU_INPUT(columns); CHECK_CPU_INPUT(ones); } deform_conv_shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW, padH, padW, dilationH, dilationW, group, deformable_group); at::DeviceGuard guard(input.device()); int batch = 1; if (input.ndimension() == 3) { // Force batch batch = 0; input = input.view( at::IntList({1, input.size(0), input.size(1), input.size(2)})); gradOutput = gradOutput.view( {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); } long batchSize = input.size(0); long nInputPlane = input.size(1); long inputHeight = input.size(2); long inputWidth = input.size(3); long nOutputPlane = gradWeight.size(0); long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); columns = at::zeros( {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, input.options()); gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth}); gradOutput.transpose_(1, 2); Tensor gradOutputBuffer = at::zeros_like(gradOutput); gradOutputBuffer = gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step, outputHeight, outputWidth}); gradOutputBuffer = gradOutputBuffer.contiguous(); gradOutputBuffer.copy_(gradOutput); gradOutputBuffer = gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step * outputHeight, outputWidth}); gradOutput.transpose_(1, 2); gradOutput = gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); input 
= input.view({batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth}); offset = offset.view({batchSize / im2col_step, im2col_step, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); for (int elt = 0; elt < batchSize / im2col_step; elt++) { deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, im2col_step, deformable_group, columns); // divide into group gradOutputBuffer = gradOutputBuffer.view( {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group, gradOutputBuffer.size(2), gradOutputBuffer.size(3)}); columns = columns.view({group, columns.size(0) / group, columns.size(1)}); gradWeight = gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1), gradWeight.size(2), gradWeight.size(3)}); for (int g = 0; g < group; g++) { gradWeight[g] = gradWeight[g] .flatten(1) .addmm_(gradOutputBuffer[elt][g].flatten(1), columns[g].transpose(1, 0), 1.0, scale) .view_as(gradWeight[g]); } gradOutputBuffer = gradOutputBuffer.view( {gradOutputBuffer.size(0), gradOutputBuffer.size(1) * gradOutputBuffer.size(2), gradOutputBuffer.size(3), gradOutputBuffer.size(4)}); columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1), gradWeight.size(2), gradWeight.size(3), gradWeight.size(4)}); } input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); offset = offset.view( {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); if (batch == 0) { gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); input = input.view({nInputPlane, inputHeight, inputWidth}); } } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/deform_roi_pool.cpp ================================================ // Copyright (c) OpenMMLab. 
// All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Hands every argument to DISPATCH_DEVICE_IMPL under the key
// `deform_roi_pool_forward_impl`.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and
// presumably selects the backend from the tensors' device -- confirm there.
void deform_roi_pool_forward_impl(Tensor input, Tensor rois, Tensor offset,
                                  Tensor output, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int sampling_ratio, float gamma) {
  DISPATCH_DEVICE_IMPL(deform_roi_pool_forward_impl, input, rois, offset,
                       output, pooled_height, pooled_width, spatial_scale,
                       sampling_ratio, gamma);
}

// Hands every argument to DISPATCH_DEVICE_IMPL under the key
// `deform_roi_pool_backward_impl`.
void deform_roi_pool_backward_impl(Tensor grad_output, Tensor input,
                                   Tensor rois, Tensor offset,
                                   Tensor grad_input, Tensor grad_offset,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sampling_ratio,
                                   float gamma) {
  DISPATCH_DEVICE_IMPL(deform_roi_pool_backward_impl, grad_output, input, rois,
                       offset, grad_input, grad_offset, pooled_height,
                       pooled_width, spatial_scale, sampling_ratio, gamma);
}

// Public entry point: forwards all arguments, unchanged and in the same
// order, to deform_roi_pool_forward_impl.
void deform_roi_pool_forward(Tensor input, Tensor rois, Tensor offset,
                             Tensor output, int pooled_height, int pooled_width,
                             float spatial_scale, int sampling_ratio,
                             float gamma) {
  deform_roi_pool_forward_impl(input, rois, offset, output, pooled_height,
                               pooled_width, spatial_scale, sampling_ratio,
                               gamma);
}

// Public entry point: forwards all arguments, unchanged and in the same
// order, to deform_roi_pool_backward_impl.
void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois,
                              Tensor offset, Tensor grad_input,
                              Tensor grad_offset, int pooled_height,
                              int pooled_width, float spatial_scale,
                              int sampling_ratio, float gamma) {
  deform_roi_pool_backward_impl(grad_output, input, rois, offset, grad_input,
                                grad_offset, pooled_height, pooled_width,
                                spatial_scale, sampling_ratio, gamma);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/focal_loss.cpp
================================================
// Copyright (c) OpenMMLab.
// All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// The four *_impl functions below each hand their arguments to
// DISPATCH_DEVICE_IMPL under a key equal to their own name.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and
// presumably selects the backend from the tensors' device -- confirm there.

void sigmoid_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_forward_impl, input, target, weight,
                       output, gamma, alpha);
}

void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor grad_input,
                                      float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_backward_impl, input, target, weight,
                       grad_input, gamma, alpha);
}

void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(softmax_focal_loss_forward_impl, input, target, weight,
                       output, gamma, alpha);
}

// Unlike the sigmoid variant, the softmax backward pass takes an additional
// `buff` tensor, passed through between `weight` and `grad_input`.
void softmax_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor buff,
                                      Tensor grad_input, float gamma,
                                      float alpha) {
  DISPATCH_DEVICE_IMPL(softmax_focal_loss_backward_impl, input, target, weight,
                       buff, grad_input, gamma, alpha);
}

// Public entry points: each forwards its arguments, unchanged and in the same
// order, to the *_impl function of the same name.

void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
                                Tensor output, float gamma, float alpha) {
  sigmoid_focal_loss_forward_impl(input, target, weight, output, gamma, alpha);
}

void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
                                 Tensor grad_input, float gamma, float alpha) {
  sigmoid_focal_loss_backward_impl(input, target, weight, grad_input, gamma,
                                   alpha);
}

void softmax_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
                                Tensor output, float gamma, float alpha) {
  softmax_focal_loss_forward_impl(input, target, weight, output, gamma, alpha);
}

void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
                                 Tensor buff, Tensor grad_input, float gamma,
                                 float alpha) {
  softmax_focal_loss_backward_impl(input, target, weight, buff, grad_input,
                                   gamma, alpha);
}

================================================
FILE:
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/furthest_point_sample.cpp
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/sampling.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Hands every argument to DISPATCH_DEVICE_IMPL under the key
// `furthest_point_sampling_forward_impl`.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and
// presumably selects the backend from the tensors' device -- confirm there.
void furthest_point_sampling_forward_impl(Tensor points_tensor,
                                          Tensor temp_tensor, Tensor idx_tensor,
                                          int b, int n, int m) {
  DISPATCH_DEVICE_IMPL(furthest_point_sampling_forward_impl, points_tensor,
                       temp_tensor, idx_tensor, b, n, m);
}

// Same dispatch as above, under the key
// `furthest_point_sampling_with_dist_forward_impl`.
void furthest_point_sampling_with_dist_forward_impl(Tensor points_tensor,
                                                    Tensor temp_tensor,
                                                    Tensor idx_tensor, int b,
                                                    int n, int m) {
  DISPATCH_DEVICE_IMPL(furthest_point_sampling_with_dist_forward_impl,
                       points_tensor, temp_tensor, idx_tensor, b, n, m);
}

// Public entry point: forwards all arguments, unchanged and in the same
// order, to furthest_point_sampling_forward_impl.
void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor,
                                     Tensor idx_tensor, int b, int n, int m) {
  furthest_point_sampling_forward_impl(points_tensor, temp_tensor, idx_tensor,
                                       b, n, m);
}

// Public entry point: forwards all arguments, unchanged and in the same
// order, to furthest_point_sampling_with_dist_forward_impl.
void furthest_point_sampling_with_dist_forward(Tensor points_tensor,
                                               Tensor temp_tensor,
                                               Tensor idx_tensor, int b, int n,
                                               int m) {
  furthest_point_sampling_with_dist_forward_impl(points_tensor, temp_tensor,
                                                 idx_tensor, b, n, m);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/fused_bias_leakyrelu.cpp
================================================
// Modified from
// https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_bias_act.cpp

/*
Copyright (c) 2021, NVIDIA Corporation. All rights reserved.

NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
Augmentation (ADA)
=======================================================================

1. Definitions

"Licensor" means any person or entity that distributes its Work.

"Software" means the original work of authorship made available under
this License.
"Work" means the Software and any additions to or derivative works of the Software that are made available under this License. The terms "reproduce," "reproduction," "derivative works," and "distribution" have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. Works, including the Software, are "made available" under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. 2. License Grants 2.1 Copyright Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 3. Limitations 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work ("Your Terms") only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use non-commercially. 
Notwithstanding the foregoing, NVIDIA and its affiliates may use the Work and any derivative works commercially. As used herein, "non-commercially" means for research or evaluation purposes only. 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grant in Section 2.1) will terminate immediately. 3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License. 3.6 Termination. If you violate any term of this License, then your rights under this License (including the grant in Section 2.1) will terminate immediately. 4. Disclaimer of Warranty. THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. 5. Limitation of Liability. EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
=======================================================================
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Returns whatever DISPATCH_DEVICE_IMPL yields for the key
// `fused_bias_leakyrelu_op_impl` with the given arguments.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and
// presumably selects the backend from the tensors' device -- confirm there.
torch::Tensor fused_bias_leakyrelu_op_impl(const torch::Tensor& input,
                                           const torch::Tensor& bias,
                                           const torch::Tensor& refer, int act,
                                           int grad, float alpha, float scale) {
  return DISPATCH_DEVICE_IMPL(fused_bias_leakyrelu_op_impl, input, bias, refer,
                              act, grad, alpha, scale);
}

// Public entry point: returns the result of fused_bias_leakyrelu_op_impl
// called with the same arguments in the same order.
torch::Tensor fused_bias_leakyrelu(const torch::Tensor& input,
                                   const torch::Tensor& bias,
                                   const torch::Tensor& refer, int act,
                                   int grad, float alpha, float scale) {
  return fused_bias_leakyrelu_op_impl(input, bias, refer, act, grad, alpha,
                                      scale);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/gather_points.cpp
================================================
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Hands every argument to DISPATCH_DEVICE_IMPL under the key
// `gather_points_forward_impl`.
void gather_points_forward_impl(int b, int c, int n, int npoints,
                                const Tensor points, const Tensor idx,
                                Tensor out) {
  DISPATCH_DEVICE_IMPL(gather_points_forward_impl, b, c, n, npoints, points,
                       idx, out);
}

// Hands every argument to DISPATCH_DEVICE_IMPL under the key
// `gather_points_backward_impl`.
void gather_points_backward_impl(int b, int c, int n, int npoints,
                                 const Tensor grad_out, const Tensor idx,
                                 Tensor grad_points) {
  DISPATCH_DEVICE_IMPL(gather_points_backward_impl, b, c, n, npoints, grad_out,
                       idx, grad_points);
}

// Public entry point. Note the argument order differs from the impl: tensors
// come first here, while the impl takes the integer sizes first.
void gather_points_forward(Tensor points_tensor, Tensor idx_tensor,
                           Tensor out_tensor, int b, int c, int n,
                           int npoints) {
  gather_points_forward_impl(b, c, n, npoints, points_tensor, idx_tensor,
                             out_tensor);
}

// Public entry point; same reordering of arguments as gather_points_forward.
void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                            Tensor grad_points_tensor, int b, int c, int n,
                            int npoints) {
  gather_points_backward_impl(b, c, n, npoints, grad_out_tensor, idx_tensor,
                              grad_points_tensor);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/group_points.cpp
================================================
// Copyright (c)
OpenMMLab. All rights reserved. // Modified from // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points.cpp #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void group_points_forward_impl(int b, int c, int n, int npoints, int nsample, const Tensor points, const Tensor idx, Tensor out) { DISPATCH_DEVICE_IMPL(group_points_forward_impl, b, c, n, npoints, nsample, points, idx, out); } void group_points_backward_impl(int b, int c, int n, int npoints, int nsample, const Tensor grad_out, const Tensor idx, Tensor grad_points) { DISPATCH_DEVICE_IMPL(group_points_backward_impl, b, c, n, npoints, nsample, grad_out, idx, grad_points); } void group_points_forward(Tensor points_tensor, Tensor idx_tensor, Tensor out_tensor, int b, int c, int n, int npoints, int nsample) { DISPATCH_DEVICE_IMPL(group_points_forward_impl, b, c, n, npoints, nsample, points_tensor, idx_tensor, out_tensor); } void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor, Tensor grad_points_tensor, int b, int c, int n, int npoints, int nsample) { group_points_backward_impl(b, c, n, npoints, nsample, grad_out_tensor, idx_tensor, grad_points_tensor); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/info.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved // modified from // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp #include "pytorch_cpp_helper.hpp" #ifdef MMCV_WITH_CUDA #ifndef HIP_DIFF #include int get_cudart_version() { return CUDART_VERSION; } #endif #endif std::string get_compiling_cuda_version() { #ifdef MMCV_WITH_CUDA #ifndef HIP_DIFF std::ostringstream oss; // copied from // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 auto printCudaStyleVersion = [&](int v) { oss << (v / 1000) << "." 
<< (v / 10 % 100); if (v % 10 != 0) { oss << "." << (v % 10); } }; printCudaStyleVersion(get_cudart_version()); return oss.str(); #else return std::string("rocm not available"); #endif #else return std::string("not available"); #endif } // similar to // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp std::string get_compiler_version() { std::ostringstream ss; #if defined(__GNUC__) #ifndef __clang__ { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } #endif #endif #if defined(__clang_major__) { ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." << __clang_patchlevel__; } #endif #if defined(_MSC_VER) { ss << "MSVC " << _MSC_FULL_VER; } #endif return ss.str(); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/iou3d.cpp ================================================ // Modified from // https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms.cpp /* 3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others) Written by Shaoshuai Shi All Rights Reserved 2019-2020. 
*/ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8; void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a, const int num_b, const Tensor boxes_b, Tensor ans_overlap) { DISPATCH_DEVICE_IMPL(iou3d_boxes_overlap_bev_forward_impl, num_a, boxes_a, num_b, boxes_b, ans_overlap); } void iou3d_boxes_iou_bev_forward_impl(const int num_a, const Tensor boxes_a, const int num_b, const Tensor boxes_b, Tensor ans_iou) { DISPATCH_DEVICE_IMPL(iou3d_boxes_iou_bev_forward_impl, num_a, boxes_a, num_b, boxes_b, ans_iou); } void iou3d_nms_forward_impl(const Tensor boxes, unsigned long long *mask, int boxes_num, float nms_overlap_thresh) { DISPATCH_DEVICE_IMPL(iou3d_nms_forward_impl, boxes, mask, boxes_num, nms_overlap_thresh); } void iou3d_nms_normal_forward_impl(const Tensor boxes, unsigned long long *mask, int boxes_num, float nms_overlap_thresh) { DISPATCH_DEVICE_IMPL(iou3d_nms_normal_forward_impl, boxes, mask, boxes_num, nms_overlap_thresh); } void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b, Tensor ans_overlap) { // params boxes_a: (N, 5) [x1, y1, x2, y2, ry] // params boxes_b: (M, 5) // params ans_overlap: (N, M) int num_a = boxes_a.size(0); int num_b = boxes_b.size(0); iou3d_boxes_overlap_bev_forward_impl(num_a, boxes_a, num_b, boxes_b, ans_overlap); } void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b, Tensor ans_iou) { // params boxes_a: (N, 5) [x1, y1, x2, y2, ry] // params boxes_b: (M, 5) // params ans_overlap: (N, M) int num_a = boxes_a.size(0); int num_b = boxes_b.size(0); iou3d_boxes_iou_bev_forward_impl(num_a, boxes_a, num_b, boxes_b, ans_iou); } void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num, float nms_overlap_thresh) { // params boxes: (N, 5) [x1, y1, x2, y2, ry] // params keep: (N) CHECK_CONTIGUOUS(boxes); CHECK_CONTIGUOUS(keep); int boxes_num = boxes.size(0); int64_t *keep_data = keep.data_ptr(); int64_t 
*keep_num_data = keep_num.data_ptr(); const int col_blocks = (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS; Tensor mask = at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong)); unsigned long long *mask_data = (unsigned long long *)mask.data_ptr(); iou3d_nms_forward_impl(boxes, mask_data, boxes_num, nms_overlap_thresh); at::Tensor mask_cpu = mask.to(at::kCPU); unsigned long long *mask_host = (unsigned long long *)mask_cpu.data_ptr(); std::vector remv_cpu(col_blocks); memset(&remv_cpu[0], 0, sizeof(unsigned long long) * col_blocks); int num_to_keep = 0; for (int i = 0; i < boxes_num; i++) { int nblock = i / THREADS_PER_BLOCK_NMS; int inblock = i % THREADS_PER_BLOCK_NMS; if (!(remv_cpu[nblock] & (1ULL << inblock))) { keep_data[num_to_keep++] = i; unsigned long long *p = &mask_host[0] + i * col_blocks; for (int j = nblock; j < col_blocks; j++) { remv_cpu[j] |= p[j]; } } *keep_num_data = num_to_keep; } } void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num, float nms_overlap_thresh) { // params boxes: (N, 5) [x1, y1, x2, y2, ry] // params keep: (N) CHECK_CONTIGUOUS(boxes); CHECK_CONTIGUOUS(keep); int boxes_num = boxes.size(0); int64_t *keep_data = keep.data_ptr(); int64_t *keep_num_data = keep_num.data_ptr(); const int col_blocks = (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS; Tensor mask = at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong)); unsigned long long *mask_data = (unsigned long long *)mask.data_ptr(); iou3d_nms_normal_forward_impl(boxes, mask_data, boxes_num, nms_overlap_thresh); at::Tensor mask_cpu = mask.to(at::kCPU); unsigned long long *mask_host = (unsigned long long *)mask_cpu.data_ptr(); std::vector remv_cpu(col_blocks); memset(&remv_cpu[0], 0, sizeof(unsigned long long) * col_blocks); int num_to_keep = 0; for (int i = 0; i < boxes_num; i++) { int nblock = i / THREADS_PER_BLOCK_NMS; int inblock = i % THREADS_PER_BLOCK_NMS; if (!(remv_cpu[nblock] & (1ULL << 
inblock))) { keep_data[num_to_keep++] = i; unsigned long long *p = &mask_host[0] + i * col_blocks; for (int j = nblock; j < col_blocks; j++) { remv_cpu[j] |= p[j]; } } } *keep_num_data = num_to_keep; } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/knn.cpp ================================================ // Modified from // https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/pointops/src/knnquery_heap #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void knn_forward_impl(int b, int n, int m, int nsample, const Tensor xyz, const Tensor new_xyz, Tensor idx, Tensor dist2) { DISPATCH_DEVICE_IMPL(knn_forward_impl, b, n, m, nsample, xyz, new_xyz, idx, dist2); } void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor, Tensor dist2_tensor, int b, int n, int m, int nsample) { knn_forward_impl(b, n, m, nsample, xyz_tensor, new_xyz_tensor, idx_tensor, dist2_tensor); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/masked_conv2d.cpp ================================================ // Copyright (c) OpenMMLab. 
// All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Hands every argument to DISPATCH_DEVICE_IMPL under the key
// `masked_im2col_forward_impl`.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and
// presumably selects the backend from the tensors' device -- confirm there.
void masked_im2col_forward_impl(const Tensor im, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor col,
                                const int kernel_h, const int kernel_w,
                                const int pad_h, const int pad_w) {
  DISPATCH_DEVICE_IMPL(masked_im2col_forward_impl, im, mask_h_idx, mask_w_idx,
                       col, kernel_h, kernel_w, pad_h, pad_w);
}

// Hands every argument to DISPATCH_DEVICE_IMPL under the key
// `masked_col2im_forward_impl`.
void masked_col2im_forward_impl(const Tensor col, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor im, int height,
                                int width, int channels) {
  DISPATCH_DEVICE_IMPL(masked_col2im_forward_impl, col, mask_h_idx, mask_w_idx,
                       im, height, width, channels);
}

// Public entry point: forwards all arguments, unchanged and in the same
// order, to masked_im2col_forward_impl.
void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx,
                           const Tensor mask_w_idx, Tensor col,
                           const int kernel_h, const int kernel_w,
                           const int pad_h, const int pad_w) {
  masked_im2col_forward_impl(im, mask_h_idx, mask_w_idx, col, kernel_h,
                             kernel_w, pad_h, pad_w);
}

// Public entry point: forwards all arguments, unchanged and in the same
// order, to masked_col2im_forward_impl.
void masked_col2im_forward(const Tensor col, const Tensor mask_h_idx,
                           const Tensor mask_w_idx, Tensor im, int height,
                           int width, int channels) {
  masked_col2im_forward_impl(col, mask_h_idx, mask_w_idx, im, height, width,
                             channels);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/min_area_polygons.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Hands both tensors to DISPATCH_DEVICE_IMPL under the key
// `min_area_polygons_impl`.
void min_area_polygons_impl(const Tensor pointsets, Tensor polygons) {
  DISPATCH_DEVICE_IMPL(min_area_polygons_impl, pointsets, polygons);
}

// Public entry point: forwards both tensors to min_area_polygons_impl.
void min_area_polygons(const Tensor pointsets, Tensor polygons) {
  min_area_polygons_impl(pointsets, polygons);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/modulated_deform_conv.cpp
================================================
// Copyright (c) OpenMMLab.
All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void modulated_deformable_im2col_impl( const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor data_col) { DISPATCH_DEVICE_IMPL(modulated_deformable_im2col_impl, data_im, data_offset, data_mask, batch_size, channels, height_im, width_im, height_col, width_col, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, data_col); } void modulated_deformable_col2im_impl( const Tensor data_col, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor grad_im) { DISPATCH_DEVICE_IMPL(modulated_deformable_col2im_impl, data_col, data_offset, data_mask, batch_size, channels, height_im, width_im, height_col, width_col, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, grad_im); } void modulated_deformable_col2im_coord_impl( const Tensor data_col, const Tensor data_im, const Tensor data_offset, const Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, Tensor grad_offset, Tensor grad_mask) { 
DISPATCH_DEVICE_IMPL(modulated_deformable_col2im_coord_impl, data_col, data_im, data_offset, data_mask, batch_size, channels, height_im, width_im, height_col, width_col, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, grad_offset, grad_mask); } void modulated_deform_conv_forward( Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const int group, const int deformable_group, const bool with_bias) { at::DeviceGuard guard(input.device()); const int batch = input.size(0); const int channels = input.size(1); const int height = input.size(2); const int width = input.size(3); const int channels_out = weight.size(0); const int channels_kernel = weight.size(1); const int kernel_h_ = weight.size(2); const int kernel_w_ = weight.size(3); if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h_, kernel_w, kernel_h_, kernel_w_); if (channels != channels_kernel * group) AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", channels, channels_kernel * group); const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; if (ones.ndimension() != 2 || ones.size(0) * ones.size(1) < height_out * width_out) { // Resize plane and fill with ones... 
ones = at::ones({height_out, width_out}, input.options()); } // resize output output = output.view({batch, channels_out, height_out, width_out}).zero_(); // resize temporary columns columns = at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options()); output = output.view({output.size(0), group, output.size(1) / group, output.size(2), output.size(3)}); for (int b = 0; b < batch; b++) { modulated_deformable_im2col_impl( input[b], offset[b], mask[b], 1, channels, height, width, height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, columns); // divide into group weight = weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); columns = columns.view({group, columns.size(0) / group, columns.size(1)}); for (int g = 0; g < group; g++) { output[b][g] = output[b][g] .flatten(1) .addmm_(weight[g].flatten(1), columns[g]) .view_as(output[b][g]); } weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), weight.size(3), weight.size(4)}); columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); } output = output.view({output.size(0), output.size(1) * output.size(2), output.size(3), output.size(4)}); if (with_bias) { output += bias.view({1, bias.size(0), 1, 1}); } } void modulated_deform_conv_backward( Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight, Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output, int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, const bool with_bias) { at::DeviceGuard guard(input.device()); const int batch = input.size(0); const int channels = input.size(1); const int height = input.size(2); const int width = input.size(3); const int channels_kernel = weight.size(1); const int kernel_h_ = 
weight.size(2); const int kernel_w_ = weight.size(3); if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h_, kernel_w, kernel_h_, kernel_w_); if (channels != channels_kernel * group) AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", channels, channels_kernel * group); const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; if (ones.ndimension() != 2 || ones.size(0) * ones.size(1) < height_out * width_out) { // Resize plane and fill with ones... ones = at::ones({height_out, width_out}, input.options()); } grad_input = grad_input.view({batch, channels, height, width}); columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out}, input.options()); grad_output = grad_output.view({grad_output.size(0), group, grad_output.size(1) / group, grad_output.size(2), grad_output.size(3)}); for (int b = 0; b < batch; b++) { // divide int group columns = columns.view({group, columns.size(0) / group, columns.size(1)}); weight = weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); for (int g = 0; g < group; g++) { columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), grad_output[b][g].flatten(1), 0.0f, 1.0f); } columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), weight.size(3), weight.size(4)}); // gradient w.r.t. input coordinate data modulated_deformable_col2im_coord_impl( columns, input[b], offset[b], mask[b], 1, channels, height, width, height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b], grad_mask[b]); // gradient w.r.t. 
input data modulated_deformable_col2im_impl( columns, offset[b], mask[b], 1, channels, height, width, height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, grad_input[b]); // gradient w.r.t. weight, dWeight should accumulate across the batch and // group modulated_deformable_im2col_impl( input[b], offset[b], mask[b], 1, channels, height, width, height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, columns); columns = columns.view({group, columns.size(0) / group, columns.size(1)}); grad_weight = grad_weight.view({group, grad_weight.size(0) / group, grad_weight.size(1), grad_weight.size(2), grad_weight.size(3)}); if (with_bias) grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); for (int g = 0; g < group; g++) { grad_weight[g] = grad_weight[g] .flatten(1) .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) .view_as(grad_weight[g]); if (with_bias) { grad_bias[g] = grad_bias[g] .view({-1, 1}) .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) .view(-1); } } columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1), grad_weight.size(2), grad_weight.size(3), grad_weight.size(4)}); if (with_bias) grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); } grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1), grad_output.size(2), grad_output.size(3), grad_output.size(4)}); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/ms_deform_attn.cpp ================================================ /*! ************************************************************************************************** * Deformable DETR * Copyright (c) 2020 SenseTime. All Rights Reserved. 
* Licensed under the Apache License, Version 2.0 [see LICENSE for details] ************************************************************************************************** * Modified from *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 ************************************************************************************************** */ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" Tensor ms_deform_attn_impl_forward(const Tensor &value, const Tensor &spatial_shapes, const Tensor &level_start_index, const Tensor &sampling_loc, const Tensor &attn_weight, const int im2col_step) { return DISPATCH_DEVICE_IMPL(ms_deform_attn_impl_forward, value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); } void ms_deform_attn_impl_backward( const Tensor &value, const Tensor &spatial_shapes, const Tensor &level_start_index, const Tensor &sampling_loc, const Tensor &attn_weight, const Tensor &grad_output, Tensor &grad_value, Tensor &grad_sampling_loc, Tensor &grad_attn_weight, const int im2col_step) { DISPATCH_DEVICE_IMPL(ms_deform_attn_impl_backward, value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, grad_value, grad_sampling_loc, grad_attn_weight, im2col_step); } Tensor ms_deform_attn_forward(const Tensor &value, const Tensor &spatial_shapes, const Tensor &level_start_index, const Tensor &sampling_loc, const Tensor &attn_weight, const int im2col_step) { at::DeviceGuard guard(value.device()); return ms_deform_attn_impl_forward(value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); } void ms_deform_attn_backward(const Tensor &value, const Tensor &spatial_shapes, const Tensor &level_start_index, const Tensor &sampling_loc, const Tensor &attn_weight, const Tensor &grad_output, Tensor &grad_value, Tensor &grad_sampling_loc, Tensor &grad_attn_weight, const int im2col_step) { at::DeviceGuard guard(value.device()); 
ms_deform_attn_impl_backward(value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, grad_value, grad_sampling_loc, grad_attn_weight, im2col_step); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/nms.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset) { return DISPATCH_DEVICE_IMPL(nms_impl, boxes, scores, iou_threshold, offset); } Tensor softnms_impl(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold, float sigma, float min_score, int method, int offset) { return DISPATCH_DEVICE_IMPL(softnms_impl, boxes, scores, dets, iou_threshold, sigma, min_score, method, offset); } std::vector > nms_match_impl(Tensor dets, float iou_threshold) { return DISPATCH_DEVICE_IMPL(nms_match_impl, dets, iou_threshold); } Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset) { return nms_impl(boxes, scores, iou_threshold, offset); } Tensor softnms(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold, float sigma, float min_score, int method, int offset) { return softnms_impl(boxes, scores, dets, iou_threshold, sigma, min_score, method, offset); } std::vector > nms_match(Tensor dets, float iou_threshold) { return nms_match_impl(dets, iou_threshold); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/nms_rotated.cpp ================================================ // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved // modified from // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated.h #include "pytorch_cpp_helper.hpp" Tensor nms_rotated_cpu(const Tensor dets, const Tensor scores, const float iou_threshold); #ifdef MMCV_WITH_CUDA Tensor nms_rotated_cuda(const Tensor dets, const Tensor scores, const Tensor order, const Tensor dets_sorted, const float iou_threshold, const int multi_label); #endif // Interface for Python // inline is needed to prevent multiple function definitions when this header is // included by different cpps Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order, const Tensor dets_sorted, const float iou_threshold, const int multi_label) { assert(dets.device().is_cuda() == scores.device().is_cuda()); if (dets.device().is_cuda()) { #ifdef MMCV_WITH_CUDA return nms_rotated_cuda(dets, scores, order, dets_sorted, iou_threshold, multi_label); #else AT_ERROR("Not compiled with GPU support"); #endif } return nms_rotated_cpu(dets, scores, iou_threshold); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/pixel_group.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved // It is modified from https://github.com/WenmuZhou/PAN.pytorch #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" std::vector> pixel_group_impl( Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label, Tensor kernel_contour, int kernel_region_num, float dis_threshold) { return DISPATCH_DEVICE_IMPL(pixel_group_impl, score, mask, embedding, kernel_label, kernel_contour, kernel_region_num, dis_threshold); } std::vector> pixel_group( Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label, Tensor kernel_contour, int kernel_region_num, float distance_threshold) { score = score.contiguous(); mask = mask.contiguous(); embedding = embedding.contiguous(); kernel_label = kernel_label.contiguous(); kernel_contour = kernel_contour.contiguous(); return pixel_group_impl(score, mask, embedding, kernel_label, kernel_contour, kernel_region_num, distance_threshold); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/points_in_boxes.cpp ================================================ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void points_in_boxes_part_forward_impl(int batch_size, int boxes_num, int pts_num, const Tensor boxes, const Tensor pts, Tensor box_idx_of_points) { DISPATCH_DEVICE_IMPL(points_in_boxes_part_forward_impl, batch_size, boxes_num, pts_num, boxes, pts, box_idx_of_points); } void points_in_boxes_all_forward_impl(int batch_size, int boxes_num, int pts_num, const Tensor boxes, const Tensor pts, Tensor box_idx_of_points) { DISPATCH_DEVICE_IMPL(points_in_boxes_all_forward_impl, batch_size, boxes_num, pts_num, boxes, pts, box_idx_of_points); } void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor, Tensor box_idx_of_points_tensor) { // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR // coordinate, z is the bottom center, each box params pts: (B, npoints, 3) // [x, y, z] in LiDAR 
coordinate params boxes_idx_of_points: (B, npoints), // default -1 int batch_size = boxes_tensor.size(0); int boxes_num = boxes_tensor.size(1); int pts_num = pts_tensor.size(1); points_in_boxes_part_forward_impl(batch_size, boxes_num, pts_num, boxes_tensor, pts_tensor, box_idx_of_points_tensor); } void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor, Tensor box_idx_of_points_tensor) { // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR // coordinate, z is the bottom center. params pts: (B, npoints, 3) [x, y, z] // in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default -1 int batch_size = boxes_tensor.size(0); int boxes_num = boxes_tensor.size(1); int pts_num = pts_tensor.size(1); points_in_boxes_all_forward_impl(batch_size, boxes_num, pts_num, boxes_tensor, pts_tensor, box_idx_of_points_tensor); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/points_in_polygons.cpp ================================================ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void points_in_polygons_forward_impl(const Tensor points, const Tensor polygons, Tensor output, const int rows, const int cols) { DISPATCH_DEVICE_IMPL(points_in_polygons_forward_impl, points, polygons, output, rows, cols); } void points_in_polygons_forward(Tensor points, Tensor polygons, Tensor output) { int rows = points.size(0); int cols = polygons.size(0); points_in_polygons_forward_impl(points, polygons, output, rows, cols); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/psamask.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved // Modified from // https://github.com/hszhao/semseg/blob/master/lib/psa/src #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void psamask_forward_impl(const int psa_type, const Tensor input, Tensor output, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { DISPATCH_DEVICE_IMPL(psamask_forward_impl, psa_type, input, output, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } void psamask_backward_impl(const int psa_type, const Tensor grad_output, Tensor grad_input, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { DISPATCH_DEVICE_IMPL(psamask_backward_impl, psa_type, grad_output, grad_input, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } void psamask_forward(const Tensor input, Tensor output, const int psa_type, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { psamask_forward_impl(psa_type, input, output, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } void psamask_backward(Tensor grad_output, const Tensor grad_input, const int psa_type, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask) { psamask_backward_impl(psa_type, grad_output, grad_input, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, half_w_mask); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/pybind.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "pytorch_cpp_helper.hpp" std::string get_compiler_version(); std::string get_compiling_cuda_version(); void assign_score_withk_forward(const Tensor &points, const Tensor ¢ers, const Tensor &scores, const Tensor &knn_idx, Tensor &output, int B, int N0, int N1, int M, int K, int O, int aggregate); void assign_score_withk_backward(const Tensor &grad_out, const Tensor &points, const Tensor ¢ers, const Tensor &scores, const Tensor &knn_idx, Tensor &grad_points, Tensor &grad_centers, Tensor &grad_scores, int B, int N0, int N1, int M, int K, int O, int aggregate); void carafe_naive_forward(Tensor features, Tensor masks, Tensor output, int kernel_size, int group_size, int scale_factor); void carafe_naive_backward(Tensor top_grad, Tensor features, Tensor masks, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor); void carafe_forward(Tensor features, Tensor masks, Tensor rfeatures, Tensor routput, Tensor rmasks, Tensor output, int kernel_size, int group_size, int scale_factor); void carafe_backward(Tensor top_grad, Tensor rfeatures, Tensor masks, Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor); void deform_conv_forward(Tensor input, Tensor weight, Tensor offset, Tensor output, Tensor columns, Tensor ones, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, int im2col_step); void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput, Tensor gradInput, Tensor gradOffset, Tensor weight, Tensor columns, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, int im2col_step); void deform_conv_backward_parameters(Tensor input, Tensor offset, Tensor gradOutput, Tensor gradWeight, Tensor columns, Tensor ones, int kW, int kH, int dW, int dH, int padW, int 
padH, int dilationW, int dilationH, int group, int deformable_group, float scale, int im2col_step); void deform_roi_pool_forward(Tensor input, Tensor rois, Tensor offset, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma); void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois, Tensor offset, Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, float gamma); void group_points_forward(Tensor points_tensor, Tensor idx_tensor, Tensor out_tensor, int b, int c, int n, int npoints, int nsample); void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor, Tensor grad_points_tensor, int b, int c, int n, int npoints, int nsample); void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag); void gather_points_forward(Tensor points_tensor, Tensor idx_tensor, Tensor out_tensor, int b, int c, int n, int npoints); void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor, Tensor grad_points_tensor, int b, int c, int n, int npoints); void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight, Tensor output, float gamma, float alpha); void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight, Tensor grad_input, float gamma, float alpha); void softmax_focal_loss_forward(Tensor input, Tensor target, Tensor weight, Tensor output, float gamma, float alpha); void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight, Tensor buff, Tensor grad_input, float gamma, float alpha); void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor, Tensor weight_tensor, Tensor out_tensor, int b, int c, int m, int n); void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor, Tensor weight_tensor, Tensor grad_points_tensor, int b, int c, int n, int m); void three_nn_forward(Tensor unknown_tensor, 
Tensor known_tensor, Tensor dist2_tensor, Tensor idx_tensor, int b, int n, int m); void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, const int mode, const bool aligned, const int offset); void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor, Tensor dist2_tensor, int b, int n, int m, int nsample); void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b, Tensor ans_overlap); void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b, Tensor ans_iou); void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num, float nms_overlap_thresh); void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num, float nms_overlap_thresh); void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m); void furthest_point_sampling_with_dist_forward(Tensor points_tensor, Tensor temp_tensor, Tensor idx_tensor, int b, int n, int m); void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w); void masked_col2im_forward(const Tensor col, const Tensor mask_h_idx, const Tensor mask_w_idx, Tensor im, int height, int width, int channels); void modulated_deform_conv_forward( Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const int group, const int deformable_group, const bool with_bias); void modulated_deform_conv_backward( Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight, Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output, int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int dilation_h, 
int dilation_w, int group, int deformable_group, const bool with_bias); Tensor ms_deform_attn_forward(const Tensor &value, const Tensor &spatial_shapes, const Tensor &level_start_index, const Tensor &sampling_loc, const Tensor &attn_weight, const int im2col_step); void ms_deform_attn_backward(const Tensor &value, const Tensor &spatial_shapes, const Tensor &level_start_index, const Tensor &sampling_loc, const Tensor &attn_weight, const Tensor &grad_output, Tensor &grad_value, Tensor &grad_sampling_loc, Tensor &grad_attn_weight, const int im2col_step); Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset); Tensor softnms(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold, float sigma, float min_score, int method, int offset); std::vector> nms_match(Tensor dets, float iou_threshold); std::vector> pixel_group( Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label, Tensor kernel_contour, int kernel_region_num, float distance_threshold); std::vector> contour_expand(Tensor kernel_mask, Tensor internal_kernel_label, int min_kernel_area, int kernel_num); void roi_align_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned); void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y, Tensor argmax_x, Tensor grad_input, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned); void roi_pool_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax, int pooled_height, int pooled_width, float spatial_scale); void roi_pool_backward(Tensor grad_output, Tensor rois, Tensor argmax, Tensor grad_input, int pooled_height, int pooled_width, float spatial_scale); void sync_bn_forward_mean(const Tensor input, Tensor mean); void sync_bn_forward_var(const Tensor input, const Tensor mean, Tensor var); void sync_bn_forward_output(const Tensor 
input, const Tensor mean, const Tensor var, const Tensor weight, const Tensor bias, Tensor running_mean, Tensor running_var, Tensor norm, Tensor std, Tensor output, float eps, float momentum, int group_size); void sync_bn_backward_param(const Tensor grad_output, const Tensor norm, Tensor grad_weight, Tensor grad_bias); void sync_bn_backward_data(const Tensor grad_output, const Tensor weight, const Tensor grad_weight, const Tensor grad_bias, const Tensor norm, const Tensor std, Tensor grad_input); void psamask_forward(const Tensor input, Tensor output, const int psa_type, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask); void psamask_backward(Tensor grad_output, const Tensor grad_input, const int psa_type, const int num_, const int h_feature, const int w_feature, const int h_mask, const int w_mask, const int half_h_mask, const int half_w_mask); void tin_shift_forward(Tensor input, Tensor shift, Tensor output); void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input); void ball_query_forward(Tensor new_xyz_tensor, Tensor xyz_tensor, Tensor idx_tensor, int b, int n, int m, float min_radius, float max_radius, int nsample); Tensor bottom_pool_forward(Tensor input); Tensor bottom_pool_backward(Tensor input, Tensor grad_output); Tensor left_pool_forward(Tensor input); Tensor left_pool_backward(Tensor input, Tensor grad_output); Tensor right_pool_forward(Tensor input); Tensor right_pool_backward(Tensor input, Tensor grad_output); Tensor top_pool_forward(Tensor input); Tensor top_pool_backward(Tensor input, Tensor grad_output); void box_iou_rotated(const Tensor boxes1, const Tensor boxes2, Tensor ious, const int mode_flag, const bool aligned); Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order, const Tensor dets_sorted, const float iou_threshold, const int multi_label); Tensor upfirdn2d(const Tensor &input, const Tensor &kernel, int up_x, 
int up_y, int down_x, int down_y, int pad_x0, int pad_x1, int pad_y0, int pad_y1); Tensor fused_bias_leakyrelu(const Tensor &input, const Tensor &bias, const Tensor &refer, int act, int grad, float alpha, float scale); void roi_align_rotated_forward(Tensor input, Tensor rois, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int sample_num, bool aligned, bool clockwise); void roi_align_rotated_backward(Tensor grad_output, Tensor rois, Tensor grad_input, int pooled_height, int pooled_width, float spatial_scale, int sample_num, bool aligned, bool clockwise); std::vector dynamic_point_to_voxel_forward( const torch::Tensor &feats, const torch::Tensor &coors, const std::string &reduce_type); void dynamic_point_to_voxel_backward(torch::Tensor &grad_feats, const torch::Tensor &grad_reduced_feats, const torch::Tensor &feats, const torch::Tensor &reduced_feats, const torch::Tensor &coors_idx, const torch::Tensor &reduce_count, const std::string &reduce_type); void hard_voxelize_forward(const at::Tensor &points, const at::Tensor &voxel_size, const at::Tensor &coors_range, at::Tensor &voxels, at::Tensor &coors, at::Tensor &num_points_per_voxel, at::Tensor &voxel_num, const int max_points, const int max_voxels, const int NDim); void dynamic_voxelize_forward(const at::Tensor &points, const at::Tensor &voxel_size, const at::Tensor &coors_range, at::Tensor &coors, const int NDim); void border_align_forward(const Tensor &input, const Tensor &boxes, Tensor output, Tensor argmax_idx, const int pool_size); void border_align_backward(const Tensor &grad_output, const Tensor &boxes, const Tensor &argmax_idx, Tensor grad_input, const int pool_size); void points_in_boxes_cpu_forward(Tensor boxes_tensor, Tensor pts_tensor, Tensor pts_indices_tensor); void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor, Tensor box_idx_of_points_tensor); void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor, Tensor box_idx_of_points_tensor); void 
roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels, Tensor pooled_features, int pool_method); void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax, Tensor grad_out, Tensor grad_in, int pool_method); void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW); void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2, Tensor grad_input1, Tensor grad_input2, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW); void rotated_feature_align_forward(const Tensor features, const Tensor best_bboxes, Tensor output, const float spatial_scale, const int points); void rotated_feature_align_backward(const Tensor top_grad, const Tensor best_bboxes, Tensor bottom_grad, const float spatial_scale, const int points); void riroi_align_rotated_forward(Tensor features, Tensor rois, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int num_samples, int num_orientations, bool clockwise); void riroi_align_rotated_backward(Tensor top_grad, Tensor rois, Tensor bottom_grad, int pooled_height, int pooled_width, float spatial_scale, int num_samples, int num_orientations, bool clockwise); void points_in_polygons_forward(Tensor points, Tensor polygons, Tensor output); void min_area_polygons(const Tensor pointsets, Tensor polygons); void active_rotated_filter_forward(const Tensor input, const Tensor indices, Tensor output); void active_rotated_filter_backward(const Tensor grad_out, const Tensor indices, Tensor grad_in); void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious); void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output); PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("upfirdn2d", 
&upfirdn2d, "upfirdn2d (CUDA)", py::arg("input"), py::arg("kernel"), py::arg("up_x"), py::arg("up_y"), py::arg("down_x"), py::arg("down_y"), py::arg("pad_x0"), py::arg("pad_x1"), py::arg("pad_y0"), py::arg("pad_y1")); m.def("fused_bias_leakyrelu", &fused_bias_leakyrelu, "fused_bias_leakyrelu (CUDA)", py::arg("input"), py::arg("bias"), py::arg("empty"), py::arg("act"), py::arg("grad"), py::arg("alpha"), py::arg("scale")); m.def("gather_points_forward", &gather_points_forward, "gather_points_forward", py::arg("points_tensor"), py::arg("idx_tensor"), py::arg("out_tensor"), py::arg("b"), py::arg("c"), py::arg("n"), py::arg("npoints")); m.def("gather_points_backward", &gather_points_backward, "gather_points_backward", py::arg("grad_out_tensor"), py::arg("idx_tensor"), py::arg("grad_points_tensor"), py::arg("b"), py::arg("c"), py::arg("n"), py::arg("npoints")); m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); m.def("get_compiling_cuda_version", &get_compiling_cuda_version, "get_compiling_cuda_version"); m.def("assign_score_withk_forward", &assign_score_withk_forward, "assign_score_withk_forward", py::arg("points"), py::arg("centers"), py::arg("scores"), py::arg("knn_idx"), py::arg("output"), py::arg("B"), py::arg("N0"), py::arg("N1"), py::arg("M"), py::arg("K"), py::arg("O"), py::arg("aggregate")); m.def("assign_score_withk_backward", &assign_score_withk_backward, "assign_score_withk_backward", py::arg("grad_out"), py::arg("points"), py::arg("centers"), py::arg("scores"), py::arg("knn_idx"), py::arg("grad_points"), py::arg("grad_centers"), py::arg("grad_scores"), py::arg("B"), py::arg("N0"), py::arg("N1"), py::arg("M"), py::arg("K"), py::arg("O"), py::arg("aggregate")); m.def("knn_forward", &knn_forward, "knn_forward", py::arg("xyz_tensor"), py::arg("new_xyz_tensor"), py::arg("idx_tensor"), py::arg("dist2_tensor"), py::arg("b"), py::arg("n"), py::arg("m"), py::arg("nsample")); m.def("carafe_naive_forward", &carafe_naive_forward, 
"carafe_naive_forward", py::arg("features"), py::arg("masks"), py::arg("output"), py::arg("kernel_size"), py::arg("group_size"), py::arg("scale_factor")); m.def("carafe_naive_backward", &carafe_naive_backward, "carafe_naive_backward", py::arg("top_grad"), py::arg("features"), py::arg("masks"), py::arg("bottom_grad"), py::arg("mask_grad"), py::arg("kernel_size"), py::arg("group_size"), py::arg("scale_factor")); m.def("carafe_forward", &carafe_forward, "carafe_forward", py::arg("features"), py::arg("masks"), py::arg("rfeatures"), py::arg("routput"), py::arg("rmasks"), py::arg("output"), py::arg("kernel_size"), py::arg("group_size"), py::arg("scale_factor")); m.def("carafe_backward", &carafe_backward, "carafe_backward", py::arg("top_grad"), py::arg("rfeatures"), py::arg("masks"), py::arg("rtop_grad"), py::arg("rbottom_grad_hs"), py::arg("rbottom_grad"), py::arg("rmask_grad"), py::arg("bottom_grad"), py::arg("mask_grad"), py::arg("kernel_size"), py::arg("group_size"), py::arg("scale_factor")); m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward", py::arg("input"), py::arg("weight"), py::arg("offset"), py::arg("output"), py::arg("columns"), py::arg("ones"), py::arg("kW"), py::arg("kH"), py::arg("dW"), py::arg("dH"), py::arg("padH"), py::arg("padW"), py::arg("dilationW"), py::arg("dilationH"), py::arg("group"), py::arg("deformable_group"), py::arg("im2col_step")); m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input", py::arg("input"), py::arg("offset"), py::arg("gradOutput"), py::arg("gradInput"), py::arg("gradOffset"), py::arg("weight"), py::arg("columns"), py::arg("kW"), py::arg("kH"), py::arg("dW"), py::arg("dH"), py::arg("padH"), py::arg("padW"), py::arg("dilationW"), py::arg("dilationH"), py::arg("group"), py::arg("deformable_group"), py::arg("im2col_step")); m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters", py::arg("input"), py::arg("offset"), 
py::arg("gradOutput"), py::arg("gradWeight"), py::arg("columns"), py::arg("ones"), py::arg("kW"), py::arg("kH"), py::arg("dW"), py::arg("dH"), py::arg("padH"), py::arg("padW"), py::arg("dilationW"), py::arg("dilationH"), py::arg("group"), py::arg("deformable_group"), py::arg("scale"), py::arg("im2col_step")); m.def("deform_roi_pool_forward", &deform_roi_pool_forward, "deform roi pool forward", py::arg("input"), py::arg("rois"), py::arg("offset"), py::arg("output"), py::arg("pooled_height"), py::arg("pooled_width"), py::arg("spatial_scale"), py::arg("sampling_ratio"), py::arg("gamma")); m.def("deform_roi_pool_backward", &deform_roi_pool_backward, "deform roi pool backward", py::arg("grad_output"), py::arg("input"), py::arg("rois"), py::arg("offset"), py::arg("grad_input"), py::arg("grad_offset"), py::arg("pooled_height"), py::arg("pooled_width"), py::arg("spatial_scale"), py::arg("sampling_ratio"), py::arg("gamma")); m.def("roipoint_pool3d_forward", &roipoint_pool3d_forward, "roipoint_pool3d_forward", py::arg("xyz"), py::arg("boxes3d"), py::arg("pts_feature"), py::arg("pooled_features"), py::arg("pooled_empty_flag")); m.def("sigmoid_focal_loss_forward", &sigmoid_focal_loss_forward, "sigmoid_focal_loss_forward ", py::arg("input"), py::arg("target"), py::arg("weight"), py::arg("output"), py::arg("gamma"), py::arg("alpha")); m.def("sigmoid_focal_loss_backward", &sigmoid_focal_loss_backward, "sigmoid_focal_loss_backward", py::arg("input"), py::arg("target"), py::arg("weight"), py::arg("grad_input"), py::arg("gamma"), py::arg("alpha")); m.def("softmax_focal_loss_forward", &softmax_focal_loss_forward, "softmax_focal_loss_forward", py::arg("input"), py::arg("target"), py::arg("weight"), py::arg("output"), py::arg("gamma"), py::arg("alpha")); m.def("softmax_focal_loss_backward", &softmax_focal_loss_backward, "softmax_focal_loss_backward", py::arg("input"), py::arg("target"), py::arg("weight"), py::arg("buff"), py::arg("grad_input"), py::arg("gamma"), py::arg("alpha")); 
m.def("three_interpolate_forward", &three_interpolate_forward, "three_interpolate_forward", py::arg("points_tensor"), py::arg("idx_tensor"), py::arg("weight_tensor"), py::arg("out_tensor"), py::arg("b"), py::arg("c"), py::arg("m"), py::arg("n")); m.def("three_interpolate_backward", &three_interpolate_backward, "three_interpolate_backward", py::arg("grad_out_tensor"), py::arg("idx_tensor"), py::arg("weight_tensor"), py::arg("grad_points_tensor"), py::arg("b"), py::arg("c"), py::arg("n"), py::arg("m")); m.def("three_nn_forward", &three_nn_forward, "three_nn_forward", py::arg("unknown_tensor"), py::arg("known_tensor"), py::arg("dist2_tensor"), py::arg("idx_tensor"), py::arg("b"), py::arg("n"), py::arg("m")); m.def("bbox_overlaps", &bbox_overlaps, "bbox_overlaps", py::arg("bboxes1"), py::arg("bboxes2"), py::arg("ious"), py::arg("mode"), py::arg("aligned"), py::arg("offset")); m.def("group_points_forward", &group_points_forward, "group_points_forward", py::arg("points_tensor"), py::arg("idx_tensor"), py::arg("out_tensor"), py::arg("b"), py::arg("c"), py::arg("n"), py::arg("npoints"), py::arg("nsample")); m.def("group_points_backward", &group_points_backward, "group_points_backward", py::arg("grad_out_tensor"), py::arg("idx_tensor"), py::arg("grad_points_tensor"), py::arg("b"), py::arg("c"), py::arg("n"), py::arg("npoints"), py::arg("nsample")); m.def("knn_forward", &knn_forward, "knn_forward", py::arg("b"), py::arg("n"), py::arg("m"), py::arg("nsample"), py::arg("xyz_tensor"), py::arg("new_xyz_tensor"), py::arg("idx_tensor"), py::arg("dist2_tensor")); m.def("iou3d_boxes_overlap_bev_forward", &iou3d_boxes_overlap_bev_forward, "iou3d_boxes_overlap_bev_forward", py::arg("boxes_a"), py::arg("boxes_b"), py::arg("ans_overlap")); m.def("iou3d_boxes_iou_bev_forward", &iou3d_boxes_iou_bev_forward, "iou3d_boxes_iou_bev_forward", py::arg("boxes_a"), py::arg("boxes_b"), py::arg("ans_iou")); m.def("iou3d_nms_forward", &iou3d_nms_forward, "iou3d_nms_forward", py::arg("boxes"), 
py::arg("keep"), py::arg("num_out"), py::arg("nms_overlap_thresh")); m.def("iou3d_nms_normal_forward", &iou3d_nms_normal_forward, "iou3d_nms_normal_forward", py::arg("boxes"), py::arg("keep"), py::arg("num_out"), py::arg("nms_overlap_thresh")); m.def("furthest_point_sampling_forward", &furthest_point_sampling_forward, "furthest_point_sampling_forward", py::arg("points_tensor"), py::arg("temp_tensor"), py::arg("idx_tensor"), py::arg("b"), py::arg("n"), py::arg("m")); m.def("furthest_point_sampling_with_dist_forward", &furthest_point_sampling_with_dist_forward, "furthest_point_sampling_with_dist_forward", py::arg("points_tensor"), py::arg("temp_tensor"), py::arg("idx_tensor"), py::arg("b"), py::arg("n"), py::arg("m")); m.def("masked_im2col_forward", &masked_im2col_forward, "masked_im2col_forward", py::arg("im"), py::arg("mask_h_idx"), py::arg("mask_w_idx"), py::arg("col"), py::arg("kernel_h"), py::arg("kernel_w"), py::arg("pad_h"), py::arg("pad_w")); m.def("masked_col2im_forward", &masked_col2im_forward, "masked_col2im_forward", py::arg("col"), py::arg("mask_h_idx"), py::arg("mask_w_idx"), py::arg("im"), py::arg("height"), py::arg("width"), py::arg("channels")); m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated deform conv forward", py::arg("input"), py::arg("weight"), py::arg("bias"), py::arg("ones"), py::arg("offset"), py::arg("mask"), py::arg("output"), py::arg("columns"), py::arg("kernel_h"), py::arg("kernel_w"), py::arg("stride_h"), py::arg("stride_w"), py::arg("pad_h"), py::arg("pad_w"), py::arg("dilation_h"), py::arg("dilation_w"), py::arg("group"), py::arg("deformable_group"), py::arg("with_bias")); m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated deform conv backward", py::arg("input"), py::arg("weight"), py::arg("bias"), py::arg("ones"), py::arg("offset"), py::arg("mask"), py::arg("columns"), py::arg("grad_input"), py::arg("grad_weight"), py::arg("grad_bias"), py::arg("grad_offset"), 
py::arg("grad_mask"), py::arg("grad_output"), py::arg("kernel_h"), py::arg("kernel_w"), py::arg("stride_h"), py::arg("stride_w"), py::arg("pad_h"), py::arg("pad_w"), py::arg("dilation_h"), py::arg("dilation_w"), py::arg("group"), py::arg("deformable_group"), py::arg("with_bias")); m.def("nms", &nms, "nms (CPU/CUDA) ", py::arg("boxes"), py::arg("scores"), py::arg("iou_threshold"), py::arg("offset")); m.def("softnms", &softnms, "softnms (CPU) ", py::arg("boxes"), py::arg("scores"), py::arg("dets"), py::arg("iou_threshold"), py::arg("sigma"), py::arg("min_score"), py::arg("method"), py::arg("offset")); m.def("nms_match", &nms_match, "nms_match (CPU) ", py::arg("dets"), py::arg("iou_threshold")); m.def("pixel_group", &pixel_group, "pixel group (CPU) ", py::arg("score"), py::arg("mask"), py::arg("embedding"), py::arg("kernel_label"), py::arg("kernel_contour"), py::arg("kernel_region_label"), py::arg("distance_threshold")); m.def("contour_expand", &contour_expand, "contour exapnd (CPU) ", py::arg("kernel_mask"), py::arg("internal_kernel_label"), py::arg("min_kernel_area"), py::arg("kernel_num")); m.def("roi_align_forward", &roi_align_forward, "roi_align forward", py::arg("input"), py::arg("rois"), py::arg("output"), py::arg("argmax_y"), py::arg("argmax_x"), py::arg("aligned_height"), py::arg("aligned_width"), py::arg("spatial_scale"), py::arg("sampling_ratio"), py::arg("pool_mode"), py::arg("aligned")); m.def("roi_align_backward", &roi_align_backward, "roi_align backward", py::arg("grad_output"), py::arg("rois"), py::arg("argmax_y"), py::arg("argmax_x"), py::arg("grad_input"), py::arg("aligned_height"), py::arg("aligned_width"), py::arg("spatial_scale"), py::arg("sampling_ratio"), py::arg("pool_mode"), py::arg("aligned")); m.def("roi_pool_forward", &roi_pool_forward, "roi_pool forward", py::arg("input"), py::arg("rois"), py::arg("output"), py::arg("argmax"), py::arg("pooled_height"), py::arg("pooled_width"), py::arg("spatial_scale")); m.def("roi_pool_backward", 
&roi_pool_backward, "roi_pool backward", py::arg("grad_output"), py::arg("rois"), py::arg("argmax"), py::arg("grad_input"), py::arg("pooled_height"), py::arg("pooled_width"), py::arg("spatial_scale")); m.def("sync_bn_forward_mean", &sync_bn_forward_mean, "sync_bn forward_mean", py::arg("input"), py::arg("mean")); m.def("sync_bn_forward_var", &sync_bn_forward_var, "sync_bn forward_var", py::arg("input"), py::arg("mean"), py::arg("var")); m.def("sync_bn_forward_output", &sync_bn_forward_output, "sync_bn forward_output", py::arg("input"), py::arg("mean"), py::arg("var"), py::arg("weight"), py::arg("bias"), py::arg("running_mean"), py::arg("running_var"), py::arg("norm"), py::arg("std"), py::arg("output"), py::arg("eps"), py::arg("momentum"), py::arg("group_size")); m.def("sync_bn_backward_param", &sync_bn_backward_param, "sync_bn backward_param", py::arg("grad_output"), py::arg("norm"), py::arg("grad_weight"), py::arg("grad_bias")); m.def("sync_bn_backward_data", &sync_bn_backward_data, "sync_bn backward_data", py::arg("grad_output"), py::arg("weight"), py::arg("grad_weight"), py::arg("grad_bias"), py::arg("norm"), py::arg("std"), py::arg("grad_input")); m.def("psamask_forward", &psamask_forward, "PSAMASK forward (CPU/CUDA)", py::arg("input"), py::arg("output"), py::arg("psa_type"), py::arg("num_"), py::arg("h_feature"), py::arg("w_feature"), py::arg("h_mask"), py::arg("w_mask"), py::arg("half_h_mask"), py::arg("half_w_mask")); m.def("psamask_backward", &psamask_backward, "PSAMASK backward (CPU/CUDA)", py::arg("grad_output"), py::arg("grad_input"), py::arg("psa_type"), py::arg("num_"), py::arg("h_feature"), py::arg("w_feature"), py::arg("h_mask"), py::arg("w_mask"), py::arg("half_h_mask"), py::arg("half_w_mask")); m.def("tin_shift_forward", &tin_shift_forward, "tin_shift forward", py::arg("input"), py::arg("shift"), py::arg("output")); m.def("tin_shift_backward", &tin_shift_backward, "tin_shift backward", py::arg("grad_output"), py::arg("shift"), 
py::arg("grad_input")); m.def("bottom_pool_forward", &bottom_pool_forward, "Bottom Pool Forward", py::arg("input"), py::call_guard()); m.def("bottom_pool_backward", &bottom_pool_backward, "Bottom Pool Backward", py::arg("input"), py::arg("grad_output"), py::call_guard()); m.def("left_pool_forward", &left_pool_forward, "Left Pool Forward", py::arg("input"), py::call_guard()); m.def("left_pool_backward", &left_pool_backward, "Left Pool Backward", py::arg("input"), py::arg("grad_output"), py::call_guard()); m.def("right_pool_forward", &right_pool_forward, "Right Pool Forward", py::arg("input"), py::call_guard()); m.def("right_pool_backward", &right_pool_backward, "Right Pool Backward", py::arg("input"), py::arg("grad_output"), py::call_guard()); m.def("top_pool_forward", &top_pool_forward, "Top Pool Forward", py::arg("input"), py::call_guard()); m.def("top_pool_backward", &top_pool_backward, "Top Pool Backward", py::arg("input"), py::arg("grad_output"), py::call_guard()); m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes", py::arg("boxes1"), py::arg("boxes2"), py::arg("ious"), py::arg("mode_flag"), py::arg("aligned")); m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes", py::arg("dets"), py::arg("scores"), py::arg("order"), py::arg("dets_sorted"), py::arg("iou_threshold"), py::arg("multi_label")); m.def("ball_query_forward", &ball_query_forward, "ball_query_forward", py::arg("new_xyz_tensor"), py::arg("xyz_tensor"), py::arg("idx_tensor"), py::arg("b"), py::arg("n"), py::arg("m"), py::arg("min_radius"), py::arg("max_radius"), py::arg("nsample")); m.def("roi_align_rotated_forward", &roi_align_rotated_forward, "roi_align_rotated forward", py::arg("input"), py::arg("rois"), py::arg("output"), py::arg("pooled_height"), py::arg("pooled_width"), py::arg("spatial_scale"), py::arg("sample_num"), py::arg("aligned"), py::arg("clockwise")); m.def("roi_align_rotated_backward", &roi_align_rotated_backward, "roi_align_rotated backward", py::arg("rois"), 
py::arg("grad_input"), py::arg("grad_output"), py::arg("pooled_height"), py::arg("pooled_width"), py::arg("spatial_scale"), py::arg("sample_num"), py::arg("aligned"), py::arg("clockwise")); m.def("dynamic_point_to_voxel_forward", &dynamic_point_to_voxel_forward, "dynamic_point_to_voxel_forward", py::arg("feats"), py::arg("coors"), py::arg("reduce_type")); m.def("dynamic_point_to_voxel_backward", &dynamic_point_to_voxel_backward, "dynamic_point_to_voxel_backward", py::arg("grad_feats"), py::arg("grad_reduced_feats"), py::arg("feats"), py::arg("reduced_feats"), py::arg("coors_idx"), py::arg("reduce_count"), py::arg("reduce_type")); m.def("hard_voxelize_forward", &hard_voxelize_forward, "hard_voxelize_forward", py::arg("points"), py::arg("voxel_size"), py::arg("coors_range"), py::arg("voxels"), py::arg("coors"), py::arg("num_points_per_voxel"), py::arg("voxel_num"), py::arg("max_points"), py::arg("max_voxels"), py::arg("NDim")); m.def("dynamic_voxelize_forward", &dynamic_voxelize_forward, "dynamic_voxelize_forward", py::arg("points"), py::arg("voxel_size"), py::arg("coors_range"), py::arg("coors"), py::arg("NDim")); m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "forward function of multi-scale deformable attention", py::arg("value"), py::arg("value_spatial_shapes"), py::arg("value_level_start_index"), py::arg("sampling_locations"), py::arg("attention_weights"), py::arg("im2col_step")); m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "backward function of multi-scale deformable attention", py::arg("value"), py::arg("value_spatial_shapes"), py::arg("value_level_start_index"), py::arg("sampling_locations"), py::arg("attention_weights"), py::arg("grad_output"), py::arg("grad_value"), py::arg("grad_sampling_loc"), py::arg("grad_attn_weight"), py::arg("im2col_step")); m.def("border_align_forward", &border_align_forward, "forward function of border_align", py::arg("input"), py::arg("boxes"), py::arg("output"), py::arg("argmax_idx"), 
py::arg("pool_size")); m.def("border_align_backward", &border_align_backward, "backward function of border_align", py::arg("grad_output"), py::arg("boxes"), py::arg("argmax_idx"), py::arg("grad_input"), py::arg("pool_size")); m.def("correlation_forward", &correlation_forward, "Correlation forward", py::arg("input1"), py::arg("input2"), py::arg("output"), py::arg("kH"), py::arg("kW"), py::arg("patchH"), py::arg("patchW"), py::arg("padH"), py::arg("padW"), py::arg("dilationH"), py::arg("dilationW"), py::arg("dilation_patchH"), py::arg("dilation_patchW"), py::arg("dH"), py::arg("dW")); m.def("correlation_backward", &correlation_backward, "Correlation backward", py::arg("grad_output"), py::arg("input1"), py::arg("input2"), py::arg("grad_input1"), py::arg("grad_input2"), py::arg("kH"), py::arg("kW"), py::arg("patchH"), py::arg("patchW"), py::arg("padH"), py::arg("padW"), py::arg("dilationH"), py::arg("dilationW"), py::arg("dilation_patchH"), py::arg("dilation_patchW"), py::arg("dH"), py::arg("dW")); m.def("points_in_boxes_cpu_forward", &points_in_boxes_cpu_forward, "points_in_boxes_cpu_forward", py::arg("boxes_tensor"), py::arg("pts_tensor"), py::arg("pts_indices_tensor")); m.def("points_in_boxes_part_forward", &points_in_boxes_part_forward, "points_in_boxes_part_forward", py::arg("boxes_tensor"), py::arg("pts_tensor"), py::arg("box_idx_of_points_tensor")); m.def("points_in_boxes_all_forward", &points_in_boxes_all_forward, "points_in_boxes_all_forward", py::arg("boxes_tensor"), py::arg("pts_tensor"), py::arg("box_idx_of_points_tensor")); m.def("roiaware_pool3d_forward", &roiaware_pool3d_forward, "roiaware_pool3d_forward", py::arg("rois"), py::arg("pts"), py::arg("pts_feature"), py::arg("argmax"), py::arg("pts_idx_of_voxels"), py::arg("pooled_features"), py::arg("pool_method")); m.def("roiaware_pool3d_backward", &roiaware_pool3d_backward, "roiaware_pool3d_backward", py::arg("pts_idx_of_voxels"), py::arg("argmax"), py::arg("grad_out"), py::arg("grad_in"), 
py::arg("pool_method")); m.def("rotated_feature_align_forward", &rotated_feature_align_forward, "Feature Refine forward (CUDA)", py::arg("features"), py::arg("best_bboxes"), py::arg("output"), py::arg("spatial_scale"), py::arg("points")); m.def("rotated_feature_align_backward", &rotated_feature_align_backward, "Feature Refine backward (CUDA)", py::arg("top_grad"), py::arg("best_bboxes"), py::arg("bottom_grad"), py::arg("spatial_scale"), py::arg("points")); m.def("riroi_align_rotated_forward", &riroi_align_rotated_forward, "riroi_align_rotated forward", py::arg("features"), py::arg("rois"), py::arg("output"), py::arg("pooled_height"), py::arg("pooled_width"), py::arg("spatial_scale"), py::arg("num_samples"), py::arg("num_orientations"), py::arg("clockwise")); m.def("riroi_align_rotated_backward", &riroi_align_rotated_backward, "riroi_align_rotated backward", py::arg("top_grad"), py::arg("rois"), py::arg("bottom_grad"), py::arg("pooled_height"), py::arg("pooled_width"), py::arg("spatial_scale"), py::arg("num_samples"), py::arg("num_orientations"), py::arg("clockwise")); m.def("points_in_polygons_forward", &points_in_polygons_forward, "points_in_polygons_forward", py::arg("points"), py::arg("polygons"), py::arg("output")); m.def("min_area_polygons", &min_area_polygons, "min_area_polygons", py::arg("pointsets"), py::arg("polygons")); m.def("active_rotated_filter_forward", &active_rotated_filter_forward, "active_rotated_filter_forward", py::arg("input"), py::arg("indices"), py::arg("output")); m.def("active_rotated_filter_backward", &active_rotated_filter_backward, "active_rotated_filter_backward", py::arg("grad_out"), py::arg("indices"), py::arg("grad_in")); m.def("convex_iou", &convex_iou, "convex_iou", py::arg("pointsets"), py::arg("polygons"), py::arg("ious")); m.def("convex_giou", &convex_giou, "convex_giou", py::arg("pointsets"), py::arg("polygons"), py::arg("output")); } ================================================ FILE: 
01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/riroi_align_rotated.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void riroi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int num_samples, int num_orientations, bool clockwise) { DISPATCH_DEVICE_IMPL(riroi_align_rotated_forward_impl, features, rois, output, pooled_height, pooled_width, spatial_scale, num_samples, num_orientations, clockwise); } void riroi_align_rotated_backward_impl(Tensor top_grad, Tensor rois, Tensor bottom_grad, int pooled_height, int pooled_width, float spatial_scale, int num_samples, int num_orientations, bool clockwise) { DISPATCH_DEVICE_IMPL(riroi_align_rotated_backward_impl, top_grad, rois, bottom_grad, pooled_height, pooled_width, spatial_scale, num_samples, num_orientations, clockwise); } void riroi_align_rotated_forward(Tensor features, Tensor rois, Tensor output, int pooled_height, int pooled_width, float spatial_scale, int num_samples, int num_orientations, bool clockwise) { riroi_align_rotated_forward_impl(features, rois, output, pooled_height, pooled_width, spatial_scale, num_samples, num_orientations, clockwise); } void riroi_align_rotated_backward(Tensor top_grad, Tensor rois, Tensor bottom_grad, int pooled_height, int pooled_width, float spatial_scale, int num_samples, int num_orientations, bool clockwise) { riroi_align_rotated_backward_impl(top_grad, rois, bottom_grad, pooled_height, pooled_width, spatial_scale, num_samples, num_orientations, clockwise); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/roi_align.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { DISPATCH_DEVICE_IMPL(roi_align_forward_impl, input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y, Tensor argmax_x, Tensor grad_input, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { DISPATCH_DEVICE_IMPL(roi_align_backward_impl, grad_output, rois, argmax_y, argmax_x, grad_input, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } void roi_align_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax_y, Tensor argmax_x, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { roi_align_forward_impl(input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y, Tensor argmax_x, Tensor grad_input, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned) { roi_align_backward_impl(grad_output, rois, argmax_y, argmax_x, grad_input, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/roi_align_rotated.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void roi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output, int aligned_height, int aligned_width, float spatial_scale, int sample_ratio, bool aligned, bool clockwise) { DISPATCH_DEVICE_IMPL(roi_align_rotated_forward_impl, features, rois, output, aligned_height, aligned_width, spatial_scale, sample_ratio, aligned, clockwise); } void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois, Tensor bottom_grad, int aligned_height, int aligned_width, float spatial_scale, int sample_ratio, bool aligned, bool clockwise) { DISPATCH_DEVICE_IMPL(roi_align_rotated_backward_impl, top_grad, rois, bottom_grad, aligned_height, aligned_width, spatial_scale, sample_ratio, aligned, clockwise); } void roi_align_rotated_forward(Tensor input, Tensor rois, Tensor output, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, bool aligned, bool clockwise) { roi_align_rotated_forward_impl(input, rois, output, aligned_height, aligned_width, spatial_scale, sampling_ratio, aligned, clockwise); } void roi_align_rotated_backward(Tensor top_grad, Tensor rois, Tensor bottom_grad, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, bool aligned, bool clockwise) { roi_align_rotated_backward_impl(top_grad, rois, bottom_grad, aligned_height, aligned_width, spatial_scale, sampling_ratio, aligned, clockwise); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/roi_pool.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output, Tensor argmax, int pooled_height, int pooled_width, float spatial_scale) { DISPATCH_DEVICE_IMPL(roi_pool_forward_impl, input, rois, output, argmax, pooled_height, pooled_width, spatial_scale); } void roi_pool_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax, Tensor grad_input, int pooled_height, int pooled_width, float spatial_scale) { DISPATCH_DEVICE_IMPL(roi_pool_backward_impl, grad_output, rois, argmax, grad_input, pooled_height, pooled_width, spatial_scale); } void roi_pool_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax, int pooled_height, int pooled_width, float spatial_scale) { roi_pool_forward_impl(input, rois, output, argmax, pooled_height, pooled_width, spatial_scale); } void roi_pool_backward(Tensor grad_output, Tensor rois, Tensor argmax, Tensor grad_input, int pooled_height, int pooled_width, float spatial_scale) { roi_pool_backward_impl(grad_output, rois, argmax, grad_input, pooled_height, pooled_width, spatial_scale); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/roiaware_pool3d.cpp ================================================ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void roiaware_pool3d_forward_impl(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int out_y, int out_z, const Tensor rois, const Tensor pts, const Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels, Tensor pooled_features, int pool_method) { DISPATCH_DEVICE_IMPL(roiaware_pool3d_forward_impl, boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features, pool_method); } void roiaware_pool3d_backward_impl(int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel, 
const Tensor pts_idx_of_voxels, const Tensor argmax, const Tensor grad_out, Tensor grad_in, int pool_method) { DISPATCH_DEVICE_IMPL(roiaware_pool3d_backward_impl, boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel, pts_idx_of_voxels, argmax, grad_out, grad_in, pool_method); } void roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels, Tensor pooled_features, int pool_method) { // params rois: (N, 7) [x, y, z, x_size, y_size, z_size, ry] in LiDAR // coordinate // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate // params pts_feature: (npoints, C) // params argmax: (N, out_x, out_y, out_z, C) // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) // params pooled_features: (N, out_x, out_y, out_z, C) // params pool_method: 0: max_pool 1: avg_pool int boxes_num = rois.size(0); int pts_num = pts.size(0); int channels = pts_feature.size(1); int max_pts_each_voxel = pts_idx_of_voxels.size(4); // index 0 is the counter int out_x = pts_idx_of_voxels.size(1); int out_y = pts_idx_of_voxels.size(2); int out_z = pts_idx_of_voxels.size(3); assert((out_x < 256) && (out_y < 256) && (out_z < 256)); // we encode index with 8bit roiaware_pool3d_forward_impl(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features, pool_method); } void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax, Tensor grad_out, Tensor grad_in, int pool_method) { // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) // params argmax: (N, out_x, out_y, out_z, C) // params grad_out: (N, out_x, out_y, out_z, C) // params grad_in: (npoints, C), return value // params pool_method: 0: max_pool 1: avg_pool int boxes_num = pts_idx_of_voxels.size(0); int out_x = pts_idx_of_voxels.size(1); int out_y = pts_idx_of_voxels.size(2); int out_z = pts_idx_of_voxels.size(3); int max_pts_each_voxel = pts_idx_of_voxels.size(4); 
// index 0 is the counter int channels = grad_out.size(4); roiaware_pool3d_backward_impl(boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel, pts_idx_of_voxels, argmax, grad_out, grad_in, pool_method); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/roipoint_pool3d.cpp ================================================ /* Modified from https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/roipoint_pool3d/src/roipoint_pool3d.cpp Point cloud feature pooling Written by Shaoshuai Shi All Rights Reserved 2018. */ #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void roipoint_pool3d_forward_impl(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num, const Tensor xyz, const Tensor boxes3d, const Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag) { DISPATCH_DEVICE_IMPL(roipoint_pool3d_forward_impl, batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, xyz, boxes3d, pts_feature, pooled_features, pooled_empty_flag); } void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag) { // params xyz: (B, N, 3) // params boxes3d: (B, M, 7) // params pts_feature: (B, N, C) // params pooled_features: (B, M, 512, 3+C) // params pooled_empty_flag: (B, M) int batch_size = xyz.size(0); int pts_num = xyz.size(1); int boxes_num = boxes3d.size(1); int feature_in_len = pts_feature.size(2); int sampled_pts_num = pooled_features.size(2); roipoint_pool3d_forward_impl(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, xyz, boxes3d, pts_feature, pooled_features, pooled_empty_flag); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/rotated_feature_align.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved. 
// Modified from // https://github.com/SJTU-Thinklab-Det/r3det-on-mmdetection/blob/master/mmdet/ops/fr/src/feature_refine_cuda.cpp #include "pytorch_cpp_helper.hpp" #include "pytorch_device_registry.hpp" void rotated_feature_align_forward_impl(const Tensor features, const Tensor best_bboxes, const float spatial_scale, const int points, Tensor output) { DISPATCH_DEVICE_IMPL(rotated_feature_align_forward_impl, features, best_bboxes, spatial_scale, points, output); } void rotated_feature_align_backward_impl(const Tensor top_grad, const Tensor best_bboxes, const float spatial_scale, const int points, Tensor bottom_grad) { DISPATCH_DEVICE_IMPL(rotated_feature_align_backward_impl, top_grad, best_bboxes, spatial_scale, points, bottom_grad); } void rotated_feature_align_forward(const Tensor features, const Tensor best_bboxes, Tensor output, const float spatial_scale, const int points) { rotated_feature_align_forward_impl(features, best_bboxes, spatial_scale, points, output); } void rotated_feature_align_backward(const Tensor top_grad, const Tensor best_bboxes, Tensor bottom_grad, const float spatial_scale, const int points) { rotated_feature_align_backward_impl(top_grad, best_bboxes, spatial_scale, points, bottom_grad); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/scatter_points.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved. 
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Reduction applied to the points that fall into the same voxel.
typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;

// Dispatch stub for the point-to-voxel scatter forward pass. Returns whatever
// the implementation selected by DISPATCH_DEVICE_IMPL returns.
// The element type of the returned vector was lost in extraction and is
// restored here as torch::Tensor (a bare `std::vector` return type does not
// compile).
std::vector<torch::Tensor> dynamic_point_to_voxel_forward_impl(
    const torch::Tensor &feats, const torch::Tensor &coors,
    const reduce_t reduce_type) {
  return DISPATCH_DEVICE_IMPL(dynamic_point_to_voxel_forward_impl, feats, coors,
                              reduce_type);
}

// Dispatch stub for the point-to-voxel scatter backward pass: passes every
// argument through DISPATCH_DEVICE_IMPL unchanged.
void dynamic_point_to_voxel_backward_impl(
    torch::Tensor &grad_feats, const torch::Tensor &grad_reduced_feats,
    const torch::Tensor &feats, const torch::Tensor &reduced_feats,
    const torch::Tensor &coors_idx, const torch::Tensor &reduce_count,
    const reduce_t reduce_type) {
  DISPATCH_DEVICE_IMPL(dynamic_point_to_voxel_backward_impl, grad_feats,
                       grad_reduced_feats, feats, reduced_feats, coors_idx,
                       reduce_count, reduce_type);
}

// Maps the textual reduce mode ("max", "sum", "mean") to reduce_t.
// Any other string fails the TORCH_CHECK below, which throws with the
// message "do not support reduce type <value>".
inline reduce_t convert_reduce_type(const std::string &reduce_type) {
  if (reduce_type == "max") return reduce_t::MAX;
  if (reduce_type == "sum") return reduce_t::SUM;
  if (reduce_type == "mean") return reduce_t::MEAN;
  TORCH_CHECK(false, "do not support reduce type " + reduce_type);
  // Not reached: the check above always throws. Kept so every control path
  // of this non-void function has a return statement.
  return reduce_t::SUM;
}

// Public forward entry point: converts the string reduce mode and forwards to
// dynamic_point_to_voxel_forward_impl.
std::vector<torch::Tensor> dynamic_point_to_voxel_forward(
    const torch::Tensor &feats, const torch::Tensor &coors,
    const std::string &reduce_type) {
  return dynamic_point_to_voxel_forward_impl(feats, coors,
                                             convert_reduce_type(reduce_type));
}

// Public backward entry point: converts the string reduce mode and forwards
// to dynamic_point_to_voxel_backward_impl.
void dynamic_point_to_voxel_backward(torch::Tensor &grad_feats,
                                     const torch::Tensor &grad_reduced_feats,
                                     const torch::Tensor &feats,
                                     const torch::Tensor &reduced_feats,
                                     const torch::Tensor &coors_idx,
                                     const torch::Tensor &reduce_count,
                                     const std::string &reduce_type) {
  dynamic_point_to_voxel_backward_impl(grad_feats, grad_reduced_feats, feats,
                                       reduced_feats, coors_idx, reduce_count,
                                       convert_reduce_type(reduce_type));
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/sync_bn.cpp
================================================
// Copyright (c) OpenMMLab.
All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// ---- Dispatch stubs --------------------------------------------------------
// Each *_impl function below hands its arguments to DISPATCH_DEVICE_IMPL
// unchanged and in declaration order.
// NOTE(review): DISPATCH_DEVICE_IMPL is defined outside this file; confirm in
// pytorch_device_registry.hpp how the backend is selected.

void sync_bn_forward_mean_impl(const Tensor input, Tensor mean) {
  DISPATCH_DEVICE_IMPL(sync_bn_forward_mean_impl, input, mean);
}

void sync_bn_forward_var_impl(const Tensor input, const Tensor mean,
                              Tensor var) {
  DISPATCH_DEVICE_IMPL(sync_bn_forward_var_impl, input, mean, var);
}

// Parameter order here is (..., running_mean, running_var, weight, bias, ...),
// which differs from the public sync_bn_forward_output wrapper.
void sync_bn_forward_output_impl(const Tensor input, const Tensor mean,
                                 const Tensor var, Tensor running_mean,
                                 Tensor running_var, const Tensor weight,
                                 const Tensor bias, Tensor norm, Tensor std,
                                 Tensor output, float eps, float momentum,
                                 int group_size) {
  DISPATCH_DEVICE_IMPL(sync_bn_forward_output_impl, input, mean, var,
                       running_mean, running_var, weight, bias, norm, std,
                       output, eps, momentum, group_size);
}

void sync_bn_backward_param_impl(const Tensor grad_output, const Tensor norm,
                                 Tensor grad_weight, Tensor grad_bias) {
  DISPATCH_DEVICE_IMPL(sync_bn_backward_param_impl, grad_output, norm,
                       grad_weight, grad_bias);
}

void sync_bn_backward_data_impl(const Tensor grad_output, const Tensor weight,
                                const Tensor grad_weight,
                                const Tensor grad_bias, const Tensor norm,
                                const Tensor std, Tensor grad_input) {
  DISPATCH_DEVICE_IMPL(sync_bn_backward_data_impl, grad_output, weight,
                       grad_weight, grad_bias, norm, std, grad_input);
}

// ---- Public entry points ---------------------------------------------------
// Each wrapper forwards to the matching *_impl function.

void sync_bn_forward_mean(const Tensor input, Tensor mean) {
  sync_bn_forward_mean_impl(input, mean);
}

void sync_bn_forward_var(const Tensor input, const Tensor mean, Tensor var) {
  sync_bn_forward_var_impl(input, mean, var);
}

// The public signature takes (weight, bias) BEFORE (running_mean,
// running_var); the call below re-orders them to match the _impl signature.
void sync_bn_forward_output(const Tensor input, const Tensor mean,
                            const Tensor var, const Tensor weight,
                            const Tensor bias, Tensor running_mean,
                            Tensor running_var, Tensor norm, Tensor std,
                            Tensor output, float eps, float momentum,
                            int group_size) {
  sync_bn_forward_output_impl(input, mean, var, running_mean, running_var,
                              weight, bias, norm, std, output, eps, momentum,
                              group_size);
}

void sync_bn_backward_param(const Tensor grad_output, const Tensor norm,
                            Tensor grad_weight, Tensor grad_bias) {
  sync_bn_backward_param_impl(grad_output, norm, grad_weight, grad_bias);
}

void sync_bn_backward_data(const Tensor grad_output, const Tensor weight,
                           const Tensor grad_weight, const Tensor grad_bias,
                           const Tensor norm, const Tensor std,
                           Tensor grad_input) {
  sync_bn_backward_data_impl(grad_output, weight, grad_weight, grad_bias, norm,
                             std, grad_input);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/three_interpolate.cpp
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for three-point interpolation forward: passes every argument
// through DISPATCH_DEVICE_IMPL unchanged. Size arguments come first, in the
// order (b, c, m, n).
void three_interpolate_forward_impl(int b, int c, int m, int n,
                                    const Tensor points, const Tensor idx,
                                    const Tensor weight, Tensor out) {
  DISPATCH_DEVICE_IMPL(three_interpolate_forward_impl, b, c, m, n, points, idx,
                       weight, out);
}

// Dispatch stub for three-point interpolation backward. Note the size order
// is (b, c, n, m) here -- n and m are swapped relative to the forward stub.
void three_interpolate_backward_impl(int b, int c, int n, int m,
                                     const Tensor grad_out, const Tensor idx,
                                     const Tensor weight, Tensor grad_points) {
  DISPATCH_DEVICE_IMPL(three_interpolate_backward_impl, b, c, n, m, grad_out,
                       idx, weight, grad_points);
}

// Public forward entry point: tensors first, sizes last; re-orders them to the
// sizes-first layout of the _impl function.
void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor,
                               Tensor weight_tensor, Tensor out_tensor, int b,
                               int c, int m, int n) {
  three_interpolate_forward_impl(b, c, m, n, points_tensor, idx_tensor,
                                 weight_tensor, out_tensor);
}

// Public backward entry point: tensors first, sizes last; re-orders them to
// the sizes-first layout of the _impl function.
void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                                Tensor weight_tensor, Tensor grad_points_tensor,
                                int b, int c, int n, int m) {
  three_interpolate_backward_impl(b, c, n, m, grad_out_tensor, idx_tensor,
                                  weight_tensor, grad_points_tensor);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/three_nn.cpp
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the three-nearest-neighbour search: passes every argument
// through DISPATCH_DEVICE_IMPL unchanged.
void three_nn_forward_impl(int b, int n, int m, const Tensor unknown,
                           const Tensor known, Tensor dist2, Tensor idx) {
  DISPATCH_DEVICE_IMPL(three_nn_forward_impl, b, n, m, unknown, known, dist2,
                       idx);
}

// Public entry point: tensors first, sizes last; re-orders them to the
// sizes-first layout of three_nn_forward_impl.
void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
                      Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
                      int m) {
  three_nn_forward_impl(b, n, m, unknown_tensor, known_tensor, dist2_tensor,
                        idx_tensor);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/tin_shift.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for TIN shift forward: passes every argument through
// DISPATCH_DEVICE_IMPL unchanged.
void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output) {
  DISPATCH_DEVICE_IMPL(tin_shift_forward_impl, input, shift, output);
}

// Dispatch stub for TIN shift backward: passes every argument through
// DISPATCH_DEVICE_IMPL unchanged.
void tin_shift_backward_impl(Tensor grad_output, Tensor shift,
                             Tensor grad_input) {
  DISPATCH_DEVICE_IMPL(tin_shift_backward_impl, grad_output, shift, grad_input);
}

// Public forward entry point; forwards to tin_shift_forward_impl.
void tin_shift_forward(Tensor input, Tensor shift, Tensor output) {
  tin_shift_forward_impl(input, shift, output);
}

// Public backward entry point; forwards to tin_shift_backward_impl.
void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input) {
  tin_shift_backward_impl(grad_output, shift, grad_input);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/upfirdn2d.cpp
================================================
// Modified from
// https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.cpp
/*
Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator Augmentation (ADA)
=======================================================================
1.
Definitions "Licensor" means any person or entity that distributes its Work. "Software" means the original work of authorship made available under this License. "Work" means the Software and any additions to or derivative works of the Software that are made available under this License. The terms "reproduce," "reproduction," "derivative works," and "distribution" have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. Works, including the Software, are "made available" under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. 2. License Grants 2.1 Copyright Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 3. Limitations 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work ("Your Terms") only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. 
Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use non-commercially. Notwithstanding the foregoing, NVIDIA and its affiliates may use the Work and any derivative works commercially. As used herein, "non-commercially" means for research or evaluation purposes only. 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grant in Section 2.1) will terminate immediately. 3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License. 3.6 Termination. If you violate any term of this License, then your rights under this License (including the grant in Section 2.1) will terminate immediately. 4. Disclaimer of Warranty. THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. 5. Limitation of Liability. 
EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
=======================================================================
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the upfirdn2d operator: passes every argument through
// DISPATCH_DEVICE_IMPL unchanged and returns the tensor it yields.
// NOTE(review): the up_* / down_* / pad_* parameters look like per-axis
// upsampling factors, downsampling factors and padding amounts -- confirm
// against the device implementation, which is not visible here.
torch::Tensor upfirdn2d_op_impl(const torch::Tensor& input,
                                const torch::Tensor& kernel, int up_x, int up_y,
                                int down_x, int down_y, int pad_x0, int pad_x1,
                                int pad_y0, int pad_y1) {
  return DISPATCH_DEVICE_IMPL(upfirdn2d_op_impl, input, kernel, up_x, up_y,
                              down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1);
}

// Public entry point: forwards all arguments, in the same order, to
// upfirdn2d_op_impl and returns its result.
torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel,
                        int up_x, int up_y, int down_x, int down_y, int pad_x0,
                        int pad_x1, int pad_y0, int pad_y1) {
  return upfirdn2d_op_impl(input, kernel, up_x, up_y, down_x, down_y, pad_x0,
                           pad_x1, pad_y0, pad_y1);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/voxelization.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved.
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for hard voxelization. Passes every argument through
// DISPATCH_DEVICE_IMPL unchanged and returns the int it yields (the public
// wrapper stores that value into `voxel_num`).
// The vector element type was lost in extraction and is restored as float,
// matching the data_ptr<float>() ranges the wrappers build these vectors from.
int hard_voxelize_forward_impl(const at::Tensor &points, at::Tensor &voxels,
                               at::Tensor &coors,
                               at::Tensor &num_points_per_voxel,
                               const std::vector<float> voxel_size,
                               const std::vector<float> coors_range,
                               const int max_points, const int max_voxels,
                               const int NDim = 3) {
  return DISPATCH_DEVICE_IMPL(hard_voxelize_forward_impl, points, voxels, coors,
                              num_points_per_voxel, voxel_size, coors_range,
                              max_points, max_voxels, NDim);
}

// Dispatch stub for dynamic voxelization: passes every argument through
// DISPATCH_DEVICE_IMPL unchanged.
void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors,
                                   const std::vector<float> voxel_size,
                                   const std::vector<float> coors_range,
                                   const int NDim = 3) {
  DISPATCH_DEVICE_IMPL(dynamic_voxelize_forward_impl, points, coors, voxel_size,
                       coors_range, NDim);
}

// Public entry point for hard voxelization.
//
// Copies `voxel_size` and `coors_range` out of their tensors into
// std::vector<float>, calls hard_voxelize_forward_impl, and writes the
// returned count into the first element of `voxel_num`.
//
// The raw pointers are dereferenced on the host, so `voxel_size`,
// `coors_range` and `voxel_num` must be host-accessible tensors; the typed
// data_ptr<T>() accessors additionally require float / float / int64 dtypes.
void hard_voxelize_forward(const at::Tensor &points,
                           const at::Tensor &voxel_size,
                           const at::Tensor &coors_range, at::Tensor &voxels,
                           at::Tensor &coors, at::Tensor &num_points_per_voxel,
                           at::Tensor &voxel_num, const int max_points,
                           const int max_voxels, const int NDim = 3) {
  int64_t *voxel_num_data = voxel_num.data_ptr<int64_t>();
  std::vector<float> voxel_size_v(
      voxel_size.data_ptr<float>(),
      voxel_size.data_ptr<float>() + voxel_size.numel());
  std::vector<float> coors_range_v(
      coors_range.data_ptr<float>(),
      coors_range.data_ptr<float>() + coors_range.numel());

  *voxel_num_data = hard_voxelize_forward_impl(
      points, voxels, coors, num_points_per_voxel, voxel_size_v, coors_range_v,
      max_points, max_voxels, NDim);
}

// Public entry point for dynamic voxelization.
//
// Copies `voxel_size` and `coors_range` out of their tensors into
// std::vector<float> (same host-access / float-dtype requirement as above)
// and forwards to dynamic_voxelize_forward_impl.
void dynamic_voxelize_forward(const at::Tensor &points,
                              const at::Tensor &voxel_size,
                              const at::Tensor &coors_range, at::Tensor &coors,
                              const int NDim = 3) {
  std::vector<float> voxel_size_v(
      voxel_size.data_ptr<float>(),
      voxel_size.data_ptr<float>() + voxel_size.numel());
  std::vector<float> coors_range_v(
      coors_range.data_ptr<float>(),
      coors_range.data_ptr<float>() + coors_range.numel());
  dynamic_voxelize_forward_impl(points, coors, voxel_size_v, coors_range_v,
                                NDim);
}

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool.cpp
================================================ // Copyright (c) OpenMMLab. All rights reserved #include "trt_corner_pool.hpp" #include #include "trt_serialize.hpp" void CornerPoolForwardLauncher_float(const float *input, float *output, const int batch_size, const int channels, const int height, const int width, const int pool_type, cudaStream_t stream); namespace { static const char *PLUGIN_VERSION{"1"}; static const char *CORNER_POOL_PLUGIN_NAME{"MMCVCornerPool"}; } // namespace CornerPoolPluginDynamic::CornerPoolPluginDynamic(const std::string &name, TRT_CORNER_POOL_TYPE poolType) : mLayerName(name), mPoolType(poolType) {} CornerPoolPluginDynamic::CornerPoolPluginDynamic(const std::string name, const void *data, size_t length) : mLayerName(name) { deserialize_value(&data, &length, &mPoolType); } CornerPoolPluginDynamic::~CornerPoolPluginDynamic() {} nvinfer1::IPluginV2DynamicExt *CornerPoolPluginDynamic::clone() const { CornerPoolPluginDynamic *plugin = new CornerPoolPluginDynamic(mLayerName, mPoolType); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::DimsExprs CornerPoolPluginDynamic::getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs, nvinfer1::IExprBuilder &exprBuilder) { return inputs[0]; } bool CornerPoolPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs, int nbOutputs) { switch (pos) { // input[0] case 0: return inOut[pos].type == nvinfer1::DataType::kFLOAT && inOut[pos].format == nvinfer1::TensorFormat::kLINEAR; // output[0] case 1: return inOut[pos].type == inOut[0].type && inOut[pos].format == inOut[0].format; default: return false; } } void CornerPoolPluginDynamic::configurePlugin( const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs, const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {} size_t CornerPoolPluginDynamic::getWorkspaceSize( const nvinfer1::PluginTensorDesc *inputs, int nbInputs, const 
nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const { int sizeof_dtype = mmcv::getElementSize(outputs[0].type); } int CornerPoolPluginDynamic::enqueue( const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workSpace, cudaStream_t stream) { const void *input = inputs[0]; void *output_value = outputs[0]; const int batch_size = inputDesc[0].dims.d[0]; const int channels = inputDesc[0].dims.d[1]; const int height = inputDesc[0].dims.d[2]; const int width = inputDesc[0].dims.d[3]; CornerPoolForwardLauncher_float((float *)input, (float *)output_value, batch_size, channels, height, width, int(mPoolType), stream); return 0; } nvinfer1::DataType CornerPoolPluginDynamic::getOutputDataType( int index, const nvinfer1::DataType *inputTypes, int nbInputs) const { return inputTypes[0]; } // IPluginV2 Methods const char *CornerPoolPluginDynamic::getPluginType() const { switch (mPoolType) { case TRT_CORNER_POOL_TYPE::TRT_TOP_POOL: case TRT_CORNER_POOL_TYPE::TRT_BOTTOM_POOL: case TRT_CORNER_POOL_TYPE::TRT_LEFT_POOL: case TRT_CORNER_POOL_TYPE::TRT_RIGHT_POOL: return CORNER_POOL_PLUGIN_NAME; default: return "UnknownpoolType"; } } const char *CornerPoolPluginDynamic::getPluginVersion() const { return PLUGIN_VERSION; } int CornerPoolPluginDynamic::getNbOutputs() const { return 1; } int CornerPoolPluginDynamic::initialize() { return 0; } void CornerPoolPluginDynamic::terminate() {} size_t CornerPoolPluginDynamic::getSerializationSize() const { return sizeof(mPoolType); } void CornerPoolPluginDynamic::serialize(void *buffer) const { serialize_value(&buffer, mPoolType); } void CornerPoolPluginDynamic::destroy() { // This gets called when the network containing plugin is destroyed delete this; } void CornerPoolPluginDynamic::setPluginNamespace(const char *libNamespace) { mNamespace = libNamespace; } const char *CornerPoolPluginDynamic::getPluginNamespace() const { return mNamespace.c_str(); } 
CornerPoolPluginDynamicCreator::CornerPoolPluginDynamicCreator() { mPluginAttributes.clear(); mPluginAttributes.emplace_back(nvinfer1::PluginField("mode")); mFC.nbFields = mPluginAttributes.size(); mFC.fields = mPluginAttributes.data(); } const char *CornerPoolPluginDynamicCreator::getPluginName() const { return CORNER_POOL_PLUGIN_NAME; } const char *CornerPoolPluginDynamicCreator::getPluginVersion() const { return PLUGIN_VERSION; } const nvinfer1::PluginFieldCollection * CornerPoolPluginDynamicCreator::getFieldNames() { return &mFC; } nvinfer1::IPluginV2 *CornerPoolPluginDynamicCreator::createPlugin( const char *name, const nvinfer1::PluginFieldCollection *fc) { TRT_CORNER_POOL_TYPE poolType; int poolMode = -1; for (int i = 0; i < fc->nbFields; i++) { if (fc->fields[i].data == nullptr) { continue; } std::string field_name(fc->fields[i].name); if (field_name.compare("mode") == 0) { poolMode = static_cast(fc->fields[i].data)[0]; } } assert(poolMode >= 0 && poolMode <= 3); switch (poolMode) { case 0: poolType = TRT_CORNER_POOL_TYPE::TRT_TOP_POOL; break; case 1: poolType = TRT_CORNER_POOL_TYPE::TRT_BOTTOM_POOL; break; case 2: poolType = TRT_CORNER_POOL_TYPE::TRT_LEFT_POOL; break; case 3: poolType = TRT_CORNER_POOL_TYPE::TRT_RIGHT_POOL; break; default: break; } CornerPoolPluginDynamic *plugin = new CornerPoolPluginDynamic(name, poolType); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::IPluginV2 *CornerPoolPluginDynamicCreator::deserializePlugin( const char *name, const void *serialData, size_t serialLength) { // This object will be deleted when the network is destroyed, which will // call FCPluginDynamic::destroy() auto plugin = new CornerPoolPluginDynamic(name, serialData, serialLength); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } void CornerPoolPluginDynamicCreator::setPluginNamespace( const char *libNamespace) { mNamespace = libNamespace; } const char *CornerPoolPluginDynamicCreator::getPluginNamespace() const { 
return mNamespace.c_str(); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool_kernel.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "common_cuda_helper.hpp" #include "trt_cuda_helper.cuh" #include "trt_plugin_helper.hpp" template __global__ void top_bottom_pool_kernel(const scalar_t *input, scalar_t *output, const int batch_size, const int channels, const int height, const int width, const int pool_type) { const int nthreads = batch_size * channels * width; CUDA_1D_KERNEL_LOOP(index, nthreads) { int n_idx = index / (channels * width); // batch int w_idx = index % width; // width int c_idx = (index / width) % channels; // channels int offset_n = n_idx * channels * width * height; int offset_n_c = offset_n + c_idx * width * height; int direction = -1; // in [-1, 1], default for TopPool int index_start = height - 2; // default for TopPool // pool_type in [0, 1] if (pool_type == 0) { // TopPool // directly copy the most bottom value from input to output output[offset_n_c + (height - 1) * width + w_idx] = input[offset_n_c + (height - 1) * width + w_idx]; } else { // BottomPool // directly copy the most top value from input to output output[offset_n_c + w_idx] = input[offset_n_c + w_idx]; index_start = 1; direction = 1; } // do pool for (int h = index_start; h >= 0 && h < height; h += direction) { output[offset_n_c + h * width + w_idx] = max(output[offset_n_c + (h - direction) * width + w_idx], input[offset_n_c + h * width + w_idx]); } } } template __global__ void left_right_pool_kernel(const scalar_t *input, scalar_t *output, const int batch_size, const int channels, const int height, const int width, const int pool_type) { const int nthreads = batch_size * channels * height; CUDA_1D_KERNEL_LOOP(index, nthreads) { int n_idx = index / (channels * height); // batch int h_idx = index % height; // height int c_idx = (index / 
// NOTE(review): this span begins inside a kernel body whose opening lies
// before the visible text (presumably left_right_pool_kernel, the kernel the
// launcher below starts for pool types 2 and 3 -- confirm). Template parameter
// lists, kernel launch configurations and system-header names look like they
// were stripped from this copy; every token is kept exactly as found.
height) % channels;  // channels
    // Flat offsets of the selected batch item, channel and row.
    int offset_n = n_idx * channels * width * height;
    int offset_n_c = offset_n + c_idx * width * height;
    int offset_n_c_h = offset_n_c + h_idx * width;
    int direction = -1;           // in [-1, 1], default for LeftPool
    int index_start = width - 2;  // default for LeftPool
    // pool_type in [2, 3]
    if (pool_type == 2) {
      // LeftPool
      // directly copy the most right value from input to output
      output[offset_n_c_h + width - 1] = input[offset_n_c_h + width - 1];
    } else {
      // RightPool
      // directly copy the most left value from input to output
      output[offset_n_c_h] = input[offset_n_c_h];
      index_start = 1;
      direction = 1;
    }
    // do pool: running maximum along the row; each output element is the max
    // of the previously written neighbour and the current input element.
    for (int w = index_start; w >= 0 && w < width; w += direction) {
      output[offset_n_c_h + w] =
          max(output[offset_n_c_h + w - direction], input[offset_n_c_h + w]);
    }
  }
}

// Selects and launches the pooling kernel for `pool_type`.
//   0, 1 -> top_bottom_pool_kernel, sized by batch_size * channels * width
//   2, 3 -> left_right_pool_kernel, sized by batch_size * channels * height
// Any other pool_type launches nothing (the switch has no default branch).
template void CornerPoolForwardLauncher(const scalar_t *input, scalar_t *output,
                               const int batch_size, const int channels,
                               const int height, const int width,
                               const int pool_type, cudaStream_t stream) {
  int nthreads = -1, col_block = -1;

  switch (pool_type) {
    case 0:
    case 1:
      nthreads = batch_size * channels * width;
      col_block = GET_BLOCKS(nthreads, THREADS_PER_BLOCK);
      top_bottom_pool_kernel <<>>(
          input, output, batch_size, channels, height, width, pool_type);
      break;
    case 2:
    case 3:
      nthreads = batch_size * channels * height;
      col_block = GET_BLOCKS(nthreads, THREADS_PER_BLOCK);
      left_right_pool_kernel <<>>(
          input, output, batch_size, channels, height, width, pool_type);
      break;
  }
}

// Non-template float entry point; forwards every argument unchanged.
void CornerPoolForwardLauncher_float(const float *input, float *output,
                                     const int batch_size, const int channels,
                                     const int height, const int width,
                                     const int pool_type, cudaStream_t stream) {
  CornerPoolForwardLauncher(input, output, batch_size, channels, height, width,
                            pool_type, stream);
}
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_cuda_helper.cu
================================================
// Copyright (c)
OpenMMLab. All rights reserved
#include
#include "common_cuda_helper.hpp"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"

using mmcv::TensorDesc;

// Copies `n` elements from `src` to `dst` while permuting dimensions.
// For each destination index the per-dimension coordinates are recovered with
// the destination strides, then mapped to a source offset through
// src_stride[permute[i]]. The permutation is read from ts_permute.shape.
template __global__ void copy_permute_kernel(scalar_t *dst, const scalar_t *src, int n,
                                    TensorDesc ts_src_stride,
                                    TensorDesc ts_dst_stride,
                                    TensorDesc ts_permute) {
  const int src_dim = ts_src_stride.dim;
  int *src_stride = &(ts_src_stride.stride[0]);
  int *dst_stride = &(ts_dst_stride.stride[0]);
  int *permute = &(ts_permute.shape[0]);
  CUDA_1D_KERNEL_LOOP(index, n) {
    size_t dst_index = index;
    size_t src_index = 0;
    for (int i = 0; i < src_dim; ++i) {
      // Peel off the coordinate along destination dimension i.
      int dim_index = dst_index / dst_stride[i];
      dst_index = dst_index % dst_stride[i];
      src_index += dim_index * src_stride[permute[i]];
    }
    dst[index] = src[src_index];
  }
}

// Host-side wrapper: builds contiguous strides for the source shape
// (`src_size`) and for the permuted destination shape, counts the elements to
// copy and launches copy_permute_kernel.
// NOTE(review): `src_dim` ints are copied into fixed-size TensorDesc arrays
// whose capacity is not visible here -- confirm callers stay within it.
template void memcpyPermute(scalar_t *dst, const scalar_t *src, int *src_size,
                   int *permute, int src_dim, cudaStream_t stream) {
  size_t copy_size = 1;
  TensorDesc ts_permute;
  memcpy(&(ts_permute.shape[0]), permute, src_dim * sizeof(int));

  TensorDesc ts_src_stride;
  TensorDesc ts_dst_stride;
  ts_src_stride.dim = src_dim;
  ts_dst_stride.dim = src_dim;
  int *src_stride = &(ts_src_stride.stride[0]);
  int *dst_stride = &(ts_dst_stride.stride[0]);
  int *dst_size = &(ts_dst_stride.shape[0]);
  // The innermost dimension is contiguous in both layouts.
  src_stride[src_dim - 1] = 1;
  dst_stride[src_dim - 1] = 1;

  // Destination shape (permuted source shape) and source strides.
  for (int i = src_dim - 1; i >= 0; --i) {
    dst_size[i] = src_size[permute[i]];
    if (i < src_dim - 1) {
      src_stride[i] = src_stride[i + 1] * src_size[i + 1];
    }
  }

  // Destination strides and total element count.
  for (int i = src_dim - 1; i >= 0; --i) {
    copy_size *= dst_size[i];
    if (i < src_dim - 1) {
      dst_stride[i] = dst_stride[i + 1] * dst_size[i + 1];
    }
  }

  copy_permute_kernel <<>>(
          dst, src, copy_size, ts_src_stride, ts_dst_stride, ts_permute);
}

// Explicit instantiation for float buffers.
template void memcpyPermute(float *dst, const float *src, int *src_size,
                                   int *permute, int src_dim,
                                   cudaStream_t stream);

// Specialisation that forwards to cublasSgemm (float operands).
template <>
cublasStatus_t cublasGemmWrap(cublasHandle_t handle,
                                     cublasOperation_t transa,
                                     cublasOperation_t transb, int m, int n,
                                     int k, const float *alpha,
                                     const float *A, int lda, const float *B,
                                     int ldb, const float *beta, float *C,
                                     int ldc) {
  return cublasSgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb,
                     beta, C, ldc);
}

// Specialisation that forwards to cublasHgemm (half operands).
template <>
cublasStatus_t cublasGemmWrap(cublasHandle_t handle,
                                    cublasOperation_t transa,
                                    cublasOperation_t transb, int m, int n,
                                    int k, const half *alpha, const half *A,
                                    int lda, const half *B, int ldb,
                                    const half *beta, half *C, int ldc) {
  return cublasHgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb,
                     beta, C, ldc);
}
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_cummaxmin.hpp"
#include
#include "trt_serialize.hpp"

// Launchers implemented in the accompanying CUDA translation unit.
void CumMaxMinForwardLauncher_float(const float *input, float *output_value,
                                    int *output_index, const int *dims,
                                    int nbDims, int cum_dim, int cum_type,
                                    cudaStream_t stream);

void CumMaxMinForwardLauncher_int32(const int *input, int *output_value,
                                    int *output_index, const int *dims,
                                    int nbDims, int cum_dim, int cum_type,
                                    cudaStream_t stream);

namespace {
static const char *PLUGIN_VERSION{"1"};
static const char *CUMMAXMIN_PLUGIN_NAME{"cummaxmin"};
static const char *CUMMAX_PLUGIN_NAME{"cummax"};
static const char *CUMMIN_PLUGIN_NAME{"cummin"};
}  // namespace

// Builds a plugin from explicit parameters: layer name, scan axis and
// comparison type.
CumMaxMinPluginDynamic::CumMaxMinPluginDynamic(const std::string &name, int dim,
                                               TRT_CUMCMPTYPE cumType)
    : mLayerName(name), mDim(dim), mCumType(cumType) {}

// Builds a plugin from a serialized buffer; fields are read in the same order
// serialize() writes them (mDim, then mCumType).
CumMaxMinPluginDynamic::CumMaxMinPluginDynamic(const std::string name,
                                               const void *data, size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mDim);
  deserialize_value(&data, &length, &mCumType);
}

CumMaxMinPluginDynamic::~CumMaxMinPluginDynamic() {}

// Creates a copy carrying the same parameters and plugin namespace.
// (The remainder of this function continues on the following source line.)
nvinfer1::IPluginV2DynamicExt *CumMaxMinPluginDynamic::clone() const {
  CumMaxMinPluginDynamic *plugin =
      new CumMaxMinPluginDynamic(mLayerName, mDim, mCumType);
plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::DimsExprs CumMaxMinPluginDynamic::getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs, nvinfer1::IExprBuilder &exprBuilder) { return inputs[0]; } bool CumMaxMinPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs, int nbOutputs) { switch (pos) { // input[0] case 0: return (inOut[pos].type == nvinfer1::DataType::kFLOAT || inOut[pos].type == nvinfer1::DataType::kINT32) && inOut[pos].format == nvinfer1::TensorFormat::kLINEAR; // output[0] case 1: return inOut[pos].type == inOut[0].type && inOut[pos].format == inOut[0].format; // output[1] case 2: return inOut[pos].type == nvinfer1::DataType::kINT32 && inOut[pos].format == nvinfer1::TensorFormat::kLINEAR; default: return false; } } void CumMaxMinPluginDynamic::configurePlugin( const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs, const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {} size_t CumMaxMinPluginDynamic::getWorkspaceSize( const nvinfer1::PluginTensorDesc *inputs, int nbInputs, const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const { int sizeof_dtype = mmcv::getElementSize(outputs[0].type); } int CumMaxMinPluginDynamic::enqueue( const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workSpace, cudaStream_t stream) { const void *input = inputs[0]; void *output_value = outputs[0]; int *output_index = (int *)outputs[1]; const int *dims = &(inputDesc[0].dims.d[0]); int nbDims = inputDesc[0].dims.nbDims; switch (inputDesc[0].type) { case nvinfer1::DataType::kFLOAT: CumMaxMinForwardLauncher_float((float *)input, (float *)output_value, output_index, dims, nbDims, mDim, int(mCumType), stream); break; case nvinfer1::DataType::kINT32: CumMaxMinForwardLauncher_int32((int *)input, (int *)output_value, output_index, dims, nbDims, mDim, 
int(mCumType), stream); break; default: break; } return 0; } nvinfer1::DataType CumMaxMinPluginDynamic::getOutputDataType( int index, const nvinfer1::DataType *inputTypes, int nbInputs) const { switch (index) { case 0: return inputTypes[0]; case 1: return nvinfer1::DataType::kINT32; default: break; } } // IPluginV2 Methods const char *CumMaxMinPluginDynamic::getPluginType() const { switch (mCumType) { case TRT_CUMCMPTYPE::TRT_CUMMAX: return CUMMAX_PLUGIN_NAME; case TRT_CUMCMPTYPE::TRT_CUMMIN: return CUMMIN_PLUGIN_NAME; default: return "UnknownCumType"; } } const char *CumMaxMinPluginDynamic::getPluginVersion() const { return PLUGIN_VERSION; } int CumMaxMinPluginDynamic::getNbOutputs() const { return 2; } int CumMaxMinPluginDynamic::initialize() { return 0; } void CumMaxMinPluginDynamic::terminate() {} size_t CumMaxMinPluginDynamic::getSerializationSize() const { return sizeof(mDim) + sizeof(mCumType); } void CumMaxMinPluginDynamic::serialize(void *buffer) const { serialize_value(&buffer, mDim); serialize_value(&buffer, mCumType); } void CumMaxMinPluginDynamic::destroy() { // This gets called when the network containing plugin is destroyed delete this; } void CumMaxMinPluginDynamic::setPluginNamespace(const char *libNamespace) { mNamespace = libNamespace; } const char *CumMaxMinPluginDynamic::getPluginNamespace() const { return mNamespace.c_str(); } CumMaxMinPluginDynamicCreator::CumMaxMinPluginDynamicCreator( TRT_CUMCMPTYPE cumType) : mCumType(cumType) { mPluginAttributes.clear(); mPluginAttributes.emplace_back(nvinfer1::PluginField("dim")); mFC.nbFields = mPluginAttributes.size(); mFC.fields = mPluginAttributes.data(); } const char *CumMaxMinPluginDynamicCreator::getPluginName() const { return CUMMAXMIN_PLUGIN_NAME; } const char *CumMaxMinPluginDynamicCreator::getPluginVersion() const { return PLUGIN_VERSION; } const nvinfer1::PluginFieldCollection * CumMaxMinPluginDynamicCreator::getFieldNames() { return &mFC; } nvinfer1::IPluginV2 
*CumMaxMinPluginDynamicCreator::createPlugin( const char *name, const nvinfer1::PluginFieldCollection *fc) { int dim = 0; for (int i = 0; i < fc->nbFields; i++) { if (fc->fields[i].data == nullptr) { continue; } std::string field_name(fc->fields[i].name); if (field_name.compare("dim") == 0) { dim = static_cast(fc->fields[i].data)[0]; } } CumMaxMinPluginDynamic *plugin = new CumMaxMinPluginDynamic(name, dim, mCumType); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::IPluginV2 *CumMaxMinPluginDynamicCreator::deserializePlugin( const char *name, const void *serialData, size_t serialLength) { // This object will be deleted when the network is destroyed, which will // call FCPluginDynamic::destroy() auto plugin = new CumMaxMinPluginDynamic(name, serialData, serialLength); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } void CumMaxMinPluginDynamicCreator::setPluginNamespace( const char *libNamespace) { mNamespace = libNamespace; } const char *CumMaxMinPluginDynamicCreator::getPluginNamespace() const { return mNamespace.c_str(); } CumMaxPluginDynamicCreator::CumMaxPluginDynamicCreator() : CumMaxMinPluginDynamicCreator(TRT_CUMCMPTYPE::TRT_CUMMAX) {} const char *CumMaxPluginDynamicCreator::getPluginName() const { return CUMMAX_PLUGIN_NAME; } CumMinPluginDynamicCreator::CumMinPluginDynamicCreator() : CumMaxMinPluginDynamicCreator(TRT_CUMCMPTYPE::TRT_CUMMIN) {} const char *CumMinPluginDynamicCreator::getPluginName() const { return CUMMIN_PLUGIN_NAME; } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin_kernel.cu ================================================ // Copyright (c) OpenMMLab. 
// All rights reserved
#include "common_cuda_helper.hpp"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"

using mmcv::TensorDesc;

// Cumulative max (cum_type == 0) or min (cum_type == 1) along `cum_dim`.
//
// Each loop iteration handles one 1-D line of the tensor along `cum_dim`: it
// walks the line front to back, carrying the best value seen so far and the
// position where it was found, and writes both for every element. Comparisons
// are strict, so on ties the earliest index is kept. Any other cum_type leaves
// the carried value at the first element of the line.
template <typename scalar_t>
__global__ void cummaxmin_kernel(const scalar_t *input, scalar_t *output_value,
                                 int *output_index, TensorDesc tensor_desc,
                                 int cum_dim, int cum_type) {
  const size_t cum_size = tensor_desc.shape[cum_dim];
  const size_t cum_stride = tensor_desc.stride[cum_dim];
  // Number of independent lines = total elements / length of the scan axis.
  const size_t data_size =
      tensor_desc.stride[0] * tensor_desc.shape[0] / cum_size;
  CUDA_1D_KERNEL_LOOP(index, data_size) {
    // Offset of the first element of this line: the outer part of `index`
    // skips whole (cum_size * cum_stride) slabs, the inner part selects the
    // position inside the trailing dimensions.
    size_t cum_offset =
        index / cum_stride * (cum_size * cum_stride) + index % cum_stride;
    int cum_index = 0;
    auto cum_value = input[cum_offset];
    output_value[cum_offset] = cum_value;
    output_index[cum_offset] = cum_index;

    for (size_t cum_index_current = 1; cum_index_current < cum_size;
         ++cum_index_current) {
      cum_offset += cum_stride;
      const auto cum_value_current = input[cum_offset];
      switch (cum_type) {
        case 0:  // max
          if (cum_value_current > cum_value) {
            cum_value = cum_value_current;
            cum_index = cum_index_current;
          }
          break;
        case 1:  // min
          if (cum_value_current < cum_value) {
            cum_value = cum_value_current;
            cum_index = cum_index_current;
          }
          break;
      }
      output_value[cum_offset] = cum_value;
      output_index[cum_offset] = cum_index;
    }
  }
}

// Host-side launcher for cummaxmin_kernel.
//
//   dims / nbDims : shape of the (contiguous) input tensor
//   cum_dim       : axis to scan; negative values count from the last axis
//   cum_type      : 0 for max, 1 for min
//
// Nothing is launched when the tensor has rank 0, when `cum_dim` is out of
// range, or when any extent is zero. Those cases previously indexed `dims`
// out of bounds or divided by a zero extent before the launch.
template <typename scalar_t>
void CumMaxMinForwardLauncher(const scalar_t *input, scalar_t *output_value,
                              int *output_index, const int *dims, int nbDims,
                              int cum_dim, int cum_type, cudaStream_t stream) {
  if (nbDims <= 0) {
    return;
  }

  // Normalise a negative axis to its non-negative equivalent.
  cum_dim = cum_dim >= 0 ? cum_dim : (nbDims + cum_dim);
  if (cum_dim < 0 || cum_dim >= nbDims) {
    return;
  }

  // An empty tensor has nothing to scan.
  for (int i = 0; i < nbDims; ++i) {
    if (dims[i] <= 0) {
      return;
    }
  }

  // Fill the descriptor with the shape and the matching contiguous strides.
  TensorDesc tensor_desc;
  memset((void *)&tensor_desc, 0, sizeof(TensorDesc));
  tensor_desc.dim = nbDims;
  tensor_desc.shape[nbDims - 1] = dims[nbDims - 1];
  tensor_desc.stride[nbDims - 1] = 1;
  for (int i = nbDims - 2; i >= 0; --i) {
    tensor_desc.shape[i] = dims[i];
    tensor_desc.stride[i] = dims[i + 1] * tensor_desc.stride[i + 1];
  }

  // One work item per line along the scan axis.
  const int data_size =
      tensor_desc.stride[0] * tensor_desc.shape[0] / tensor_desc.shape[cum_dim];

  const int col_block = GET_BLOCKS(data_size, THREADS_PER_BLOCK);

  cummaxmin_kernel<scalar_t><<<col_block, THREADS_PER_BLOCK, 0, stream>>>(
      input, output_value, output_index, tensor_desc, cum_dim, cum_type);
}

// Non-template entry points used by the plugin translation unit.
void CumMaxMinForwardLauncher_float(const float *input, float *output_value,
                                    int *output_index, const int *dims,
                                    int nbDims, int cum_dim, int cum_type,
                                    cudaStream_t stream) {
  CumMaxMinForwardLauncher<float>(input, output_value, output_index, dims,
                                  nbDims, cum_dim, cum_type, stream);
}

void CumMaxMinForwardLauncher_int32(const int *input, int *output_value,
                                    int *output_index, const int *dims,
                                    int nbDims, int cum_dim, int cum_type,
                                    cudaStream_t stream) {
  CumMaxMinForwardLauncher<int>(input, output_value, output_index, dims, nbDims,
                                cum_dim, cum_type, stream);
}
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv.cpp
================================================
// Copyright (c) OpenMMLab.
All rights reserved #include "trt_deform_conv.hpp" #include #include #include "trt_serialize.hpp" void DeformConvForwardCUDAKernelLauncher_float( const float *input, const float *weight, const float *offset, float *output, void *workspace, int batchSize, int nInputPlane, int inputHeight, int inputWidth, int nOutputPlane, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, int im2col_step, cublasHandle_t cublas_handle, cudaStream_t stream); namespace { static const char *PLUGIN_VERSION{"1"}; static const char *PLUGIN_NAME{"MMCVDeformConv2d"}; } // namespace nvinfer1::PluginFieldCollection DeformableConvPluginDynamicCreator::mFC{}; std::vector DeformableConvPluginDynamicCreator::mPluginAttributes; DeformableConvPluginDynamic::DeformableConvPluginDynamic( const std::string &name, const nvinfer1::Dims &stride, const nvinfer1::Dims &padding, const nvinfer1::Dims &dilation, const int deformableGroup, const int group, int im2colStep) : mLayerName(name), mStride(stride), mPadding(padding), mDilation(dilation), mDeformableGroup(deformableGroup), mGroup(group), mIm2colStep(im2colStep) {} DeformableConvPluginDynamic::DeformableConvPluginDynamic(const std::string name, const void *data, size_t length) : mLayerName(name) { deserialize_value(&data, &length, &mStride); deserialize_value(&data, &length, &mPadding); deserialize_value(&data, &length, &mDilation); deserialize_value(&data, &length, &mDeformableGroup); deserialize_value(&data, &length, &mGroup); deserialize_value(&data, &length, &mIm2colStep); } DeformableConvPluginDynamic::~DeformableConvPluginDynamic() {} nvinfer1::IPluginV2DynamicExt *DeformableConvPluginDynamic::clone() const { DeformableConvPluginDynamic *plugin = new DeformableConvPluginDynamic(mLayerName, mStride, mPadding, mDilation, mDeformableGroup, mGroup, mIm2colStep); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::DimsExprs 
DeformableConvPluginDynamic::getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs, nvinfer1::IExprBuilder &exprBuilder) { nvinfer1::DimsExprs ret; ret.nbDims = 4; ret.d[0] = inputs[0].d[0]; ret.d[1] = inputs[2].d[0]; ret.d[2] = inputs[1].d[2]; ret.d[3] = inputs[1].d[3]; return ret; } bool DeformableConvPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs, int nbOutputs) { if (pos == 0) { return (inOut[pos].type == nvinfer1::DataType::kFLOAT && inOut[pos].format == nvinfer1::TensorFormat::kLINEAR); } else { return inOut[pos].type == inOut[0].type && inOut[pos].format == inOut[0].format; } } void DeformableConvPluginDynamic::configurePlugin( const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs, const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {} size_t DeformableConvPluginDynamic::getWorkspaceSize( const nvinfer1::PluginTensorDesc *inputs, int nbInputs, const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const { int sizeof_dtype = mmcv::getElementSize(outputs[0].type); int batch_size = inputs[0].dims.d[0]; int nInputPlane = inputs[0].dims.d[1]; int inputHeight = inputs[0].dims.d[2]; int inputWidth = inputs[0].dims.d[3]; int nOutputPlane = outputs[0].dims.d[1]; int outputHeight = outputs[0].dims.d[2]; int outputWidth = outputs[0].dims.d[3]; int kW = inputs[2].dims.d[2]; int kH = inputs[2].dims.d[3]; int im2col_step = std::min(batch_size, mIm2colStep); size_t col_size = mmcv::getAlignedSize(nInputPlane * kW * kH * im2col_step * outputHeight * outputWidth * sizeof_dtype); size_t out_size = 0; if (im2col_step != 1) out_size = mmcv::getAlignedSize(batch_size * nOutputPlane * outputHeight * outputWidth * sizeof_dtype); return col_size + out_size; } int DeformableConvPluginDynamic::enqueue( const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workSpace, cudaStream_t 
stream) { int batch_size = inputDesc[0].dims.d[0]; int inputChannel = inputDesc[0].dims.d[1]; int inputHeight = inputDesc[0].dims.d[2]; int inputWidth = inputDesc[0].dims.d[3]; int outputChannel = outputDesc[0].dims.d[1]; int kernelHeight = inputDesc[2].dims.d[2]; int kernelWidth = inputDesc[2].dims.d[3]; const void *x = inputs[0]; const void *offset = inputs[1]; const void *weight = inputs[2]; void *output = outputs[0]; int im2col_step = std::min(batch_size, mIm2colStep); // TODO: add fp16 support auto data_type = inputDesc[0].type; switch (data_type) { case nvinfer1::DataType::kFLOAT: DeformConvForwardCUDAKernelLauncher_float( (float *)x, (float *)weight, (float *)offset, (float *)output, workSpace, batch_size, inputChannel, inputHeight, inputWidth, outputChannel, kernelWidth, kernelHeight, mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], mDilation.d[1], mGroup, mDeformableGroup, im2col_step, m_cublas_handle, stream); break; default: return 1; break; } return 0; } nvinfer1::DataType DeformableConvPluginDynamic::getOutputDataType( int index, const nvinfer1::DataType *inputTypes, int nbInputs) const { return inputTypes[0]; } // IPluginV2 Methods const char *DeformableConvPluginDynamic::getPluginType() const { return PLUGIN_NAME; } const char *DeformableConvPluginDynamic::getPluginVersion() const { return PLUGIN_VERSION; } int DeformableConvPluginDynamic::getNbOutputs() const { return 1; } int DeformableConvPluginDynamic::initialize() { return 0; } void DeformableConvPluginDynamic::terminate() {} size_t DeformableConvPluginDynamic::getSerializationSize() const { return sizeof(mStride) + sizeof(mPadding) + sizeof(mDilation) + sizeof(mDeformableGroup) + sizeof(mGroup) + sizeof(mIm2colStep); } void DeformableConvPluginDynamic::serialize(void *buffer) const { serialize_value(&buffer, mStride); serialize_value(&buffer, mPadding); serialize_value(&buffer, mDilation); serialize_value(&buffer, mDeformableGroup); serialize_value(&buffer, mGroup); 
serialize_value(&buffer, mIm2colStep); } void DeformableConvPluginDynamic::destroy() { // This gets called when the network containing plugin is destroyed delete this; } void DeformableConvPluginDynamic::attachToContext( cudnnContext *cudnnContext, cublasContext *cublasContext, nvinfer1::IGpuAllocator *gpuAllocator) { m_cublas_handle = cublasContext; } void DeformableConvPluginDynamic::detachFromContext() {} void DeformableConvPluginDynamic::setPluginNamespace(const char *libNamespace) { mNamespace = libNamespace; } const char *DeformableConvPluginDynamic::getPluginNamespace() const { return mNamespace.c_str(); } ////////////////////// creator ///////////////////////////// DeformableConvPluginDynamicCreator::DeformableConvPluginDynamicCreator() { mPluginAttributes.emplace_back(nvinfer1::PluginField("stride")); mPluginAttributes.emplace_back(nvinfer1::PluginField("padding")); mPluginAttributes.emplace_back(nvinfer1::PluginField("dilation")); mPluginAttributes.emplace_back(nvinfer1::PluginField("groups")); mPluginAttributes.emplace_back(nvinfer1::PluginField("deform_groups")); mPluginAttributes.emplace_back(nvinfer1::PluginField("bias")); mPluginAttributes.emplace_back(nvinfer1::PluginField("im2col_step")); mFC.nbFields = mPluginAttributes.size(); mFC.fields = mPluginAttributes.data(); } const char *DeformableConvPluginDynamicCreator::getPluginName() const { return PLUGIN_NAME; } const char *DeformableConvPluginDynamicCreator::getPluginVersion() const { return PLUGIN_VERSION; } const nvinfer1::PluginFieldCollection * DeformableConvPluginDynamicCreator::getFieldNames() { return &mFC; } nvinfer1::IPluginV2 *DeformableConvPluginDynamicCreator::createPlugin( const char *name, const nvinfer1::PluginFieldCollection *fc) { nvinfer1::Dims stride{2, {1, 1}}; nvinfer1::Dims padding{2, {0, 0}}; nvinfer1::Dims dilation{2, {1, 1}}; int deformableGroup = 1; int group = 1; int im2col_step = 32; for (int i = 0; i < fc->nbFields; i++) { if (fc->fields[i].data == nullptr) { continue; 
} std::string field_name(fc->fields[i].name); if (field_name.compare("stride") == 0) { stride.nbDims = 2; stride.d[0] = static_cast(fc->fields[i].data)[0]; if (fc->fields[i].length == 1) { stride.d[1] = stride.d[0]; } else { stride.d[1] = static_cast(fc->fields[i].data)[1]; } } if (field_name.compare("padding") == 0) { padding.nbDims = 2; padding.d[0] = static_cast(fc->fields[i].data)[0]; if (fc->fields[i].length == 1) { padding.d[1] = padding.d[0]; } else { padding.d[1] = static_cast(fc->fields[i].data)[1]; } } if (field_name.compare("dilation") == 0) { dilation.nbDims = 2; dilation.d[0] = static_cast(fc->fields[i].data)[0]; if (fc->fields[i].length == 1) { dilation.d[1] = dilation.d[0]; } else { dilation.d[1] = static_cast(fc->fields[i].data)[1]; } } if (field_name.compare("deformable_group") == 0) { deformableGroup = static_cast(fc->fields[i].data)[0]; } if (field_name.compare("group") == 0) { group = static_cast(fc->fields[i].data)[0]; } if (field_name.compare("im2col_step") == 0) { im2col_step = static_cast(fc->fields[i].data)[0]; } } DeformableConvPluginDynamic *plugin = new DeformableConvPluginDynamic( name, stride, padding, dilation, deformableGroup, group, im2col_step); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::IPluginV2 *DeformableConvPluginDynamicCreator::deserializePlugin( const char *name, const void *serialData, size_t serialLength) { auto plugin = new DeformableConvPluginDynamic(name, serialData, serialLength); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } void DeformableConvPluginDynamicCreator::setPluginNamespace( const char *libNamespace) { mNamespace = libNamespace; } const char *DeformableConvPluginDynamicCreator::getPluginNamespace() const { return mNamespace.c_str(); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv_kernel.cu ================================================ // Copyright (c) OpenMMLab. 
// All rights reserved
// NOTE(review): the system header name below was missing from this copy of
// the file and is restored on a best-effort basis -- confirm against the
// original source.
#include <cuda_fp16.h>

#include "common_cuda_helper.hpp"
#include "deform_conv_cuda_kernel.cuh"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"

// Launches deformable_im2col_gpu_kernel on `stream` for `parallel_imgs`
// images, writing the unfolded columns into `data_col`. The output extents
// are derived from the input size, padding, dilation, kernel size and stride.
template <typename T>
void trt_deformable_im2col(const T* data_input, const T* data_offset,
                           const int channels, const int height,
                           const int width, const int ksize_h,
                           const int ksize_w, const int pad_h, const int pad_w,
                           const int stride_h, const int stride_w,
                           const int dilation_h, const int dilation_w,
                           const int parallel_imgs, const int deformable_group,
                           T* data_col, cudaStream_t stream) {
  int height_col =
      (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
  int width_col =
      (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
  int num_kernels = channels * height_col * width_col * parallel_imgs;
  int channel_per_deformable_group = channels / deformable_group;

  deformable_im2col_gpu_kernel<T>
      <<<GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, stream>>>(
          num_kernels, data_input, data_offset, height, width, ksize_h, ksize_w,
          pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
          channel_per_deformable_group, parallel_imgs, channels,
          deformable_group, height_col, width_col, data_col);

  cudaCheckError();
}

// Deformable convolution forward pass.
//
// The batch is processed in chunks of `im2col_step` images: each chunk is
// unfolded into the column buffer at the start of `workspace`, then one GEMM
// per group multiplies it with the weights. With im2col_step == 1 the GEMM
// writes straight into `output`; otherwise it writes into a staging buffer
// placed after the column buffer, which is permuted into `output` at the end.
//
// NOTE(review): the chunk loop runs batchSize / im2col_step times (integer
// division), so a batch that is not a multiple of im2col_step leaves its
// trailing images unprocessed -- confirm callers guarantee divisibility.
template <typename scalar_t>
void DeformConvForwardCUDAKernelLauncher(
    const scalar_t* input, const scalar_t* weight, const scalar_t* offset,
    scalar_t* output, void* workspace, int batchSize, int nInputPlane,
    int inputHeight, int inputWidth, int nOutputPlane, int kW, int kH, int dW,
    int dH, int padW, int padH, int dilationW, int dilationH, int group,
    int deformable_group, int im2col_step, cublasHandle_t cublas_handle,
    cudaStream_t stream) {
  size_t word_size = sizeof(scalar_t);

  im2col_step = std::min(int(batchSize), im2col_step);
  long outputWidth =
      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
  long outputHeight =
      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

  long long columns_size =
      mmcv::getAlignedSize(nInputPlane * kW * kH * im2col_step * outputHeight *
                           outputWidth * word_size);

  // column buffer for img2col
  scalar_t* columns = (scalar_t*)workspace;
  // Advance past the column buffer in bytes. Arithmetic directly on a void*
  // is a compiler extension, so step through a char* instead.
  workspace = static_cast<char*>(workspace) + columns_size;

  scalar_t* output_buffer;
  if (im2col_step == 1) {
    output_buffer = output;
  } else {
    // output need permute when im2col_step!=1
    output_buffer = (scalar_t*)workspace;
  }

  // Element strides between consecutive chunks / groups.
  long long input_elt_step =
      im2col_step * nInputPlane * inputHeight * inputWidth;
  long long offset_elt_step =
      im2col_step * deformable_group * 2 * kH * kW * outputHeight * outputWidth;
  long long out_buffer_step =
      nOutputPlane * im2col_step * outputHeight * outputWidth;
  long long col_g_step =
      nInputPlane * kW * kH / group * im2col_step * outputHeight * outputWidth;
  long long weight_g_step =
      nOutputPlane / group * nInputPlane / group * kH * kW;
  long long out_buffer_g_step =
      nOutputPlane / group * im2col_step * outputHeight * outputWidth;
  // GEMM extents for one group of one chunk.
  int m = nOutputPlane / group;
  int n = im2col_step * outputHeight * outputWidth;
  int k = nInputPlane / group * kH * kW;
  scalar_t alpha = 1.;
  scalar_t beta = 0.;

  for (int elt = 0; elt < batchSize / im2col_step; elt++) {
    const scalar_t* input_start = input + elt * input_elt_step;
    const scalar_t* offset_start = offset + elt * offset_elt_step;

    trt_deformable_im2col<scalar_t>(input_start, offset_start, nInputPlane,
                                    inputHeight, inputWidth, kH, kW, padH, padW,
                                    dH, dW, dilationH, dilationW, im2col_step,
                                    deformable_group, columns, stream);

    for (int g = 0; g < group; ++g) {
      const scalar_t* weight_start = weight + g * weight_g_step;
      scalar_t* col_start = columns + g * col_g_step;
      scalar_t* out_buffer_start =
          output_buffer + elt * out_buffer_step + g * out_buffer_g_step;

      cublasGemmWrap<scalar_t>(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k,
                               &alpha, col_start, n, weight_start, k, &beta,
                               out_buffer_start, n);
      cudaCheckError();
    }
  }

  if (im2col_step != 1) {
    // The staging buffer is laid out as (chunk, channel, image-in-chunk, H, W);
    // swapping axes 1 and 2 yields the final output order. The explicit casts
    // avoid a narrowing conversion from long inside the brace initialiser.
    int output_buffer_shape[5] = {batchSize / im2col_step, nOutputPlane,
                                  im2col_step, static_cast<int>(outputHeight),
                                  static_cast<int>(outputWidth)};
    int output_buffer_permute[5] = {0, 2, 1, 3, 4};
    memcpyPermute<scalar_t>(output, output_buffer, &output_buffer_shape[0],
                            &output_buffer_permute[0], 5, stream);
  }
}

// Non-template float entry point used by the plugin translation unit.
void DeformConvForwardCUDAKernelLauncher_float(
    const float* input, const float* weight, const float* offset, float* output,
    void* workspace, int batchSize, int nInputPlane, int inputHeight,
    int inputWidth, int nOutputPlane, int kW, int kH, int dW, int dH, int padW,
    int padH, int dilationW, int dilationH, int group, int deformable_group,
    int im2col_step, cublasHandle_t cublas_handle, cudaStream_t stream) {
  DeformConvForwardCUDAKernelLauncher<float>(
      input, weight, offset, output, workspace, batchSize, nInputPlane,
      inputHeight, inputWidth, nOutputPlane, kW, kH, dW, dH, padW, padH,
      dilationW, dilationH, group, deformable_group, im2col_step, cublas_handle,
      stream);
}
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_grid_sampler.hpp"

// NOTE(review): the three system header names were missing from this copy of
// the file and are restored on a best-effort basis -- confirm against the
// original source.
#include <assert.h>
#include <stdio.h>

#include <chrono>

#include "trt_serialize.hpp"

using mmcv::GridSamplerInterpolation;
using mmcv::GridSamplerPadding;

// Implemented in the accompanying CUDA translation unit.
void grid_sample_float(float *output, const float *input, const float *grid,
                       int *output_dims, int *input_dims, int *grid_dims,
                       int nb_dims, GridSamplerInterpolation interp,
                       GridSamplerPadding padding, bool align_corners,
                       cudaStream_t stream);

namespace {
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"grid_sampler"};
}  // namespace

nvinfer1::PluginFieldCollection GridSamplerDynamicCreator::mFC{};
std::vector<nvinfer1::PluginField> GridSamplerDynamicCreator::mPluginAttributes;

// Builds a plugin from explicit sampling parameters.
GridSamplerDynamic::GridSamplerDynamic(const std::string &name, int mode,
                                       int paddingMode, bool alignCorners)
    : mLayerName(name),
      mMode(mode),
      mPaddingMode(paddingMode),
      mAlignCorners(alignCorners) {}

// Builds a plugin from a serialized buffer; the read order mirrors
// serialize().
GridSamplerDynamic::GridSamplerDynamic(const std::string name, const void *data,
                                       size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mMode);
  deserialize_value(&data, &length, &mPaddingMode);
  deserialize_value(&data, &length, &mAlignCorners);
}

// Creates a copy carrying the same parameters and plugin namespace.
nvinfer1::IPluginV2DynamicExt *GridSamplerDynamic::clone() const {
  GridSamplerDynamic *plugin =
      new GridSamplerDynamic(mLayerName, mMode, mPaddingMode, mAlignCorners);
  plugin->setPluginNamespace(getPluginNamespace());

  return plugin;
}

// Output keeps the first two extents of input 0; every further extent i is
// taken from extent i - 1 of input 1 (the grid).
nvinfer1::DimsExprs GridSamplerDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  nvinfer1::DimsExprs ret;
  ret.nbDims = inputs[0].nbDims;
  ret.d[0] = inputs[0].d[0];
  ret.d[1] = inputs[0].d[1];
  for (int i = 2; i < ret.nbDims; ++i) {
    ret.d[i] = inputs[1].d[i - 1];
  }
  return ret;
}

// Position 0 must be linear float; every other tensor must match it.
bool GridSamplerDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  if (pos == 0) {
    return (inOut[pos].type == nvinfer1::DataType::kFLOAT &&
            inOut[pos].format == nvinfer1::TensorFormat::kLINEAR);
  } else {
    return inOut[pos].type == inOut[0].type &&
           inOut[pos].format == inOut[0].format;
  }
}

void GridSamplerDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {
  // Validate input arguments
}

// No scratch memory is needed.
size_t GridSamplerDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  return 0;
}

// Runs grid sampling on `stream`. Inputs: 0 = feature map, 1 = grid.
// Unknown mode values keep the defaults (bilinear interpolation, zero
// padding). Returns 0 on success and 1 for an unsupported data type.
int GridSamplerDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
                                const nvinfer1::PluginTensorDesc *outputDesc,
                                const void *const *inputs, void *const *outputs,
                                void *workSpace, cudaStream_t stream) {
  nvinfer1::Dims input_dims = inputDesc[0].dims;
  nvinfer1::Dims grid_dims = inputDesc[1].dims;
  nvinfer1::Dims output_dims = outputDesc[0].dims;

  GridSamplerInterpolation interp_mode = GridSamplerInterpolation::Bilinear;
  switch (mMode) {
    case 0:
      interp_mode = GridSamplerInterpolation::Bilinear;
      break;
    case 1:
      interp_mode = GridSamplerInterpolation::Nearest;
      break;
    default:
      break;
  }

  GridSamplerPadding padding_mode = GridSamplerPadding::Zeros;
  switch (mPaddingMode) {
    case 0:
      padding_mode = GridSamplerPadding::Zeros;
      break;

    case 1:
      padding_mode = GridSamplerPadding::Border;
      break;

    case 2:
      padding_mode = GridSamplerPadding::Reflection;
      break;
    default:
      break;
  }

  auto data_type = inputDesc[0].type;

  switch (data_type) {
    case nvinfer1::DataType::kFLOAT:
      grid_sample_float(
          (float *)outputs[0], (float *)inputs[0], (float *)inputs[1],
          &(output_dims.d[0]), &(input_dims.d[0]), &(grid_dims.d[0]),
          input_dims.nbDims, interp_mode, padding_mode, mAlignCorners, stream);
      break;
    default:
      return 1;
      break;
  }

  return 0;
}

nvinfer1::DataType GridSamplerDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  return inputTypes[0];
}

// IPluginV2 Methods
const char *GridSamplerDynamic::getPluginType() const { return PLUGIN_NAME; }

const char *GridSamplerDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

int GridSamplerDynamic::getNbOutputs() const { return 1; }

int GridSamplerDynamic::initialize() { return 0; }

void GridSamplerDynamic::terminate() {}

size_t GridSamplerDynamic::getSerializationSize() const {
  return sizeof(mMode) + sizeof(mPaddingMode) + sizeof(mAlignCorners);
}

// Field order must stay in sync with the deserializing constructor.
void GridSamplerDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mMode);
  serialize_value(&buffer, mPaddingMode);
  serialize_value(&buffer, mAlignCorners);
}

void GridSamplerDynamic::destroy() {
  // This gets called when the network containing plugin is destroyed
  delete this;
}

void GridSamplerDynamic::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

const char *GridSamplerDynamic::getPluginNamespace() const {
  return mNamespace.c_str();
}

////////////////////// creator /////////////////////////////

// Registers the attribute names this creator advertises.
GridSamplerDynamicCreator::GridSamplerDynamicCreator() {
  mPluginAttributes.clear();
  mPluginAttributes.emplace_back(nvinfer1::PluginField("interpolation_mode"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("padding_mode"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("align_corners"));
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

const char *GridSamplerDynamicCreator::getPluginName() const {
  return PLUGIN_NAME;
}

const char *GridSamplerDynamicCreator::getPluginVersion() const {
  return PLUGIN_VERSION;
}

const nvinfer1::PluginFieldCollection *
GridSamplerDynamicCreator::getFieldNames() {
  return &mFC;
}

// Builds a plugin from a field collection. Defaults: interpolation mode 0,
// padding mode 0, align_corners false. Fields without data are skipped.
nvinfer1::IPluginV2 *GridSamplerDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  int mode = 0;
  int paddingMode = 0;
  bool alignCorners = false;

  for (int i = 0; i < fc->nbFields; i++) {
    if (fc->fields[i].data == nullptr) {
      continue;
    }
    std::string field_name(fc->fields[i].name);

    // Attribute payloads are read as int arrays; the cast target type was
    // missing from this copy of the file.
    if (field_name.compare("interpolation_mode") == 0) {
      mode = static_cast<const int *>(fc->fields[i].data)[0];
    }

    if (field_name.compare("padding_mode") == 0) {
      paddingMode = static_cast<const int *>(fc->fields[i].data)[0];
    }

    if (field_name.compare("align_corners") == 0) {
      alignCorners = (bool)(static_cast<const int *>(fc->fields[i].data)[0]);
    }
  }

  GridSamplerDynamic *plugin =
      new GridSamplerDynamic(name, mode, paddingMode, alignCorners);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

nvinfer1::IPluginV2 *GridSamplerDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  // This object will be deleted when the network is destroyed, which will
  // call GridSamplerDynamic::destroy()
  auto plugin = new GridSamplerDynamic(name, serialData, serialLength);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

void GridSamplerDynamicCreator::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

const char *GridSamplerDynamicCreator::getPluginNamespace() const {
  return mNamespace.c_str();
}
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler_kernel.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved // modified from // https://github.com/pytorch/pytorch/blob/ec683299ebabf297a3504c76248d37be830e4342/aten/src/ATen/native/cuda/GridSampler.cuh // and // https://github.com/pytorch/pytorch/blob/ec683299ebabf297a3504c76248d37be830e4342/aten/src/ATen/native/cuda/GridSampler.cu #include #include #include #include #include #include "common_cuda_helper.hpp" #include "trt_cuda_helper.cuh" #include "trt_grid_sampler.hpp" #include "trt_plugin_helper.hpp" using mmcv::GridSamplerInterpolation; using mmcv::GridSamplerPadding; using mmcv::TensorDesc; // Unnormalizes a coordinate from the -1 to +1 scale to its pixel index value, // where we view each pixel as an area between (idx - 0.5) and (idx + 0.5). // if align_corners: -1 and +1 get sent to the centers of the corner pixels // -1 --> 0 // +1 --> (size - 1) // scale_factor = (size - 1) / 2 // if not align_corners: -1 and +1 get sent to the image edges // -1 --> -0.5 // +1 --> (size - 1) + 0.5 == size - 0.5 // scale_factor = size / 2 template static __forceinline__ __device__ scalar_t grid_sampler_unnormalize(scalar_t coord, int size, bool align_corners) { if (align_corners) { // unnormalize coord from [-1, 1] to [0, size - 1] return ((coord + 1.f) / 2) * (size - 1); } else { // unnormalize coord from [-1, 1] to [-0.5, size - 0.5] return ((coord + 1.f) * size - 1) / 2; } } // Clips coordinates to between 0 and clip_limit - 1 template static __forceinline__ __device__ scalar_t clip_coordinates(scalar_t in, int clip_limit) { return ::min(static_cast(clip_limit - 1), ::max(in, static_cast(0))); } // Reflects coordinates until they fall between low and high (inclusive). // The bounds are passed as twice their value so that half-integer values // can be represented as ints. 
// Mirrors `in` back and forth across the interval [twice_low / 2,
// twice_high / 2]. Returns 0 when both bounds coincide (zero-width span).
// NOTE(review): `template` and `static_cast` below appear without their
// `<...>` arguments in this copy of the file; likely lost in extraction.
template static __forceinline__ __device__ scalar_t
reflect_coordinates(scalar_t in, int twice_low, int twice_high) {
  if (twice_low == twice_high) {
    return static_cast(0);
  }
  scalar_t min = static_cast(twice_low) / 2;
  scalar_t span = static_cast(twice_high - twice_low) / 2;
  // Work with the distance from the lower bound.
  in = ::fabs(in - min);
  // `fmod` returns same sign as `in`, which is positive after the `fabs` above.
  scalar_t extra = ::fmod(in, span);
  // Number of whole spans crossed; its parity decides the direction.
  int flips = static_cast(::floor(in / span));
  if (flips % 2 == 0) {
    return extra + min;
  } else {
    return span - extra + min;
  }
}

// Replaces values that cannot be converted to int safely (too large, too
// small, or not finite) with -100.0; all other values pass through unchanged.
template static __forceinline__ __device__ scalar_t
safe_downgrade_to_int_range(scalar_t x) {
  // -100.0 does not have special meaning. This is just to make sure
  // it's not within_bounds_2d or within_bounds_3d, and does not cause
  // undefined behavior. See #35506.
  if (x > INT_MAX - 1 || x < INT_MIN || !::isfinite(static_cast(x)))
    return static_cast(-100.0);
  return x;
}

// Computes the pixel source index value for a grid coordinate:
// unnormalize, apply the padding mode (Border: clip; Reflection: reflect
// then clip; any other mode: leave as is), then guard the int conversion.
template static __forceinline__ __device__ scalar_t
grid_sampler_compute_source_index(scalar_t coord, int size,
                                  GridSamplerPadding padding_mode,
                                  bool align_corners) {
  coord = grid_sampler_unnormalize(coord, size, align_corners);
  if (padding_mode == GridSamplerPadding::Border) {
    // clip coordinates to image borders
    coord = clip_coordinates(coord, size);
  } else if (padding_mode == GridSamplerPadding::Reflection) {
    // reflect coordinates by image borders
    if (align_corners) {
      coord = reflect_coordinates(coord, 0, 2 * (size - 1));
    } else {
      coord = reflect_coordinates(coord, -1, 2 * size - 1);
    }
    // clip coordinates to image borders
    coord = clip_coordinates(coord, size);
  }
  coord = safe_downgrade_to_int_range(coord);
  return coord;
}

// True when (h, w) lies inside an H x W image.
static __forceinline__ __device__ bool within_bounds_2d(int h, int w, int H,
                                                        int W) {
  return h >= 0 && h < H && w >= 0 && w < W;
}

// True when (d, h, w) lies inside a D x H x W volume.
static __forceinline__ __device__ bool within_bounds_3d(int d, int h, int w,
                                                        int D, int H, int W) {
  return d >= 0 && d < D && h >= 0 && h < H && w >= 0 && w < W;
}

// 2-D grid sampling kernel.
//
// `index` (driven by CUDA_1D_KERNEL_LOOP, a macro defined elsewhere) is
// decomposed into an output location (n, h, w); the sampling coordinates for
// that location are read from `grid` and every one of the C channels of the
// output is written. Shapes and strides come from the TensorDesc arguments.
// Neighbours that fall outside the input contribute zero.
// If `interpolation_mode` is neither Bilinear nor Nearest nothing is written.
template __global__ void grid_sampler_2d_kernel(
    const int nthreads, const scalar_t *input, const scalar_t *grid,
    scalar_t *output, TensorDesc input_desc, TensorDesc grid_desc,
    TensorDesc output_desc, const GridSamplerInterpolation interpolation_mode,
    const GridSamplerPadding padding_mode, bool align_corners) {
  int C = input_desc.shape[1];
  int inp_H = input_desc.shape[2];
  int inp_W = input_desc.shape[3];
  // The output spatial size is taken from the grid descriptor.
  int out_H = grid_desc.shape[1];
  int out_W = grid_desc.shape[2];
  int inp_sN = input_desc.stride[0];
  int inp_sC = input_desc.stride[1];
  int inp_sH = input_desc.stride[2];
  int inp_sW = input_desc.stride[3];
  int grid_sN = grid_desc.stride[0];
  int grid_sH = grid_desc.stride[1];
  int grid_sW = grid_desc.stride[2];
  int grid_sCoor = grid_desc.stride[3];
  int out_sN = output_desc.stride[0];
  int out_sC = output_desc.stride[1];
  int out_sH = output_desc.stride[2];
  int out_sW = output_desc.stride[3];

  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    const int w = index % out_W;
    const int h = (index / out_W) % out_H;
    const int n = index / (out_H * out_W);
    const int grid_offset = n * grid_sN + h * grid_sH + w * grid_sW;

    // get the corresponding input x, y coordinates from grid
    scalar_t ix = grid[grid_offset];
    scalar_t iy = grid[grid_offset + grid_sCoor];

    ix = grid_sampler_compute_source_index(ix, inp_W, padding_mode,
                                           align_corners);
    iy = grid_sampler_compute_source_index(iy, inp_H, padding_mode,
                                           align_corners);

    if (interpolation_mode == GridSamplerInterpolation::Bilinear) {
      // get NE, NW, SE, SW pixel values from (x, y)
      int ix_nw = static_cast(::floor(ix));
      int iy_nw = static_cast(::floor(iy));
      int ix_ne = ix_nw + 1;
      int iy_ne = iy_nw;
      int ix_sw = ix_nw;
      int iy_sw = iy_nw + 1;
      int ix_se = ix_nw + 1;
      int iy_se = iy_nw + 1;

      // get surfaces to each neighbor:
      // (each weight is the area of the rectangle opposite that corner)
      scalar_t nw = (ix_se - ix) * (iy_se - iy);
      scalar_t ne = (ix - ix_sw) * (iy_sw - iy);
      scalar_t sw = (ix_ne - ix) * (iy - iy_ne);
      scalar_t se = (ix - ix_nw) * (iy - iy_nw);

      // calculate bilinear weighted pixel value and set output pixel
      auto inp_ptr_NC = input + n * inp_sN;
      auto out_ptr_NCHW = output + n * out_sN + h * out_sH + w * out_sW;
      for (int c = 0; c < C;
           ++c, inp_ptr_NC += inp_sC, out_ptr_NCHW += out_sC) {
        *out_ptr_NCHW = static_cast(0);
        if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) {
          *out_ptr_NCHW += inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw;
        }
        if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) {
          *out_ptr_NCHW += inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne;
        }
        if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) {
          *out_ptr_NCHW += inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw;
        }
        if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) {
          *out_ptr_NCHW += inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se;
        }
      }
    } else if (interpolation_mode == GridSamplerInterpolation::Nearest) {
      int ix_nearest = static_cast(::round(ix));
      int iy_nearest = static_cast(::round(iy));

      // assign nearest neighbor pixel value to output pixel
      auto inp_ptr_NC = input + n * inp_sN;
      auto out_ptr_NCHW = output + n * out_sN + h * out_sH + w * out_sW;
      for (int c = 0; c < C;
           ++c, inp_ptr_NC += inp_sC, out_ptr_NCHW += out_sC) {
        if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) {
          *out_ptr_NCHW = inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW];
        } else {
          *out_ptr_NCHW = static_cast(0);
        }
      }
    }
  }
}

// 3-D grid sampling kernel; same scheme as the 2-D kernel with an extra
// depth axis. `index` is decomposed into (n, d, h, w), three coordinates are
// read from `grid`, and all C channels of the output location are written.
// The "Bilinear" mode blends the eight surrounding corners; corners outside
// the input contribute zero.
template __global__ void grid_sampler_3d_kernel(
    const int nthreads, const scalar_t *input, const scalar_t *grid,
    scalar_t *output, TensorDesc input_desc, TensorDesc grid_desc,
    TensorDesc output_desc, const GridSamplerInterpolation interpolation_mode,
    const GridSamplerPadding padding_mode, bool align_corners) {
  int C = input_desc.shape[1];
  int inp_D = input_desc.shape[2];
  int inp_H = input_desc.shape[3];
  int inp_W = input_desc.shape[4];
  // The output spatial size is taken from the grid descriptor.
  int out_D = grid_desc.shape[1];
  int out_H = grid_desc.shape[2];
  int out_W = grid_desc.shape[3];
  int inp_sN = input_desc.stride[0];
  int inp_sC = input_desc.stride[1];
  int inp_sD = input_desc.stride[2];
  int inp_sH = input_desc.stride[3];
  int inp_sW = input_desc.stride[4];
  int grid_sN = grid_desc.stride[0];
  int grid_sD = grid_desc.stride[1];
  int grid_sH = grid_desc.stride[2];
  int grid_sW = grid_desc.stride[3];
  int grid_sCoor = grid_desc.stride[4];
  int out_sN = output_desc.stride[0];
  int out_sC = output_desc.stride[1];
  int out_sD = output_desc.stride[2];
  int out_sH = output_desc.stride[3];
  int out_sW = output_desc.stride[4];

  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    const int w = index % out_W;
    const int h = (index / out_W) % out_H;
    const int d = (index / (out_H * out_W)) % out_D;
    const int n = index / (out_D * out_H * out_W);
    const int grid_offset =
        n * grid_sN + d * grid_sD + h * grid_sH + w * grid_sW;

    // get the corresponding input x, y, z coordinates from grid
    scalar_t ix = grid[grid_offset];
    scalar_t iy = grid[grid_offset + grid_sCoor];
    scalar_t iz = grid[grid_offset + 2 * grid_sCoor];

    ix = grid_sampler_compute_source_index(ix, inp_W, padding_mode,
                                           align_corners);
    iy = grid_sampler_compute_source_index(iy, inp_H, padding_mode,
                                           align_corners);
    iz = grid_sampler_compute_source_index(iz, inp_D, padding_mode,
                                           align_corners);

    if (interpolation_mode == GridSamplerInterpolation::Bilinear) {
      // get corner pixel values from (x, y, z)
      // for 4d, we used north-east-south-west
      // for 5d, we add top-bottom
      int ix_tnw = static_cast(::floor(ix));
      int iy_tnw = static_cast(::floor(iy));
      int iz_tnw = static_cast(::floor(iz));

      int ix_tne = ix_tnw + 1;
      int iy_tne = iy_tnw;
      int iz_tne = iz_tnw;

      int ix_tsw = ix_tnw;
      int iy_tsw = iy_tnw + 1;
      int iz_tsw = iz_tnw;

      int ix_tse = ix_tnw + 1;
      int iy_tse = iy_tnw + 1;
      int iz_tse = iz_tnw;

      int ix_bnw = ix_tnw;
      int iy_bnw = iy_tnw;
      int iz_bnw = iz_tnw + 1;

      int ix_bne = ix_tnw + 1;
      int iy_bne = iy_tnw;
      int iz_bne = iz_tnw + 1;

      int ix_bsw = ix_tnw;
      int iy_bsw = iy_tnw + 1;
      int iz_bsw = iz_tnw + 1;

      int ix_bse = ix_tnw + 1;
      int iy_bse = iy_tnw + 1;
      int iz_bse = iz_tnw + 1;

      // get surfaces to each neighbor:
      // (each weight is the volume of the box opposite that corner)
      scalar_t tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz);
      scalar_t tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz);
      scalar_t tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz);
      scalar_t tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz);
      scalar_t bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse);
      scalar_t bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw);
      scalar_t bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne);
      scalar_t bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw);

      auto inp_ptr_NC = input + n * inp_sN;
      auto out_ptr_NCDHW =
          output + n * out_sN + d * out_sD + h * out_sH + w * out_sW;
      for (int c = 0; c < C;
           ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) {
        //   (c, iz_tnw, iy_tnw, ix_tnw) * tnw + (c, iz_tne, iy_tne, ix_tne) * tne
        // + (c, iz_tsw, iy_tsw, ix_tsw) * tsw + (c, iz_tse, iy_tse, ix_tse) * tse
        // + (c, iz_bnw, iy_bnw, ix_bnw) * bnw + (c, iz_bne, iy_bne, ix_bne) * bne
        // + (c, iz_bsw, iy_bsw, ix_bsw) * bsw + (c, iz_bse, iy_bse, ix_bse) * bse
        *out_ptr_NCDHW = static_cast(0);
        if (within_bounds_3d(iz_tnw, iy_tnw, ix_tnw, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_tnw * inp_sD + iy_tnw * inp_sH + ix_tnw * inp_sW] *
              tnw;
        }
        if (within_bounds_3d(iz_tne, iy_tne, ix_tne, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_tne * inp_sD + iy_tne * inp_sH + ix_tne * inp_sW] *
              tne;
        }
        if (within_bounds_3d(iz_tsw, iy_tsw, ix_tsw, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_tsw * inp_sD + iy_tsw * inp_sH + ix_tsw * inp_sW] *
              tsw;
        }
        if (within_bounds_3d(iz_tse, iy_tse, ix_tse, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_tse * inp_sD + iy_tse * inp_sH + ix_tse * inp_sW] *
              tse;
        }
        if (within_bounds_3d(iz_bnw, iy_bnw, ix_bnw, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_bnw * inp_sD + iy_bnw * inp_sH + ix_bnw * inp_sW] *
              bnw;
        }
        if (within_bounds_3d(iz_bne, iy_bne, ix_bne, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_bne * inp_sD + iy_bne * inp_sH + ix_bne * inp_sW] *
              bne;
        }
        if (within_bounds_3d(iz_bsw, iy_bsw, ix_bsw, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_bsw * inp_sD + iy_bsw * inp_sH + ix_bsw * inp_sW] *
              bsw;
        }
        if (within_bounds_3d(iz_bse, iy_bse, ix_bse, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_bse * inp_sD + iy_bse * inp_sH + ix_bse * inp_sW] *
              bse;
        }
      }
    } else if (interpolation_mode == GridSamplerInterpolation::Nearest) {
      int ix_nearest = static_cast(::round(ix));
      int iy_nearest = static_cast(::round(iy));
      int iz_nearest = static_cast(::round(iz));

      // assign nearest neighbor pixel value to output pixel
      auto inp_ptr_NC = input + n * inp_sN;
      auto out_ptr_NCDHW =
          output + n * out_sN + d * out_sD + h * out_sH + w * out_sW;
      for (int c = 0; c < C;
           ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) {
        if (within_bounds_3d(iz_nearest, iy_nearest, ix_nearest, inp_D, inp_H,
                             inp_W)) {
          *out_ptr_NCDHW =
              inp_ptr_NC[iz_nearest * inp_sD + iy_nearest * inp_sH +
                         ix_nearest * inp_sW];
        } else {
          *out_ptr_NCDHW = static_cast(0);
        }
      }
    }
  }
}

// Fills `desc` from an array of `nb_dims` extents: the extents are copied
// into desc.shape and desc.stride is computed so that the last dimension has
// stride 1 and each earlier stride is the product of the later extents.
// NOTE(review): nb_dims is not checked against the capacity of
// desc.shape / desc.stride (TensorDesc is declared elsewhere) - confirm the
// callers never pass more dimensions than it can hold.
void create_desc(const int *dims, int nb_dims, TensorDesc &desc) {
  memcpy(&desc.shape[0], dims, sizeof(int) * nb_dims);
  desc.stride[nb_dims - 1] = 1;
  for (int i = nb_dims - 2; i >= 0; --i) {
    desc.stride[i] = desc.stride[i + 1] * desc.shape[i + 1];
  }
}

// Host-side launcher: builds descriptors for input, output and grid, then
// launches the 2-D kernel for nb_dims == 4 or the 3-D kernel for
// nb_dims == 5. `count` is the product of all output extents except
// dimension 1 (the kernels loop over that dimension themselves).
// For any other nb_dims only a message is printed and nothing is launched.
// NOTE(review): the launch configuration between `<<` and `>>` and the
// template argument lists are empty in this copy of the file; they look
// lost in extraction. Confirm against upstream.
template void grid_sample(T *output, const T *input, const T *grid,
                          int *output_dims, int *input_dims, int *grid_dims,
                          int nb_dims, GridSamplerInterpolation interp,
                          GridSamplerPadding padding, bool align_corners,
                          cudaStream_t stream) {
  TensorDesc input_desc;
  create_desc(input_dims, nb_dims, input_desc);

  TensorDesc output_desc;
  create_desc(output_dims, nb_dims, output_desc);

  TensorDesc grid_desc;
  create_desc(grid_dims, nb_dims, grid_desc);

  int count = 1;
  for (int i = 0; i < nb_dims; ++i) {
    if (i == 1) {
      continue;
    }
    count *= output_desc.shape[i];
  }

  if (nb_dims == 4) {
    grid_sampler_2d_kernel
        <<>>(
            count, input, grid, output, input_desc, grid_desc, output_desc,
            interp, padding, align_corners);
  } else if (nb_dims == 5) {
    grid_sampler_3d_kernel
        <<>>(
            count, input, grid, output, input_desc, grid_desc, output_desc,
            interp, padding, align_corners);
  } else {
    printf("input and grid dims should be 4 or 5\n");
  }
}

// float entry point; forwards every argument unchanged to grid_sample.
void grid_sample_float(float *output, const float *input, const float *grid,
                       int *output_dims, int *input_dims, int *grid_dims,
                       int nb_dims, GridSamplerInterpolation interp,
                       GridSamplerPadding padding, bool align_corners,
                       cudaStream_t stream) {
  grid_sample(output, input, grid, output_dims, input_dims, grid_dims,
              nb_dims, interp, padding, align_corners, stream);
}
================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_instance_norm.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from:
// https://github.com/NVIDIA/TensorRT/blob/master/plugin/instanceNormalizationPlugin/instanceNormalizationPlugin.cpp
// NOTE(review): the two bare `#include` directives below have no header name
// in this copy; the `<...>` names look lost in extraction.
#include "trt_instance_norm.hpp"
#include
#include
#include "trt_serialize.hpp"

using namespace nvinfer1;

// Maps a TensorRT data type to the matching cuDNN data type.
// Writes the result through `cudnn_dtype` and returns CUDNN_STATUS_SUCCESS
// for kFLOAT and kHALF; returns CUDNN_STATUS_BAD_PARAM (leaving
// `*cudnn_dtype` untouched) for every other type.
cudnnStatus_t convert_trt2cudnn_dtype(nvinfer1::DataType trt_dtype,
                                      cudnnDataType_t* cudnn_dtype) {
  switch (trt_dtype) {
    case nvinfer1::DataType::kFLOAT:
      *cudnn_dtype = CUDNN_DATA_FLOAT;
      break;
    case nvinfer1::DataType::kHALF:
      *cudnn_dtype = CUDNN_DATA_HALF;
      break;
    default:
      return CUDNN_STATUS_BAD_PARAM;
  }
  return CUDNN_STATUS_SUCCESS;
}

namespace {
constexpr const char* PLUGIN_VERSION{"1"};
constexpr const char* PLUGIN_NAME{"MMCVInstanceNormalization"};
}  // namespace

// Static storage shared by all creator instances.
// NOTE(review): `std::vector` has no element type in this copy of the file;
// the `<...>` part looks lost in extraction.
PluginFieldCollection InstanceNormalizationDynamicCreator::mFC{};
std::vector InstanceNormalizationDynamicCreator::mPluginAttributes;

// Builds a plugin from an explicit epsilon value.
InstanceNormalizationDynamic::InstanceNormalizationDynamic(
    const std::string& name, float epsilon)
    : mLayerName(name), mEpsilon(epsilon) {}

// Builds a plugin from a serialized buffer; epsilon is the only value read.
InstanceNormalizationDynamic::InstanceNormalizationDynamic(
    const std::string& name, void const* serialData, size_t serialLength)
    : mLayerName(name) {
  deserialize_value(&serialData, &serialLength, &mEpsilon);
}

InstanceNormalizationDynamic::~InstanceNormalizationDynamic() {}

// InstanceNormalizationDynamic returns one output.
int InstanceNormalizationDynamic::getNbOutputs() const { return 1; } DimsExprs InstanceNormalizationDynamic::getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, nvinfer1::IExprBuilder& exprBuilder) { nvinfer1::DimsExprs output(inputs[0]); return output; } int InstanceNormalizationDynamic::initialize() { return 0; } void InstanceNormalizationDynamic::terminate() {} size_t InstanceNormalizationDynamic::getWorkspaceSize( const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const { int n = inputs[0].dims.d[0]; int c = inputs[0].dims.d[1]; int elem_size = mmcv::getElementSize(inputs[1].type); return mmcv::getAlignedSize(n * c * elem_size) * 2; } int InstanceNormalizationDynamic::enqueue( const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) { nvinfer1::Dims input_dims = inputDesc[0].dims; int n = input_dims.d[0]; int c = input_dims.d[1]; int h = input_dims.d[2]; int w = input_dims.nbDims > 3 ? 
input_dims.d[3] : 1; int elem_size = mmcv::getElementSize(inputDesc[1].type); void* n_scales = (void*)workspace; void* n_bias = (void*)(workspace + mmcv::getAlignedSize(n * c * elem_size)); const void* scales = (const void*)inputs[1]; const void* bias = (const void*)inputs[2]; for (int i = 0; i < n; ++i) { cudaMemcpyAsync(n_scales + i * c * elem_size, scales, c * elem_size, cudaMemcpyDeviceToDevice, stream); cudaMemcpyAsync(n_bias + i * c * elem_size, bias, c * elem_size, cudaMemcpyDeviceToDevice, stream); } cudnnSetTensor4dDescriptor(_b_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, n * c, 1, 1); cudnnDataType_t cudnn_dtype{}; convert_trt2cudnn_dtype(inputDesc[0].type, &cudnn_dtype); cudnnSetTensor4dDescriptor(_x_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, 1, n * c, h, w); cudnnSetTensor4dDescriptor(_y_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, 1, n * c, h, w); float alpha = 1; float beta = 0; void const* x_ptr = inputs[0]; void* y_ptr = outputs[0]; cudnnSetStream(_cudnn_handle, stream); // Note: Use of CUDNN_BATCHNORM_SPATIAL_PERSISTENT can cause numerical // overflows (NaNs) for fp32 data in some circumstances. The lower- // performance CUDNN_BATCHNORM_SPATIAL should be used if this is not // acceptable. 
cudnnBatchNormalizationForwardTraining( _cudnn_handle, CUDNN_BATCHNORM_SPATIAL_PERSISTENT, &alpha, &beta, _x_desc, x_ptr, _y_desc, y_ptr, _b_desc, n_scales, n_bias, 1., nullptr, nullptr, mEpsilon, nullptr, nullptr); return 0; } size_t InstanceNormalizationDynamic::getSerializationSize() const { return serialized_size(mEpsilon); } void InstanceNormalizationDynamic::serialize(void* buffer) const { serialize_value(&buffer, mEpsilon); } bool InstanceNormalizationDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs, int nbOutputs) { return ((inOut[pos].type == nvinfer1::DataType::kFLOAT || inOut[pos].type == nvinfer1::DataType::kHALF) && inOut[pos].format == nvinfer1::PluginFormat::kLINEAR && inOut[pos].type == inOut[0].type); } const char* InstanceNormalizationDynamic::getPluginType() const { return PLUGIN_NAME; } const char* InstanceNormalizationDynamic::getPluginVersion() const { return PLUGIN_VERSION; } void InstanceNormalizationDynamic::destroy() { delete this; } IPluginV2DynamicExt* InstanceNormalizationDynamic::clone() const { auto* plugin = new InstanceNormalizationDynamic{mLayerName, mEpsilon}; plugin->setPluginNamespace(mPluginNamespace.c_str()); return plugin; } // Set plugin namespace void InstanceNormalizationDynamic::setPluginNamespace( const char* pluginNamespace) { mPluginNamespace = pluginNamespace; } const char* InstanceNormalizationDynamic::getPluginNamespace() const { return mPluginNamespace.c_str(); } nvinfer1::DataType InstanceNormalizationDynamic::getOutputDataType( int index, const nvinfer1::DataType* inputTypes, int nbInputs) const { return inputTypes[0]; } // Attach the plugin object to an execution context and grant the plugin the // access to some context resource. 
void InstanceNormalizationDynamic::attachToContext( cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) { _cudnn_handle = cudnnContext; cudnnCreateTensorDescriptor(&_b_desc); cudnnCreateTensorDescriptor(&_x_desc); cudnnCreateTensorDescriptor(&_y_desc); } // Detach the plugin object from its execution context. void InstanceNormalizationDynamic::detachFromContext() { cudnnDestroyTensorDescriptor(_y_desc); cudnnDestroyTensorDescriptor(_x_desc); cudnnDestroyTensorDescriptor(_b_desc); } void InstanceNormalizationDynamic::configurePlugin( const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) {} // InstanceNormalizationDynamicCreator methods InstanceNormalizationDynamicCreator::InstanceNormalizationDynamicCreator() { mPluginAttributes.clear(); mPluginAttributes.emplace_back( PluginField("epsilon", nullptr, PluginFieldType::kFLOAT32, 1)); mFC.nbFields = mPluginAttributes.size(); mFC.fields = mPluginAttributes.data(); } const char* InstanceNormalizationDynamicCreator::getPluginName() const { return PLUGIN_NAME; } const char* InstanceNormalizationDynamicCreator::getPluginVersion() const { return PLUGIN_VERSION; } const PluginFieldCollection* InstanceNormalizationDynamicCreator::getFieldNames() { return &mFC; } IPluginV2DynamicExt* InstanceNormalizationDynamicCreator::createPlugin( const char* name, const nvinfer1::PluginFieldCollection* fc) { float epsilon = 1e-5; const PluginField* fields = fc->fields; for (int i = 0; i < fc->nbFields; ++i) { const char* attrName = fields[i].name; if (!strcmp(attrName, "epsilon")) { epsilon = *(static_cast(fields[i].data)); } } InstanceNormalizationDynamic* obj = new InstanceNormalizationDynamic(name, epsilon); obj->setPluginNamespace(mNamespace.c_str()); return obj; } IPluginV2DynamicExt* InstanceNormalizationDynamicCreator::deserializePlugin( const char* name, const void* serialData, size_t serialLength) { InstanceNormalizationDynamic* obj 
= new InstanceNormalizationDynamic{name, serialData, serialLength}; obj->setPluginNamespace(mNamespace.c_str()); return obj; } void InstanceNormalizationDynamicCreator::setPluginNamespace( const char* libNamespace) { mNamespace = libNamespace; } const char* InstanceNormalizationDynamicCreator::getPluginNamespace() const { return mNamespace.c_str(); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_modulated_deform_conv.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "trt_modulated_deform_conv.hpp" #include #include #include "trt_serialize.hpp" void ModulatedDeformConvForwardCUDAKernelLauncher_float( const float *input, const float *weight, const float *bias, const float *offset, const float *mask, float *output, void *workspace, int batch, int channels, int height, int width, int channels_out, int kernel_w, int kernel_h, int stride_w, int stride_h, int pad_w, int pad_h, int dilation_w, int dilation_h, int group, int deformable_group, int im2col_step, cublasHandle_t cublas_handle, cudaStream_t stream); namespace { static const char *PLUGIN_VERSION{"1"}; static const char *PLUGIN_NAME{"MMCVModulatedDeformConv2d"}; } // namespace nvinfer1::PluginFieldCollection ModulatedDeformableConvPluginDynamicCreator::mFC{}; std::vector ModulatedDeformableConvPluginDynamicCreator::mPluginAttributes; ModulatedDeformableConvPluginDynamic::ModulatedDeformableConvPluginDynamic( const std::string &name, const nvinfer1::Dims stride, const nvinfer1::Dims padding, const nvinfer1::Dims dilation, const int deformableGroup, const int group) : mLayerName(name), mStride(stride), mPadding(padding), mDilation(dilation), mDeformableGroup(deformableGroup), mGroup(group) { mWithBias = false; } ModulatedDeformableConvPluginDynamic::ModulatedDeformableConvPluginDynamic( const std::string name, const void *data, size_t length) : mLayerName(name) { 
deserialize_value(&data, &length, &mStride); deserialize_value(&data, &length, &mPadding); deserialize_value(&data, &length, &mDilation); deserialize_value(&data, &length, &mDeformableGroup); deserialize_value(&data, &length, &mGroup); mWithBias = false; } ModulatedDeformableConvPluginDynamic::~ModulatedDeformableConvPluginDynamic() {} nvinfer1::IPluginV2DynamicExt *ModulatedDeformableConvPluginDynamic::clone() const { ModulatedDeformableConvPluginDynamic *plugin = new ModulatedDeformableConvPluginDynamic( mLayerName, mStride, mPadding, mDilation, mDeformableGroup, mGroup); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::DimsExprs ModulatedDeformableConvPluginDynamic::getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs, nvinfer1::IExprBuilder &exprBuilder) { nvinfer1::DimsExprs ret; ret.nbDims = 4; ret.d[0] = inputs[0].d[0]; ret.d[1] = inputs[3].d[0]; ret.d[2] = inputs[1].d[2]; ret.d[3] = inputs[1].d[3]; return ret; } bool ModulatedDeformableConvPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs, int nbOutputs) { if (pos == 0) { return (inOut[pos].type == nvinfer1::DataType::kFLOAT && inOut[pos].format == nvinfer1::TensorFormat::kLINEAR); } else { return inOut[pos].type == inOut[0].type && inOut[pos].format == inOut[0].format; } } void ModulatedDeformableConvPluginDynamic::configurePlugin( const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs, const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) { if (nbInputs == 5) { mWithBias = true; } } size_t ModulatedDeformableConvPluginDynamic::getWorkspaceSize( const nvinfer1::PluginTensorDesc *inputs, int nbInputs, const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const { int sizeof_dtype = mmcv::getElementSize(outputs[0].type); int batch_size = inputs[0].dims.d[0]; int nInputPlane = inputs[0].dims.d[1]; int inputHeight = inputs[0].dims.d[2]; int inputWidth = inputs[0].dims.d[3]; 
int nOutputPlane = outputs[0].dims.d[1]; int outputHeight = outputs[0].dims.d[2]; int outputWidth = outputs[0].dims.d[3]; int kW = inputs[3].dims.d[2]; int kH = inputs[3].dims.d[3]; int im2col_step = std::min(32, batch_size); size_t col_size = mmcv::getAlignedSize(nInputPlane * kW * kH * outputHeight * outputWidth * sizeof_dtype); return col_size; } int ModulatedDeformableConvPluginDynamic::enqueue( const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workSpace, cudaStream_t stream) { int batch = inputDesc[0].dims.d[0]; int channels = inputDesc[0].dims.d[1]; int height = inputDesc[0].dims.d[2]; int width = inputDesc[0].dims.d[3]; int channels_out = outputDesc[0].dims.d[1]; int kernel_h = inputDesc[3].dims.d[2]; int kernel_w = inputDesc[3].dims.d[3]; const void *x = inputs[0]; const void *offset = inputs[1]; const void *mask = inputs[2]; const void *weight = inputs[3]; const void *bias = mWithBias ? 
inputs[4] : nullptr; void *output = outputs[0]; int im2col_step = std::min(batch, 32); // TODO: add fp16 support auto data_type = inputDesc[0].type; switch (data_type) { case nvinfer1::DataType::kFLOAT: ModulatedDeformConvForwardCUDAKernelLauncher_float( (float *)x, (float *)weight, (float *)bias, (float *)offset, (float *)mask, (float *)output, workSpace, batch, channels, height, width, channels_out, kernel_w, kernel_h, mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], mDilation.d[1], mGroup, mDeformableGroup, im2col_step, m_cublas_handle, stream); break; default: return 1; break; } return 0; } nvinfer1::DataType ModulatedDeformableConvPluginDynamic::getOutputDataType( int index, const nvinfer1::DataType *inputTypes, int nbInputs) const { return inputTypes[0]; } // IPluginV2 Methods const char *ModulatedDeformableConvPluginDynamic::getPluginType() const { return PLUGIN_NAME; } const char *ModulatedDeformableConvPluginDynamic::getPluginVersion() const { return PLUGIN_VERSION; } int ModulatedDeformableConvPluginDynamic::getNbOutputs() const { return 1; } int ModulatedDeformableConvPluginDynamic::initialize() { return 0; } void ModulatedDeformableConvPluginDynamic::terminate() {} size_t ModulatedDeformableConvPluginDynamic::getSerializationSize() const { return sizeof(mStride) + sizeof(mPadding) + sizeof(mDilation) + sizeof(mDeformableGroup) + sizeof(mGroup); } void ModulatedDeformableConvPluginDynamic::serialize(void *buffer) const { serialize_value(&buffer, mStride); serialize_value(&buffer, mPadding); serialize_value(&buffer, mDilation); serialize_value(&buffer, mDeformableGroup); serialize_value(&buffer, mGroup); } void ModulatedDeformableConvPluginDynamic::destroy() { // This gets called when the network containing plugin is destroyed delete this; } void ModulatedDeformableConvPluginDynamic::attachToContext( cudnnContext *cudnnContext, cublasContext *cublasContext, nvinfer1::IGpuAllocator *gpuAllocator) { m_cublas_handle = cublasContext; 
} void ModulatedDeformableConvPluginDynamic::detachFromContext() {} void ModulatedDeformableConvPluginDynamic::setPluginNamespace( const char *libNamespace) { mNamespace = libNamespace; } const char *ModulatedDeformableConvPluginDynamic::getPluginNamespace() const { return mNamespace.c_str(); } ////////////////////// creator ///////////////////////////// ModulatedDeformableConvPluginDynamicCreator:: ModulatedDeformableConvPluginDynamicCreator() { mPluginAttributes.emplace_back(nvinfer1::PluginField("stride")); mPluginAttributes.emplace_back(nvinfer1::PluginField("padding")); mPluginAttributes.emplace_back(nvinfer1::PluginField("dilation")); mPluginAttributes.emplace_back(nvinfer1::PluginField("groups")); mPluginAttributes.emplace_back(nvinfer1::PluginField("deform_groups")); mFC.nbFields = mPluginAttributes.size(); mFC.fields = mPluginAttributes.data(); } const char *ModulatedDeformableConvPluginDynamicCreator::getPluginName() const { return PLUGIN_NAME; } const char *ModulatedDeformableConvPluginDynamicCreator::getPluginVersion() const { return PLUGIN_VERSION; } const nvinfer1::PluginFieldCollection * ModulatedDeformableConvPluginDynamicCreator::getFieldNames() { return &mFC; } nvinfer1::IPluginV2 *ModulatedDeformableConvPluginDynamicCreator::createPlugin( const char *name, const nvinfer1::PluginFieldCollection *fc) { nvinfer1::Dims stride{2, {1, 1}}; nvinfer1::Dims padding{2, {0, 0}}; nvinfer1::Dims dilation{2, {1, 1}}; int deformableGroup = 1; int group = 1; for (int i = 0; i < fc->nbFields; i++) { if (fc->fields[i].data == nullptr) { continue; } std::string field_name(fc->fields[i].name); if (field_name.compare("deformable_group") == 0) { deformableGroup = static_cast(fc->fields[i].data)[0]; } if (field_name.compare("group") == 0) { group = static_cast(fc->fields[i].data)[0]; } if (field_name.compare("stride") == 0) { stride.nbDims = 2; stride.d[0] = static_cast(fc->fields[i].data)[0]; stride.d[1] = static_cast(fc->fields[i].data)[1]; } if 
(field_name.compare("padding") == 0) { padding.nbDims = 2; padding.d[0] = static_cast(fc->fields[i].data)[0]; padding.d[1] = static_cast(fc->fields[i].data)[1]; } if (field_name.compare("dilation") == 0) { dilation.nbDims = 2; dilation.d[0] = static_cast(fc->fields[i].data)[0]; dilation.d[1] = static_cast(fc->fields[i].data)[1]; } } ModulatedDeformableConvPluginDynamic *plugin = new ModulatedDeformableConvPluginDynamic(name, stride, padding, dilation, deformableGroup, group); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::IPluginV2 * ModulatedDeformableConvPluginDynamicCreator::deserializePlugin( const char *name, const void *serialData, size_t serialLength) { auto plugin = new ModulatedDeformableConvPluginDynamic(name, serialData, serialLength); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } void ModulatedDeformableConvPluginDynamicCreator::setPluginNamespace( const char *libNamespace) { mNamespace = libNamespace; } const char *ModulatedDeformableConvPluginDynamicCreator::getPluginNamespace() const { return mNamespace.c_str(); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_modulated_deform_conv_kernel.cu ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include #include #include "common_cuda_helper.hpp" #include "modulated_deform_conv_cuda_kernel.cuh" #include "trt_cuda_helper.cuh" #include "trt_plugin_helper.hpp" template void trt_modulated_deformable_im2col( const T* data_im_, const T* data_offset_, const T* data_mask_, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, T* data_col_, cudaStream_t stream) { // num_axes should be smaller than block size const int channel_per_deformable_group = channels / deformable_group; const int num_kernels = channels * batch_size * height_col * width_col; modulated_deformable_im2col_gpu_kernel <<>>( num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, batch_size, channels, deformable_group, height_col, width_col, data_col_); cudaCheckError(); } template __global__ void output_add_bias_kernel(scalar_t* output, const scalar_t* bias, size_t step_batch, size_t step_channel, size_t n) { CUDA_1D_KERNEL_LOOP(index, n) { output[index] += bias[(index % step_batch) / step_channel]; } } template static void output_add_bias(scalar_t* output, const scalar_t* bias, size_t batch, size_t channel, size_t height, size_t width, cudaStream_t stream) { size_t step_channel = height * width; size_t step_batch = step_channel * channel; size_t n = step_batch * batch; output_add_bias_kernel<<>>( output, bias, step_batch, step_channel, n); } template void ModulatedDeformConvForwardCUDAKernelLauncher( const scalar_t* input, const scalar_t* weight, const scalar_t* bias, const scalar_t* offset, const scalar_t* mask, scalar_t* output, void* workspace, int batch, int channels, int height, int width, int 
channels_out, int kernel_w, int kernel_h, int stride_w, int stride_h, int pad_w, int pad_h, int dilation_w, int dilation_h, int group, int deformable_group, int im2col_step, cublasHandle_t cublas_handle, cudaStream_t stream) { size_t sizeof_dtype = sizeof(scalar_t); bool with_bias = (bias != nullptr); im2col_step = std::min(int(batch), im2col_step); assert(batch % im2col_step == 0); const int channels_kernel = channels / group; const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; scalar_t* columns = (scalar_t*)workspace; const size_t input_step = channels * height * width; const size_t offset_step = deformable_group * kernel_h * kernel_w * 2 * height * width; const size_t mask_step = deformable_group * kernel_h * kernel_w * height * width; const size_t out_step = channels_out * height_out * width_out; const size_t out_group_step = out_step / group; const size_t col_g_step = channels * kernel_w * kernel_h / group * height_out * width_out; const size_t weight_g_step = channels_out / group * channels / group * kernel_h * kernel_w; const int m = channels_out / group; const int n = height_out * width_out; const int k = channels / group * kernel_h * kernel_w; scalar_t alpha = 1.; scalar_t beta = 0.; for (int b = 0; b < batch; b++) { const scalar_t* input_start = input + b * input_step; const scalar_t* offset_start = offset + b * offset_step; const scalar_t* mask_start = mask + b * mask_step; trt_modulated_deformable_im2col( input_start, offset_start, mask_start, 1, channels, height, width, height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, columns, stream); for (int g = 0; g < group; g++) { const scalar_t* weight_start = weight + g * weight_g_step; scalar_t* col_start = columns + g * col_g_step; scalar_t* out_buffer_start = output + b * out_step + g * out_group_step; // 
cudaMemsetAsync(out_buffer_start, 0, 1, stream); cublasGemmWrap(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k, &alpha, col_start, n, weight_start, k, &beta, out_buffer_start, n); cudaCheckError(); } } if (with_bias) { output_add_bias(output, bias, batch, channels_out, height_out, width_out, stream); } } void ModulatedDeformConvForwardCUDAKernelLauncher_float( const float* input, const float* weight, const float* bias, const float* offset, const float* mask, float* output, void* workspace, int batch, int channels, int height, int width, int channels_out, int kernel_w, int kernel_h, int stride_w, int stride_h, int pad_w, int pad_h, int dilation_w, int dilation_h, int group, int deformable_group, int im2col_step, cublasHandle_t cublas_handle, cudaStream_t stream) { ModulatedDeformConvForwardCUDAKernelLauncher( input, weight, bias, offset, mask, output, workspace, batch, channels, height, width, channels_out, kernel_w, kernel_h, stride_w, stride_h, pad_w, pad_h, dilation_w, dilation_h, group, deformable_group, im2col_step, cublas_handle, stream); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_nms.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "trt_nms.hpp" #include #include #include #include "trt_serialize.hpp" extern size_t get_onnxnms_workspace_size( size_t num_batches, size_t spatial_dimension, size_t num_classes, size_t boxes_word_size, int center_point_box, size_t output_length); extern void TRTNMSCUDAKernelLauncher_float( const float *boxes, const float *scores, const int max_output_boxes_per_class, const float iou_threshold, const float score_threshold, const int offset, int *output, int center_point_box, int num_batches, int spatial_dimension, int num_classes, size_t output_length, void *workspace, cudaStream_t stream); namespace { static const char *PLUGIN_VERSION{"1"}; static const char *PLUGIN_NAME{"NonMaxSuppression"}; } // namespace nvinfer1::PluginFieldCollection NonMaxSuppressionDynamicCreator::mFC{}; std::vector NonMaxSuppressionDynamicCreator::mPluginAttributes; NonMaxSuppressionDynamic::NonMaxSuppressionDynamic( const std::string &name, int centerPointBox, int maxOutputBoxesPerClass, float iouThreshold, float scoreThreshold, int offset) : mLayerName(name), mCenterPointBox(centerPointBox), mMaxOutputBoxesPerClass(maxOutputBoxesPerClass), mIouThreshold(iouThreshold), mScoreThreshold(scoreThreshold), mOffset(offset) {} NonMaxSuppressionDynamic::NonMaxSuppressionDynamic(const std::string name, const void *data, size_t length) : mLayerName(name) { deserialize_value(&data, &length, &mCenterPointBox); deserialize_value(&data, &length, &mMaxOutputBoxesPerClass); deserialize_value(&data, &length, &mIouThreshold); deserialize_value(&data, &length, &mScoreThreshold); deserialize_value(&data, &length, &mOffset); } nvinfer1::IPluginV2DynamicExt *NonMaxSuppressionDynamic::clone() const { NonMaxSuppressionDynamic *plugin = new NonMaxSuppressionDynamic( mLayerName, mCenterPointBox, mMaxOutputBoxesPerClass, mIouThreshold, mScoreThreshold, mOffset); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::DimsExprs 
NonMaxSuppressionDynamic::getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs, nvinfer1::IExprBuilder &exprBuilder) { nvinfer1::DimsExprs ret; ret.nbDims = 2; auto num_batches = inputs[0].d[0]; auto spatial_dimension = inputs[0].d[1]; if (mMaxOutputBoxesPerClass > 0) { spatial_dimension = exprBuilder.operation( nvinfer1::DimensionOperation::kMIN, *spatial_dimension, *exprBuilder.constant(mMaxOutputBoxesPerClass)); } auto num_classes = inputs[1].d[1]; ret.d[0] = exprBuilder.operation( nvinfer1::DimensionOperation::kPROD, *num_batches, *exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *spatial_dimension, *num_classes)); ret.d[1] = exprBuilder.constant(3); return ret; } bool NonMaxSuppressionDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs, int nbOutputs) { if (pos < nbInputs) { switch (pos) { case 0: // boxes return inOut[pos].type == nvinfer1::DataType::kFLOAT && inOut[pos].format == nvinfer1::TensorFormat::kLINEAR; case 1: // scores return inOut[pos].type == nvinfer1::DataType::kFLOAT && inOut[pos].format == nvinfer1::TensorFormat::kLINEAR; default: return true; } } else { switch (pos - nbInputs) { case 0: // selected_indices return inOut[pos].type == nvinfer1::DataType::kINT32 && inOut[pos].format == nvinfer1::TensorFormat::kLINEAR; default: return true; } } return true; } void NonMaxSuppressionDynamic::configurePlugin( const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs, const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {} size_t NonMaxSuppressionDynamic::getWorkspaceSize( const nvinfer1::PluginTensorDesc *inputs, int nbInputs, const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const { size_t boxes_word_size = mmcv::getElementSize(inputs[0].type); size_t num_batches = inputs[0].dims.d[0]; size_t spatial_dimension = inputs[0].dims.d[1]; size_t num_classes = inputs[1].dims.d[1]; size_t output_length = outputs[0].dims.d[0]; return 
get_onnxnms_workspace_size(num_batches, spatial_dimension, num_classes, boxes_word_size, mCenterPointBox, output_length); } int NonMaxSuppressionDynamic::enqueue( const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workSpace, cudaStream_t stream) { int num_batches = inputDesc[0].dims.d[0]; int spatial_dimension = inputDesc[0].dims.d[1]; int num_classes = inputDesc[1].dims.d[1]; int output_length = outputDesc[0].dims.d[0]; const float *boxes = (const float *)inputs[0]; const float *scores = (const float *)inputs[1]; int *output = (int *)outputs[0]; TRTNMSCUDAKernelLauncher_float( boxes, scores, mMaxOutputBoxesPerClass, mIouThreshold, mScoreThreshold, mOffset, output, mCenterPointBox, num_batches, spatial_dimension, num_classes, output_length, workSpace, stream); return 0; } nvinfer1::DataType NonMaxSuppressionDynamic::getOutputDataType( int index, const nvinfer1::DataType *inputTypes, int nbInputs) const { return nvinfer1::DataType::kINT32; } // IPluginV2 Methods const char *NonMaxSuppressionDynamic::getPluginType() const { return PLUGIN_NAME; } const char *NonMaxSuppressionDynamic::getPluginVersion() const { return PLUGIN_VERSION; } int NonMaxSuppressionDynamic::getNbOutputs() const { return 1; } int NonMaxSuppressionDynamic::initialize() { return 0; } void NonMaxSuppressionDynamic::terminate() {} size_t NonMaxSuppressionDynamic::getSerializationSize() const { return sizeof(mCenterPointBox) + sizeof(mMaxOutputBoxesPerClass) + sizeof(mIouThreshold) + sizeof(mScoreThreshold) + sizeof(mOffset); } void NonMaxSuppressionDynamic::serialize(void *buffer) const { serialize_value(&buffer, mCenterPointBox); serialize_value(&buffer, mMaxOutputBoxesPerClass); serialize_value(&buffer, mIouThreshold); serialize_value(&buffer, mScoreThreshold); serialize_value(&buffer, mOffset); } void NonMaxSuppressionDynamic::destroy() { // This gets called when the network containing plugin is 
destroyed delete this; } void NonMaxSuppressionDynamic::setPluginNamespace(const char *libNamespace) { mNamespace = libNamespace; } const char *NonMaxSuppressionDynamic::getPluginNamespace() const { return mNamespace.c_str(); } ////////////////////// creator ///////////////////////////// NonMaxSuppressionDynamicCreator::NonMaxSuppressionDynamicCreator() { mPluginAttributes.clear(); mPluginAttributes.emplace_back(nvinfer1::PluginField("center_point_box")); mPluginAttributes.emplace_back( nvinfer1::PluginField("max_output_boxes_per_class")); mPluginAttributes.emplace_back(nvinfer1::PluginField("iou_threshold")); mPluginAttributes.emplace_back(nvinfer1::PluginField("score_threshold")); mPluginAttributes.emplace_back(nvinfer1::PluginField("offset")); mFC.nbFields = mPluginAttributes.size(); mFC.fields = mPluginAttributes.data(); } const char *NonMaxSuppressionDynamicCreator::getPluginName() const { return PLUGIN_NAME; } const char *NonMaxSuppressionDynamicCreator::getPluginVersion() const { return PLUGIN_VERSION; } const nvinfer1::PluginFieldCollection * NonMaxSuppressionDynamicCreator::getFieldNames() { return &mFC; } nvinfer1::IPluginV2 *NonMaxSuppressionDynamicCreator::createPlugin( const char *name, const nvinfer1::PluginFieldCollection *fc) { int centerPointBox = 0; int maxOutputBoxesPerClass = 0; float iouThreshold = 0.0f; float scoreThreshold = 0.0f; int offset = 0; for (int i = 0; i < fc->nbFields; i++) { if (fc->fields[i].data == nullptr) { continue; } std::string field_name(fc->fields[i].name); if (field_name.compare("center_point_box") == 0) { centerPointBox = static_cast(fc->fields[i].data)[0]; } if (field_name.compare("max_output_boxes_per_class") == 0) { maxOutputBoxesPerClass = static_cast(fc->fields[i].data)[0]; } if (field_name.compare("iou_threshold") == 0) { iouThreshold = static_cast(fc->fields[i].data)[0]; } if (field_name.compare("score_threshold") == 0) { scoreThreshold = static_cast(fc->fields[i].data)[0]; } if (field_name.compare("offset") == 
0) { offset = static_cast(fc->fields[i].data)[0]; } } NonMaxSuppressionDynamic *plugin = new NonMaxSuppressionDynamic(name, centerPointBox, maxOutputBoxesPerClass, iouThreshold, scoreThreshold, offset); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::IPluginV2 *NonMaxSuppressionDynamicCreator::deserializePlugin( const char *name, const void *serialData, size_t serialLength) { auto plugin = new NonMaxSuppressionDynamic(name, serialData, serialLength); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } void NonMaxSuppressionDynamicCreator::setPluginNamespace( const char *libNamespace) { mNamespace = libNamespace; } const char *NonMaxSuppressionDynamicCreator::getPluginNamespace() const { return mNamespace.c_str(); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_nms_kernel.cu ================================================ // Copyright (c) OpenMMLab. All rights reserved #include #include #include #include #include #include #include #include #include "common_cuda_helper.hpp" #include "nms_cuda_kernel.cuh" #include "trt_cuda_helper.cuh" #include "trt_plugin_helper.hpp" struct NMSBox { float box[4]; }; struct nms_centerwh2xyxy { __host__ __device__ NMSBox operator()(const NMSBox box) { NMSBox out; out.box[0] = box.box[0] - box.box[2] / 2.0f; out.box[1] = box.box[1] - box.box[3] / 2.0f; out.box[2] = box.box[0] + box.box[2] / 2.0f; out.box[3] = box.box[1] + box.box[3] / 2.0f; return out; } }; struct nms_sbox_idle { const float* idle_box_; __host__ __device__ nms_sbox_idle(const float* idle_box) { idle_box_ = idle_box; } __host__ __device__ NMSBox operator()(const NMSBox box) { return {idle_box_[0], idle_box_[1], idle_box_[2], idle_box_[3]}; } }; struct nms_score_threshold { float score_threshold_; __host__ __device__ nms_score_threshold(const float score_threshold) { score_threshold_ = score_threshold; } __host__ __device__ bool operator()(const float 
score) { return score < score_threshold_; } }; __global__ void nms_reindex_kernel(int n, int* output, int* index_cache) { CUDA_1D_KERNEL_LOOP(index, n) { const int old_index = output[index * 3 + 2]; output[index * 3 + 2] = index_cache[old_index]; } } __global__ void mask_to_output_kernel(const unsigned long long* dev_mask, const int* index, int* output, int* output_count, int batch_id, int cls_id, int spatial_dimension, int col_blocks, int max_output_boxes_per_class) { extern __shared__ unsigned long long remv[]; // fill remv with 0 CUDA_1D_KERNEL_LOOP(i, col_blocks) { remv[i] = 0; } __syncthreads(); int start = *output_count; int out_per_class_count = 0; for (int i = 0; i < spatial_dimension; i++) { const int nblock = i / threadsPerBlock; const int inblock = i % threadsPerBlock; if (!(remv[nblock] & (1ULL << inblock))) { if (threadIdx.x == 0) { output[start * 3 + 0] = batch_id; output[start * 3 + 1] = cls_id; output[start * 3 + 2] = index[i]; start += 1; } out_per_class_count += 1; if (out_per_class_count >= max_output_boxes_per_class) { break; } __syncthreads(); // set every overlap box with bit 1 in remv const unsigned long long* p = dev_mask + i * col_blocks; CUDA_1D_KERNEL_LOOP(j, col_blocks) { if (j >= nblock) { remv[j] |= p[j]; } } // j __syncthreads(); } } // i if (threadIdx.x == 0) { *output_count = start; } } size_t get_onnxnms_workspace_size(size_t num_batches, size_t spatial_dimension, size_t num_classes, size_t boxes_word_size, int center_point_box, size_t output_length) { size_t boxes_xyxy_workspace = 0; if (center_point_box == 1) { boxes_xyxy_workspace = mmcv::getAlignedSize( num_batches * spatial_dimension * 4 * boxes_word_size); } size_t scores_workspace = mmcv::getAlignedSize(spatial_dimension * boxes_word_size); size_t boxes_workspace = mmcv::getAlignedSize(spatial_dimension * 4 * boxes_word_size); const int col_blocks = (spatial_dimension + threadsPerBlock - 1) / threadsPerBlock; size_t mask_workspace = mmcv::getAlignedSize(spatial_dimension * 
col_blocks * sizeof(unsigned long long)); size_t index_template_workspace = mmcv::getAlignedSize(spatial_dimension * sizeof(int)); size_t index_workspace = mmcv::getAlignedSize(spatial_dimension * sizeof(int)); size_t count_workspace = mmcv::getAlignedSize(sizeof(int)); return scores_workspace + boxes_xyxy_workspace + boxes_workspace + mask_workspace + index_template_workspace + index_workspace + count_workspace; } /** * Launch the NonMaxSuppression kernel * * The NMS will be performed on each batch/class, share the kernel implement * `nms_cuda`. For each batch/class, the `boxes_sorted` and `index_cache` will * be sorted by scores, boxes_sorted will be used in `nms_cuda` kernel. After * that, the output would be generated by `mask_to_output_kernel` with * `dev_mask` and `sorted_cache`. * * @param[in] bboxes with shape [num_batch, spatial_dimension, 4], input boxes * @param[in] scores with shape [num_batch, num_classes, spatial_dimension], * input scores * @param[in] max_output_boxes_per_class max output boxes per class * @param[in] iou_threshold threshold of iou * @param[in] score_threshold threshold of scores * @param[in] offset box offset, only 0 or 1 is valid * @param[out] output with shape [output_length, 3], each row contain index * (batch_id, class_id, boxes_id), filling -1 if result is not valid. * @param[in] center_point_box 0 if boxes is [left, top, right, bottom] 1 if * boxes is [center_x, center_y, width, height] * @param[in] num_batches batch size of boxes and scores * @param[in] spatial_dimension boxes numbers each batch * @param[in] num_classes class numbers * @param[in] output_length the max output rows * @param[in] workspace memory for all temporary variables. 
* @param[in] stream cuda stream */ void TRTNMSCUDAKernelLauncher_float(const float* boxes, const float* scores, const int max_output_boxes_per_class, const float iou_threshold, const float score_threshold, const int offset, int* output, int center_point_box, int num_batches, int spatial_dimension, int num_classes, size_t output_length, void* workspace, cudaStream_t stream) { const int col_blocks = (spatial_dimension + threadsPerBlock - 1) / threadsPerBlock; float* boxes_sorted = (float*)workspace; workspace = static_cast(workspace) + mmcv::getAlignedSize(spatial_dimension * 4 * sizeof(float)); float* boxes_xyxy = nullptr; if (center_point_box == 1) { boxes_xyxy = (float*)workspace; workspace = static_cast(workspace) + mmcv::getAlignedSize(num_batches * spatial_dimension * 4 * sizeof(float)); thrust::transform(thrust::cuda::par.on(stream), (NMSBox*)boxes, (NMSBox*)(boxes + num_batches * spatial_dimension * 4), (NMSBox*)boxes_xyxy, nms_centerwh2xyxy()); cudaCheckError(); } float* scores_sorted = (float*)workspace; workspace = static_cast(workspace) + mmcv::getAlignedSize(spatial_dimension * sizeof(float)); unsigned long long* dev_mask = (unsigned long long*)workspace; workspace = static_cast(workspace) + mmcv::getAlignedSize(spatial_dimension * col_blocks * sizeof(unsigned long long)); int* index_cache = (int*)workspace; workspace = static_cast(workspace) + mmcv::getAlignedSize(spatial_dimension * sizeof(int)); // generate sequence [0,1,2,3,4 ....] 
int* index_template = (int*)workspace; workspace = static_cast(workspace) + mmcv::getAlignedSize(spatial_dimension * sizeof(int)); thrust::sequence(thrust::cuda::par.on(stream), index_template, index_template + spatial_dimension, 0); int max_output_boxes_per_class_cpu = max_output_boxes_per_class; if (max_output_boxes_per_class_cpu <= 0) { max_output_boxes_per_class_cpu = spatial_dimension; } int* output_count = (int*)workspace; workspace = static_cast(workspace) + mmcv::getAlignedSize(sizeof(int)); cudaMemsetAsync(output_count, 0, sizeof(int), stream); // fill output with -1 thrust::fill(thrust::cuda::par.on(stream), output, output + output_length * 3, -1); cudaCheckError(); dim3 blocks(col_blocks, col_blocks); dim3 threads(threadsPerBlock); for (int batch_id = 0; batch_id < num_batches; ++batch_id) { for (int cls_id = 0; cls_id < num_classes; ++cls_id) { const int batch_cls_id = batch_id * num_classes + cls_id; // sort boxes by score cudaMemcpyAsync(scores_sorted, scores + batch_cls_id * spatial_dimension, spatial_dimension * sizeof(float), cudaMemcpyDeviceToDevice, stream); cudaCheckError(); cudaMemcpyAsync(index_cache, index_template, spatial_dimension * sizeof(int), cudaMemcpyDeviceToDevice, stream); cudaCheckError(); thrust::sort_by_key(thrust::cuda::par.on(stream), scores_sorted, scores_sorted + spatial_dimension, index_cache, thrust::greater()); if (center_point_box == 1) { thrust::gather(thrust::cuda::par.on(stream), index_cache, index_cache + spatial_dimension, (NMSBox*)(boxes_xyxy + batch_id * spatial_dimension * 4), (NMSBox*)boxes_sorted); } else { thrust::gather(thrust::cuda::par.on(stream), index_cache, index_cache + spatial_dimension, (NMSBox*)(boxes + batch_id * spatial_dimension * 4), (NMSBox*)boxes_sorted); } cudaCheckError(); if (score_threshold > 0.0f) { thrust::transform_if( thrust::cuda::par.on(stream), (NMSBox*)boxes_sorted, (NMSBox*)(boxes_sorted + spatial_dimension * 4), scores_sorted, (NMSBox*)boxes_sorted, nms_sbox_idle(boxes_sorted), 
nms_score_threshold(score_threshold)); } nms_cuda<<>>(spatial_dimension, iou_threshold, offset, boxes_sorted, dev_mask); // will be performed when dev_mask is full. mask_to_output_kernel<<<1, threadsPerBlock, col_blocks * sizeof(unsigned long long), stream>>>( dev_mask, index_cache, output, output_count, batch_id, cls_id, spatial_dimension, col_blocks, max_output_boxes_per_class_cpu); } // cls_id } // batch_id } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_plugin.cpp ================================================ // Copyright (c) OpenMMLab. All rights reserved #include "trt_plugin.hpp" #include "trt_corner_pool.hpp" #include "trt_cummaxmin.hpp" #include "trt_deform_conv.hpp" #include "trt_grid_sampler.hpp" #include "trt_instance_norm.hpp" #include "trt_modulated_deform_conv.hpp" #include "trt_nms.hpp" #include "trt_roi_align.hpp" #include "trt_scatternd.hpp" REGISTER_TENSORRT_PLUGIN(CumMaxPluginDynamicCreator); REGISTER_TENSORRT_PLUGIN(CumMinPluginDynamicCreator); REGISTER_TENSORRT_PLUGIN(GridSamplerDynamicCreator); REGISTER_TENSORRT_PLUGIN(DeformableConvPluginDynamicCreator); REGISTER_TENSORRT_PLUGIN(ModulatedDeformableConvPluginDynamicCreator); REGISTER_TENSORRT_PLUGIN(NonMaxSuppressionDynamicCreator); REGISTER_TENSORRT_PLUGIN(RoIAlignPluginDynamicCreator); REGISTER_TENSORRT_PLUGIN(ONNXScatterNDDynamicCreator); REGISTER_TENSORRT_PLUGIN(InstanceNormalizationDynamicCreator); REGISTER_TENSORRT_PLUGIN(CornerPoolPluginDynamicCreator); extern "C" { bool initLibMMCVInferPlugins() { return true; } } // extern "C" ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_roi_align.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "trt_roi_align.hpp" #include #include #include "trt_serialize.hpp" extern void TRTRoIAlignForwardCUDAKernelLauncher_float( const float *input, const float *rois, float *output, float *argmax_y, float *argmax_x, int output_size, int channels, int height, int width, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned, cudaStream_t stream); namespace { static const char *PLUGIN_VERSION{"1"}; static const char *PLUGIN_NAME{"MMCVRoiAlign"}; } // namespace nvinfer1::PluginFieldCollection RoIAlignPluginDynamicCreator::mFC{}; std::vector RoIAlignPluginDynamicCreator::mPluginAttributes; RoIAlignPluginDynamic::RoIAlignPluginDynamic(const std::string &name, int outWidth, int outHeight, float spatialScale, int sampleRatio, int poolMode, bool aligned) : mLayerName(name), mOutWidth(outWidth), mOutHeight(outHeight), mSpatialScale(spatialScale), mSampleRatio(sampleRatio), mPoolMode(poolMode), mAligned(aligned) {} RoIAlignPluginDynamic::RoIAlignPluginDynamic(const std::string name, const void *data, size_t length) : mLayerName(name) { deserialize_value(&data, &length, &mOutWidth); deserialize_value(&data, &length, &mOutHeight); deserialize_value(&data, &length, &mSpatialScale); deserialize_value(&data, &length, &mSampleRatio); deserialize_value(&data, &length, &mPoolMode); deserialize_value(&data, &length, &mAligned); } nvinfer1::IPluginV2DynamicExt *RoIAlignPluginDynamic::clone() const { RoIAlignPluginDynamic *plugin = new RoIAlignPluginDynamic( mLayerName, mOutWidth, mOutHeight, mSpatialScale, mSampleRatio, mPoolMode, mAligned); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::DimsExprs RoIAlignPluginDynamic::getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs, nvinfer1::IExprBuilder &exprBuilder) { nvinfer1::DimsExprs ret; ret.nbDims = 4; ret.d[0] = inputs[1].d[0]; ret.d[1] = inputs[0].d[1]; ret.d[2] = 
exprBuilder.constant(mOutHeight); ret.d[3] = exprBuilder.constant(mOutWidth); return ret; } bool RoIAlignPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs, int nbOutputs) { return inOut[pos].type == nvinfer1::DataType::kFLOAT && inOut[pos].format == nvinfer1::TensorFormat::kLINEAR; } void RoIAlignPluginDynamic::configurePlugin( const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs, const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {} size_t RoIAlignPluginDynamic::getWorkspaceSize( const nvinfer1::PluginTensorDesc *inputs, int nbInputs, const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const { size_t output_size = 0; size_t word_size = 0; switch (mPoolMode) { case 0: // max output_size = outputs[0].dims.d[0] * outputs[0].dims.d[1] * outputs[0].dims.d[2] * outputs[0].dims.d[3]; word_size = mmcv::getElementSize(outputs[0].type); return output_size * word_size * 2; break; case 1: return 0; break; default: return 0; } return 0; } int RoIAlignPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workSpace, cudaStream_t stream) { int channels = inputDesc[0].dims.d[1]; int height = inputDesc[0].dims.d[2]; int width = inputDesc[0].dims.d[3]; int output_size = outputDesc[0].dims.d[0] * outputDesc[0].dims.d[1] * outputDesc[0].dims.d[2] * outputDesc[0].dims.d[3]; int word_size = mmcv::getElementSize(outputDesc[0].type); const void *feat = inputs[0]; const void *rois = inputs[1]; void *output = outputs[0]; void *argmax_y = nullptr; void *argmax_x = nullptr; switch (mPoolMode) { case 0: // max argmax_y = workSpace; argmax_x = argmax_y + output_size * word_size; break; case 1: // avg break; } switch (outputDesc[0].type) { case nvinfer1::DataType::kFLOAT: TRTRoIAlignForwardCUDAKernelLauncher_float( (const float *)feat, (const float *)rois, (float *)output, (float *)argmax_y, (float 
*)argmax_x, output_size, channels, height, width, mOutHeight, mOutWidth, mSpatialScale, mSampleRatio, mPoolMode, mAligned, stream); break; default: break; } return 0; } nvinfer1::DataType RoIAlignPluginDynamic::getOutputDataType( int index, const nvinfer1::DataType *inputTypes, int nbInputs) const { return inputTypes[0]; } // IPluginV2 Methods const char *RoIAlignPluginDynamic::getPluginType() const { return PLUGIN_NAME; } const char *RoIAlignPluginDynamic::getPluginVersion() const { return PLUGIN_VERSION; } int RoIAlignPluginDynamic::getNbOutputs() const { return 1; } int RoIAlignPluginDynamic::initialize() { return 0; } void RoIAlignPluginDynamic::terminate() {} size_t RoIAlignPluginDynamic::getSerializationSize() const { return sizeof(mOutWidth) + sizeof(mOutHeight) + sizeof(mSpatialScale) + sizeof(mSampleRatio) + sizeof(mPoolMode) + sizeof(mAligned); } void RoIAlignPluginDynamic::serialize(void *buffer) const { serialize_value(&buffer, mOutWidth); serialize_value(&buffer, mOutHeight); serialize_value(&buffer, mSpatialScale); serialize_value(&buffer, mSampleRatio); serialize_value(&buffer, mPoolMode); serialize_value(&buffer, mAligned); } void RoIAlignPluginDynamic::destroy() { // This gets called when the network containing plugin is destroyed delete this; } void RoIAlignPluginDynamic::setPluginNamespace(const char *libNamespace) { mNamespace = libNamespace; } const char *RoIAlignPluginDynamic::getPluginNamespace() const { return mNamespace.c_str(); } ////////////////////// creator ///////////////////////////// RoIAlignPluginDynamicCreator::RoIAlignPluginDynamicCreator() { mPluginAttributes.emplace_back(nvinfer1::PluginField("output_height")); mPluginAttributes.emplace_back(nvinfer1::PluginField("output_width")); mPluginAttributes.emplace_back(nvinfer1::PluginField("spatial_scale")); mPluginAttributes.emplace_back(nvinfer1::PluginField("sampling_ratio")); mPluginAttributes.emplace_back(nvinfer1::PluginField("mode")); 
mPluginAttributes.emplace_back(nvinfer1::PluginField("aligned")); mFC.nbFields = mPluginAttributes.size(); mFC.fields = mPluginAttributes.data(); } const char *RoIAlignPluginDynamicCreator::getPluginName() const { return PLUGIN_NAME; } const char *RoIAlignPluginDynamicCreator::getPluginVersion() const { return PLUGIN_VERSION; } const nvinfer1::PluginFieldCollection * RoIAlignPluginDynamicCreator::getFieldNames() { return &mFC; } nvinfer1::IPluginV2 *RoIAlignPluginDynamicCreator::createPlugin( const char *name, const nvinfer1::PluginFieldCollection *fc) { int outWidth = 7; int outHeight = 7; float spatialScale = 1.0; int sampleRatio = 0; int poolMode = -1; bool aligned = true; for (int i = 0; i < fc->nbFields; i++) { if (fc->fields[i].data == nullptr) { continue; } std::string field_name(fc->fields[i].name); if (field_name.compare("output_height") == 0) { outHeight = static_cast(fc->fields[i].data)[0]; } if (field_name.compare("output_width") == 0) { outWidth = static_cast(fc->fields[i].data)[0]; } if (field_name.compare("spatial_scale") == 0) { spatialScale = static_cast(fc->fields[i].data)[0]; } if (field_name.compare("sampling_ratio") == 0) { sampleRatio = static_cast(fc->fields[i].data)[0]; } if (field_name.compare("mode") == 0) { int data_size = fc->fields[i].length; const char *data_start = static_cast(fc->fields[i].data); std::string poolModeStr(data_start, data_size); if (poolModeStr == "avg") { poolMode = 1; } else if (poolModeStr == "max") { poolMode = 0; } else { std::cout << "Unknown pool mode \"" << poolModeStr << "\"." 
<< std::endl; } assert(poolMode >= 0); } if (field_name.compare("aligned") == 0) { int aligned_int = static_cast(fc->fields[i].data)[0]; aligned = aligned_int != 0; } } assert(outHeight > 0); assert(outWidth > 0); assert(spatialScale > 0.); assert(poolMode >= 0); RoIAlignPluginDynamic *plugin = new RoIAlignPluginDynamic( name, outWidth, outHeight, spatialScale, sampleRatio, poolMode, aligned); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::IPluginV2 *RoIAlignPluginDynamicCreator::deserializePlugin( const char *name, const void *serialData, size_t serialLength) { auto plugin = new RoIAlignPluginDynamic(name, serialData, serialLength); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } void RoIAlignPluginDynamicCreator::setPluginNamespace( const char *libNamespace) { mNamespace = libNamespace; } const char *RoIAlignPluginDynamicCreator::getPluginNamespace() const { return mNamespace.c_str(); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_roi_align_kernel.cu ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "common_cuda_helper.hpp" #include "roi_align_cuda_kernel.cuh" template void TRTRoIAlignForwardCUDAKernelLauncher( const scalar_t* input, const scalar_t* rois, scalar_t* output, scalar_t* argmax_y, scalar_t* argmax_x, int output_size, int channels, int height, int width, int aligned_height, int aligned_width, scalar_t spatial_scale, int sampling_ratio, int pool_mode, bool aligned, cudaStream_t stream) { roi_align_forward_cuda_kernel <<>>( output_size, input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, static_cast(spatial_scale), sampling_ratio, pool_mode, aligned, channels, height, width); } void TRTRoIAlignForwardCUDAKernelLauncher_float( const float* input, const float* rois, float* output, float* argmax_y, float* argmax_x, int output_size, int channels, int height, int width, int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, int pool_mode, bool aligned, cudaStream_t stream) { TRTRoIAlignForwardCUDAKernelLauncher( input, rois, output, argmax_y, argmax_x, output_size, channels, height, width, aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned, stream); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_scatternd.cpp ================================================ // Copyright (c) OpenMMLab. 
All rights reserved #include "trt_scatternd.hpp" #include #include #include #include "trt_serialize.hpp" extern void TRTONNXScatterNDKernelLauncher_float( const float *data, const int *indices, const float *update, const int *dims, int nbDims, const int *indices_dims, int indice_nbDims, float *output, cudaStream_t stream); extern void TRTONNXScatterNDKernelLauncher_int32( const int *data, const int *indices, const int *update, const int *dims, int nbDims, const int *indices_dims, int indice_nbDims, int *output, cudaStream_t stream); namespace { static const char *PLUGIN_VERSION{"1"}; static const char *PLUGIN_NAME{"ScatterND"}; } // namespace nvinfer1::PluginFieldCollection ONNXScatterNDDynamicCreator::mFC{}; std::vector ONNXScatterNDDynamicCreator::mPluginAttributes; ONNXScatterNDDynamic::ONNXScatterNDDynamic(const std::string &name) : mLayerName(name) {} ONNXScatterNDDynamic::ONNXScatterNDDynamic(const std::string name, const void *data, size_t length) : mLayerName(name) {} nvinfer1::IPluginV2DynamicExt *ONNXScatterNDDynamic::clone() const { ONNXScatterNDDynamic *plugin = new ONNXScatterNDDynamic(mLayerName); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::DimsExprs ONNXScatterNDDynamic::getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs, nvinfer1::IExprBuilder &exprBuilder) { return inputs[0]; } bool ONNXScatterNDDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs, int nbOutputs) { if (pos < nbInputs) { switch (pos) { case 0: // data return (inOut[pos].type == nvinfer1::DataType::kFLOAT && inOut[pos].format == nvinfer1::TensorFormat::kLINEAR) || (inOut[pos].type == nvinfer1::DataType::kINT32 && inOut[pos].format == nvinfer1::TensorFormat::kLINEAR); case 1: // indices return inOut[pos].type == nvinfer1::DataType::kINT32 && inOut[pos].format == nvinfer1::TensorFormat::kLINEAR; case 2: // updates return inOut[pos].type == inOut[0].type && inOut[pos].format 
== inOut[0].format; default: return true; } } else { switch (pos - nbInputs) { case 0: // output return inOut[pos].type == inOut[0].type && inOut[pos].format == inOut[0].format; default: return true; } } return true; } void ONNXScatterNDDynamic::configurePlugin( const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs, const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {} size_t ONNXScatterNDDynamic::getWorkspaceSize( const nvinfer1::PluginTensorDesc *inputs, int nbInputs, const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const { return 0; } int ONNXScatterNDDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workSpace, cudaStream_t stream) { const int *dims = &(inputDesc[0].dims.d[0]); const int *indices_dims = &(inputDesc[1].dims.d[0]); int nbDims = inputDesc[0].dims.nbDims; int indice_nbDims = inputDesc[1].dims.nbDims; const void *data = inputs[0]; const void *indices = inputs[1]; const void *update = inputs[2]; void *output = outputs[0]; auto data_type = inputDesc[0].type; switch (data_type) { case nvinfer1::DataType::kFLOAT: TRTONNXScatterNDKernelLauncher_float( (float *)data, (int *)indices, (float *)update, dims, nbDims, indices_dims, indice_nbDims, (float *)output, stream); break; case nvinfer1::DataType::kINT32: TRTONNXScatterNDKernelLauncher_int32( (int *)data, (int *)indices, (int *)update, dims, nbDims, indices_dims, indice_nbDims, (int *)output, stream); break; default: break; } return 0; } nvinfer1::DataType ONNXScatterNDDynamic::getOutputDataType( int index, const nvinfer1::DataType *inputTypes, int nbInputs) const { return inputTypes[0]; } // IPluginV2 Methods const char *ONNXScatterNDDynamic::getPluginType() const { return PLUGIN_NAME; } const char *ONNXScatterNDDynamic::getPluginVersion() const { return PLUGIN_VERSION; } int ONNXScatterNDDynamic::getNbOutputs() const { return 1; } int 
ONNXScatterNDDynamic::initialize() { return 0; } void ONNXScatterNDDynamic::terminate() {} size_t ONNXScatterNDDynamic::getSerializationSize() const { return 0; } void ONNXScatterNDDynamic::serialize(void *buffer) const {} void ONNXScatterNDDynamic::destroy() { // This gets called when the network containing plugin is destroyed delete this; } void ONNXScatterNDDynamic::setPluginNamespace(const char *libNamespace) { mNamespace = libNamespace; } const char *ONNXScatterNDDynamic::getPluginNamespace() const { return mNamespace.c_str(); } ////////////////////// creator ///////////////////////////// ONNXScatterNDDynamicCreator::ONNXScatterNDDynamicCreator() { mPluginAttributes.clear(); mFC.nbFields = mPluginAttributes.size(); mFC.fields = mPluginAttributes.data(); } const char *ONNXScatterNDDynamicCreator::getPluginName() const { return PLUGIN_NAME; } const char *ONNXScatterNDDynamicCreator::getPluginVersion() const { return PLUGIN_VERSION; } const nvinfer1::PluginFieldCollection * ONNXScatterNDDynamicCreator::getFieldNames() { return &mFC; } nvinfer1::IPluginV2 *ONNXScatterNDDynamicCreator::createPlugin( const char *name, const nvinfer1::PluginFieldCollection *fc) { ONNXScatterNDDynamic *plugin = new ONNXScatterNDDynamic(name); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } nvinfer1::IPluginV2 *ONNXScatterNDDynamicCreator::deserializePlugin( const char *name, const void *serialData, size_t serialLength) { auto plugin = new ONNXScatterNDDynamic(name, serialData, serialLength); plugin->setPluginNamespace(getPluginNamespace()); return plugin; } void ONNXScatterNDDynamicCreator::setPluginNamespace(const char *libNamespace) { mNamespace = libNamespace; } const char *ONNXScatterNDDynamicCreator::getPluginNamespace() const { return mNamespace.c_str(); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_scatternd_kernel.cu ================================================ // Copyright (c) 
OpenMMLab. All rights reserved #include #include #include "common_cuda_helper.hpp" #include "trt_cuda_helper.cuh" #include "trt_plugin_helper.hpp" static int const threadsPerBlock = sizeof(unsigned long long int) * 8; using mmcv::TensorDesc; template __global__ void onnx_scatternd_kernel(const int n, const int* indices, const T* update, T* output, TensorDesc tensor_desc, TensorDesc indice_desc) { const int indice_cols = indice_desc.shape[indice_desc.dim - 1]; const int copy_stride = tensor_desc.stride[indice_cols - 1]; const int* stride = &(tensor_desc.stride[0]); CUDA_1D_KERNEL_LOOP(index, n) { int output_offset = 0; const int* indices_current = indices + index * indice_cols; for (int i = 0; i < indice_cols; ++i) { output_offset += stride[i] * indices_current[i]; } memcpy(output + output_offset, update + index * copy_stride, copy_stride * sizeof(T)); } } template void TRTONNXScatterNDKernelLauncher(const T* data, const int* indices, const T* update, const int* dims, int nbDims, const int* indices_dims, int indice_nbDims, T* output, cudaStream_t stream) { // fill tensordesc and initial TensorDesc tensor_desc; memset((void*)&tensor_desc, 0, sizeof(TensorDesc)); tensor_desc.dim = nbDims; tensor_desc.shape[nbDims - 1] = dims[nbDims - 1]; tensor_desc.stride[nbDims - 1] = 1; for (int i = nbDims - 2; i >= 0; --i) { tensor_desc.shape[i] = dims[i]; tensor_desc.stride[i] = dims[i + 1] * tensor_desc.stride[i + 1]; } const int data_size = tensor_desc.stride[0] * tensor_desc.shape[0]; TensorDesc indice_desc; memset((void*)&indice_desc, 0, sizeof(TensorDesc)); indice_desc.dim = indice_nbDims; indice_desc.shape[indice_nbDims - 1] = indices_dims[indice_nbDims - 1]; indice_desc.stride[indice_nbDims - 1] = 1; for (int i = indice_nbDims - 2; i >= 0; --i) { indice_desc.shape[i] = indices_dims[i]; indice_desc.stride[i] = indices_dims[i + 1] * indice_desc.stride[i + 1]; } // output = np.copy(data) cudaMemcpyAsync(output, data, data_size * sizeof(T), cudaMemcpyDeviceToDevice); int 
num_update_indice = 1; for (int i = 0; i < indice_nbDims - 1; ++i) { num_update_indice *= indice_desc.shape[i]; } // scatter const int col_block = GET_BLOCKS(num_update_indice, threadsPerBlock); onnx_scatternd_kernel<<>>( num_update_indice, indices, update, output, tensor_desc, indice_desc); } void TRTONNXScatterNDKernelLauncher_float(const float* data, const int* indices, const float* update, const int* dims, int nbDims, const int* indices_dims, int indice_nbDims, float* output, cudaStream_t stream) { TRTONNXScatterNDKernelLauncher(data, indices, update, dims, nbDims, indices_dims, indice_nbDims, output, stream); } void TRTONNXScatterNDKernelLauncher_int32(const int* data, const int* indices, const int* update, const int* dims, int nbDims, const int* indices_dims, int indice_nbDims, int* output, cudaStream_t stream) { TRTONNXScatterNDKernelLauncher(data, indices, update, dims, nbDims, indices_dims, indice_nbDims, output, stream); } ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_corner_pool.hpp ================================================ #ifndef TRT_CORNER_POOL_HPP #define TRT_CORNER_POOL_HPP #include #include #include "trt_plugin_helper.hpp" enum TRT_CORNER_POOL_TYPE { TRT_TOP_POOL = 0, TRT_BOTTOM_POOL = 1, TRT_LEFT_POOL = 2, TRT_RIGHT_POOL = 3 }; // implement of CornerPool class CornerPoolPluginDynamic : public nvinfer1::IPluginV2DynamicExt { public: CornerPoolPluginDynamic(const std::string &name, TRT_CORNER_POOL_TYPE poolType); CornerPoolPluginDynamic(const std::string name, const void *data, size_t length); CornerPoolPluginDynamic() = delete; ~CornerPoolPluginDynamic(); // IPluginV2DynamicExt Methods nvinfer1::IPluginV2DynamicExt *clone() const override; nvinfer1::DimsExprs getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs, nvinfer1::IExprBuilder &exprBuilder) override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc *inOut, int 
nbInputs, int nbOutputs) override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc *out, int nbOutputs) override; size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs, int nbInputs, const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const override; int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) override; // IPluginV2Ext Methods nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes, int nbInputs) const override; // IPluginV2 Methods const char *getPluginType() const override; const char *getPluginVersion() const override; int getNbOutputs() const override; int initialize() override; void terminate() override; size_t getSerializationSize() const override; void serialize(void *buffer) const override; void destroy() override; void setPluginNamespace(const char *pluginNamespace) override; const char *getPluginNamespace() const override; protected: const std::string mLayerName; std::string mNamespace; TRT_CORNER_POOL_TYPE mPoolType; protected: // To prevent compiler warnings. 
using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch; using nvinfer1::IPluginV2DynamicExt::configurePlugin; using nvinfer1::IPluginV2DynamicExt::enqueue; using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch; using nvinfer1::IPluginV2DynamicExt::supportsFormat; }; // CornerPool creator class CornerPoolPluginDynamicCreator : public nvinfer1::IPluginCreator { public: CornerPoolPluginDynamicCreator(); const char *getPluginName() const override; const char *getPluginVersion() const override; const nvinfer1::PluginFieldCollection *getFieldNames() override; nvinfer1::IPluginV2 *createPlugin( const char *name, const nvinfer1::PluginFieldCollection *fc) override; nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) override; void setPluginNamespace(const char *pluginNamespace) override; const char *getPluginNamespace() const override; protected: nvinfer1::PluginFieldCollection mFC; std::vector mPluginAttributes; std::string mNamespace; }; #endif TRT_CORNER_POOL_HPP // TRT_CORNER_POOL_HPP ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_cuda_helper.cuh ================================================ // Copyright (c) OpenMMLab. All rights reserved #ifndef TRT_CUDA_HELPER_HPP #define TRT_CUDA_HELPER_HPP #include #define cudaCheckError() \ { \ cudaError_t e = cudaGetLastError(); \ if (e != cudaSuccess) { \ printf("Cuda failure %s:%d: '%s'\n", __FILE__, __LINE__, \ cudaGetErrorString(e)); \ exit(0); \ } \ } /** * Returns a view of the original tensor with its dimensions permuted. 
* * @param[out] dst pointer to the destination tensor * @param[in] src pointer to the source tensor * @param[in] src_size shape of the src tensor * @param[in] permute The desired ordering of dimensions * @param[in] src_dim dim of src tensor * @param[in] stream cuda stream handle */ template void memcpyPermute(scalar_t* dst, const scalar_t* src, int* src_size, int* permute, int src_dim, cudaStream_t stream = 0); template cublasStatus_t cublasGemmWrap(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const scalar_t* alpha, const scalar_t* A, int lda, const scalar_t* B, int ldb, const scalar_t* beta, scalar_t* C, int ldc) { return CUBLAS_STATUS_INTERNAL_ERROR; } #endif // TRT_CUDA_HELPER_HPP ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_cummaxmin.hpp ================================================ #ifndef TRT_CUMMAXMIN_HPP #define TRT_CUMMAXMIN_HPP #include #include #include "trt_plugin_helper.hpp" enum TRT_CUMCMPTYPE { TRT_CUMMAX = 0, TRT_CUMMIN = 1 }; // implement of cummax and cummin class CumMaxMinPluginDynamic : public nvinfer1::IPluginV2DynamicExt { public: CumMaxMinPluginDynamic(const std::string &name, int dim, TRT_CUMCMPTYPE cumType); CumMaxMinPluginDynamic(const std::string name, const void *data, size_t length); CumMaxMinPluginDynamic() = delete; ~CumMaxMinPluginDynamic(); // IPluginV2DynamicExt Methods nvinfer1::IPluginV2DynamicExt *clone() const override; nvinfer1::DimsExprs getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs, nvinfer1::IExprBuilder &exprBuilder) override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs, int nbOutputs) override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc *out, int nbOutputs) override; size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs, int 
nbInputs, const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const override; int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) override; // IPluginV2Ext Methods nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes, int nbInputs) const override; // IPluginV2 Methods const char *getPluginType() const override; const char *getPluginVersion() const override; int getNbOutputs() const override; int initialize() override; void terminate() override; size_t getSerializationSize() const override; void serialize(void *buffer) const override; void destroy() override; void setPluginNamespace(const char *pluginNamespace) override; const char *getPluginNamespace() const override; protected: const std::string mLayerName; std::string mNamespace; int mDim; TRT_CUMCMPTYPE mCumType; protected: // To prevent compiler warnings. using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch; using nvinfer1::IPluginV2DynamicExt::configurePlugin; using nvinfer1::IPluginV2DynamicExt::enqueue; using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch; using nvinfer1::IPluginV2DynamicExt::supportsFormat; }; // cummax and cummin creator class CumMaxMinPluginDynamicCreator : public nvinfer1::IPluginCreator { public: CumMaxMinPluginDynamicCreator(TRT_CUMCMPTYPE cumType); const char *getPluginName() const override; const char *getPluginVersion() const override; const nvinfer1::PluginFieldCollection *getFieldNames() override; nvinfer1::IPluginV2 *createPlugin( const char *name, const nvinfer1::PluginFieldCollection *fc) override; nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) override; void setPluginNamespace(const char *pluginNamespace) 
override; const char *getPluginNamespace() const override; protected: TRT_CUMCMPTYPE mCumType; nvinfer1::PluginFieldCollection mFC; std::vector mPluginAttributes; std::string mNamespace; }; // cummax creator class CumMaxPluginDynamicCreator : public CumMaxMinPluginDynamicCreator { public: CumMaxPluginDynamicCreator(); const char *getPluginName() const override; }; // cummin creator class CumMinPluginDynamicCreator : public CumMaxMinPluginDynamicCreator { public: CumMinPluginDynamicCreator(); const char *getPluginName() const override; }; #endif TRT_CUMMAXMIN_HPP // TRT_CUMMAXMIN_HPP ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_deform_conv.hpp ================================================ #ifndef TRT_DEFORM_CONV_HPP #define TRT_DEFORM_CONV_HPP #include #include #include #include #include "trt_plugin_helper.hpp" class DeformableConvPluginDynamic : public nvinfer1::IPluginV2DynamicExt { public: DeformableConvPluginDynamic(const std::string &name, const nvinfer1::Dims &stride, const nvinfer1::Dims &padding, const nvinfer1::Dims &dilation, const int deformableGroup, const int group, int im2colStep); DeformableConvPluginDynamic(const std::string name, const void *data, size_t length); DeformableConvPluginDynamic() = delete; ~DeformableConvPluginDynamic(); // IPluginV2DynamicExt Methods nvinfer1::IPluginV2DynamicExt *clone() const override; nvinfer1::DimsExprs getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs, nvinfer1::IExprBuilder &exprBuilder) override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs, int nbOutputs) override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc *out, int nbOutputs) override; size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs, int nbInputs, const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const 
override; int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) override; void attachToContext(cudnnContext *cudnnContext, cublasContext *cublasContext, nvinfer1::IGpuAllocator *gpuAllocator) override; void detachFromContext() override; // IPluginV2Ext Methods nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes, int nbInputs) const override; // IPluginV2 Methods const char *getPluginType() const override; const char *getPluginVersion() const override; int getNbOutputs() const override; int initialize() override; void terminate() override; size_t getSerializationSize() const override; void serialize(void *buffer) const override; void destroy() override; void setPluginNamespace(const char *pluginNamespace) override; const char *getPluginNamespace() const override; private: const std::string mLayerName; std::string mNamespace; nvinfer1::Dims mStride; nvinfer1::Dims mPadding; nvinfer1::Dims mDilation; int mDeformableGroup; int mGroup; int mIm2colStep; cublasHandle_t m_cublas_handle; protected: // To prevent compiler warnings. 
using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch; using nvinfer1::IPluginV2DynamicExt::configurePlugin; using nvinfer1::IPluginV2DynamicExt::enqueue; using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch; using nvinfer1::IPluginV2DynamicExt::supportsFormat; }; class DeformableConvPluginDynamicCreator : public nvinfer1::IPluginCreator { public: DeformableConvPluginDynamicCreator(); const char *getPluginName() const override; const char *getPluginVersion() const override; const nvinfer1::PluginFieldCollection *getFieldNames() override; nvinfer1::IPluginV2 *createPlugin( const char *name, const nvinfer1::PluginFieldCollection *fc) override; nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) override; void setPluginNamespace(const char *pluginNamespace) override; const char *getPluginNamespace() const override; private: static nvinfer1::PluginFieldCollection mFC; static std::vector mPluginAttributes; std::string mNamespace; }; #endif // TRT_DEFORM_CONV_HPP ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_grid_sampler.hpp ================================================ #ifndef TRT_GRID_SAMPLER_HPP #define TRT_GRID_SAMPLER_HPP #include #include #include #include #include "trt_plugin_helper.hpp" namespace mmcv { enum class GridSamplerInterpolation { Bilinear, Nearest }; enum class GridSamplerPadding { Zeros, Border, Reflection }; } // namespace mmcv class GridSamplerDynamic : public nvinfer1::IPluginV2DynamicExt { public: GridSamplerDynamic(const std::string &name, int mode, int paddingMode, bool alignCorners); GridSamplerDynamic(const std::string name, const void *data, size_t length); GridSamplerDynamic() = delete; // IPluginV2DynamicExt Methods nvinfer1::IPluginV2DynamicExt *clone() const override; 
nvinfer1::DimsExprs getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs, nvinfer1::IExprBuilder &exprBuilder) override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs, int nbOutputs) override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc *out, int nbOutputs) override; size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs, int nbInputs, const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const override; int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) override; // IPluginV2Ext Methods nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes, int nbInputs) const override; // IPluginV2 Methods const char *getPluginType() const override; const char *getPluginVersion() const override; int getNbOutputs() const override; int initialize() override; void terminate() override; size_t getSerializationSize() const override; void serialize(void *buffer) const override; void destroy() override; void setPluginNamespace(const char *pluginNamespace) override; const char *getPluginNamespace() const override; private: const std::string mLayerName; std::string mNamespace; int mMode; int mPaddingMode; bool mAlignCorners; protected: // To prevent compiler warnings. 
// Dynamic-shape plugin interface for instance normalization. Declares the
// nvinfer1::IPluginV2DynamicExt overrides plus the cuDNN state the plugin
// keeps; all definitions live outside this header.
class InstanceNormalizationDynamic final
    : public nvinfer1::IPluginV2DynamicExt {
 public:
  // Builds the plugin from a layer name and the epsilon value.
  InstanceNormalizationDynamic(const std::string& name, float epsilon);

  // Builds the plugin from `serialLength` bytes of serialized state.
  InstanceNormalizationDynamic(const std::string& name, void const* serialData,
                               size_t serialLength);

  // A plugin without a name/epsilon is not constructible.
  InstanceNormalizationDynamic() = delete;

  ~InstanceNormalizationDynamic() override;

  int getNbOutputs() const override;

  // DynamicExt plugins return a DimsExprs object instead of Dims.
  nvinfer1::DimsExprs getOutputDimensions(
      int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs,
      nvinfer1::IExprBuilder& exprBuilder) override;

  int initialize() override;

  void terminate() override;

  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc* outputs,
                          int nbOutputs) const override;

  // Runs the plugin on `stream` using the given tensor descriptors, input /
  // output pointers and workspace.
  int enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
              const nvinfer1::PluginTensorDesc* outputDesc,
              const void* const* inputs, void* const* outputs, void* workspace,
              cudaStream_t stream) override;

  size_t getSerializationSize() const override;

  void serialize(void* buffer) const override;

  // DynamicExt plugin supportsFormat update.
  bool supportsFormatCombination(int pos,
                                 const nvinfer1::PluginTensorDesc* inOut,
                                 int nbInputs, int nbOutputs) override;

  const char* getPluginType() const override;

  const char* getPluginVersion() const override;

  void destroy() override;

  nvinfer1::IPluginV2DynamicExt* clone() const override;

  void setPluginNamespace(const char* pluginNamespace) override;

  const char* getPluginNamespace() const override;

  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType* inputTypes,
                                       int nbInputs) const override;

  // Receives the cuDNN / cuBLAS contexts and the GPU allocator.
  void attachToContext(cudnnContext* cudnn, cublasContext* cublas,
                       nvinfer1::IGpuAllocator* allocator) override;

  void detachFromContext() override;

  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc* out,
                       int nbOutputs) override;

 private:
  const std::string mLayerName;
  // All members below are value-initialized by their `{}` initializers.
  float mEpsilon{};
  cudnnHandle_t _cudnn_handle{};
  // cuDNN tensor descriptors; judging by the names these describe the input
  // (x), output (y) and a third "b" tensor -- confirm in the implementation.
  cudnnTensorDescriptor_t _x_desc{}, _y_desc{}, _b_desc{};
  std::string mPluginNamespace{};
};
// Dynamic-shape plugin interface for modulated deformable convolution.
// Declares the nvinfer1::IPluginV2DynamicExt overrides and the convolution
// parameters the plugin stores; definitions are outside this header.
class ModulatedDeformableConvPluginDynamic
    : public nvinfer1::IPluginV2DynamicExt {
 public:
  // Builds the plugin from explicit convolution parameters.
  ModulatedDeformableConvPluginDynamic(const std::string &name,
                                       const nvinfer1::Dims stride,
                                       const nvinfer1::Dims padding,
                                       const nvinfer1::Dims dilation,
                                       const int deformableGroup,
                                       const int group);

  // Builds the plugin from `length` bytes of serialized state at `data`.
  // Note that `name` is taken by value here, unlike the constructor above.
  ModulatedDeformableConvPluginDynamic(const std::string name, const void *data,
                                       size_t length);

  ModulatedDeformableConvPluginDynamic() = delete;

  ~ModulatedDeformableConvPluginDynamic();

  // IPluginV2DynamicExt Methods
  nvinfer1::IPluginV2DynamicExt *clone() const override;
  nvinfer1::DimsExprs getOutputDimensions(
      int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
      nvinfer1::IExprBuilder &exprBuilder) override;
  bool supportsFormatCombination(int pos,
                                 const nvinfer1::PluginTensorDesc *inOut,
                                 int nbInputs, int nbOutputs) override;
  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc *out,
                       int nbOutputs) override;
  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc *outputs,
                          int nbOutputs) const override;
  // Runs the plugin on `stream` using the given tensor descriptors, input /
  // output pointers and workspace.
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
              const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace,
              cudaStream_t stream) override;
  // Receives the cuDNN / cuBLAS contexts and the GPU allocator
  // (presumably the source of m_cublas_handle -- confirm in the .cpp).
  void attachToContext(cudnnContext *cudnnContext,
                       cublasContext *cublasContext,
                       nvinfer1::IGpuAllocator *gpuAllocator) override;
  void detachFromContext() override;

  // IPluginV2Ext Methods
  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType *inputTypes,
                                       int nbInputs) const override;

  // IPluginV2 Methods
  const char *getPluginType() const override;
  const char *getPluginVersion() const override;
  int getNbOutputs() const override;
  int initialize() override;
  void terminate() override;
  size_t getSerializationSize() const override;
  void serialize(void *buffer) const override;
  void destroy() override;
  void setPluginNamespace(const char *pluginNamespace) override;
  const char *getPluginNamespace() const override;

 private:
  const std::string mLayerName;
  std::string mNamespace;

  // Convolution parameters mirrored from the first constructor's arguments.
  nvinfer1::Dims mStride;
  nvinfer1::Dims mPadding;
  nvinfer1::Dims mDilation;
  int mDeformableGroup;
  int mGroup;
  // Not a constructor argument; where it is set is not visible in this header.
  bool mWithBias;

  cublasHandle_t m_cublas_handle;

 protected:
  // To prevent compiler warnings: these using-declarations bring the
  // base-class declarations of the same names into this class's scope.
  using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::configurePlugin;
  using nvinfer1::IPluginV2DynamicExt::enqueue;
  using nvinfer1::IPluginV2DynamicExt::getOutputDimensions;
  using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize;
  using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::supportsFormat;
};
int nbInputs, nvinfer1::IExprBuilder &exprBuilder) override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs, int nbOutputs) override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc *out, int nbOutputs) override; size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs, int nbInputs, const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const override; int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) override; // IPluginV2Ext Methods nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes, int nbInputs) const override; // IPluginV2 Methods const char *getPluginType() const override; const char *getPluginVersion() const override; int getNbOutputs() const override; int initialize() override; void terminate() override; size_t getSerializationSize() const override; void serialize(void *buffer) const override; void destroy() override; void setPluginNamespace(const char *pluginNamespace) override; const char *getPluginNamespace() const override; private: const std::string mLayerName; std::string mNamespace; int mCenterPointBox; int mMaxOutputBoxesPerClass; float mIouThreshold; float mScoreThreshold; int mOffset; protected: // To prevent compiler warnings. 
// Plugin-creator interface declared alongside NonMaxSuppressionDynamic.
// This header only declares the nvinfer1::IPluginCreator overrides; their
// definitions are not visible here.
class NonMaxSuppressionDynamicCreator : public nvinfer1::IPluginCreator {
 public:
  NonMaxSuppressionDynamicCreator();

  // Name / version strings reported by this creator.
  const char *getPluginName() const override;

  const char *getPluginVersion() const override;

  // Returns the field collection describing the creator's attributes
  // (presumably backed by mFC / mPluginAttributes -- confirm in the .cpp).
  const nvinfer1::PluginFieldCollection *getFieldNames() override;

  // Creates a plugin instance called `name` from the field collection `fc`.
  nvinfer1::IPluginV2 *createPlugin(
      const char *name, const nvinfer1::PluginFieldCollection *fc) override;

  // Creates a plugin instance called `name` from `serialLength` bytes of
  // serialized state starting at `serialData`.
  nvinfer1::IPluginV2 *deserializePlugin(const char *name,
                                         const void *serialData,
                                         size_t serialLength) override;

  void setPluginNamespace(const char *pluginNamespace) override;

  const char *getPluginNamespace() const override;

 private:
  // Shared by every creator instance (static storage).
  static nvinfer1::PluginFieldCollection mFC;
  // NOTE(review): the vector's element type is missing in this copy of the
  // header (angle-bracket contents appear to have been stripped) -- restore
  // it from the upstream file before compiling.
  static std::vector mPluginAttributes;
  std::string mNamespace;
};
// Rounds `origin_size` up to the nearest multiple of `aligned_number`.
//
// Parameters:
//   origin_size     size to round up.
//   aligned_number  alignment granularity (default 16). A value of 0 means
//                   "no alignment" and returns `origin_size` unchanged
//                   instead of dividing by zero.
//
// Returns the smallest multiple of `aligned_number` that is >= origin_size.
// The remainder form avoids the intermediate `origin_size + aligned_number - 1`
// sum, which could wrap around for very large arguments and yield a result
// smaller than `origin_size`.
inline size_t getAlignedSize(size_t origin_size, size_t aligned_number = 16) {
  if (aligned_number == 0) {
    return origin_size;
  }
  const size_t remainder = origin_size % aligned_number;
  if (remainder == 0) {
    // Already aligned.
    return origin_size;
  }
  return origin_size + (aligned_number - remainder);
}
// Plugin-creator interface declared alongside RoIAlignPluginDynamic.
// This header only declares the nvinfer1::IPluginCreator overrides; their
// definitions are not visible here.
class RoIAlignPluginDynamicCreator : public nvinfer1::IPluginCreator {
 public:
  RoIAlignPluginDynamicCreator();

  // Name / version strings reported by this creator.
  const char *getPluginName() const override;

  const char *getPluginVersion() const override;

  // Returns the field collection describing the creator's attributes
  // (presumably backed by mFC / mPluginAttributes -- confirm in the .cpp).
  const nvinfer1::PluginFieldCollection *getFieldNames() override;

  // Creates a plugin instance called `name` from the field collection `fc`.
  nvinfer1::IPluginV2 *createPlugin(
      const char *name, const nvinfer1::PluginFieldCollection *fc) override;

  // Creates a plugin instance called `name` from `serialLength` bytes of
  // serialized state starting at `serialData`.
  nvinfer1::IPluginV2 *deserializePlugin(const char *name,
                                         const void *serialData,
                                         size_t serialLength) override;

  void setPluginNamespace(const char *pluginNamespace) override;

  const char *getPluginNamespace() const override;

 private:
  // Shared by every creator instance (static storage).
  static nvinfer1::PluginFieldCollection mFC;
  // NOTE(review): the vector's element type is missing in this copy of the
  // header (angle-bracket contents appear to have been stripped) -- restore
  // it from the upstream file before compiling.
  static std::vector mPluginAttributes;
  std::string mNamespace;
};
// Dynamic-shape plugin interface named after the ONNX ScatterND operator.
// Declares the nvinfer1::IPluginV2DynamicExt overrides; the plugin stores no
// parameters beyond its layer name and namespace. Definitions are outside
// this header.
class ONNXScatterNDDynamic : public nvinfer1::IPluginV2DynamicExt {
 public:
  // Builds the plugin from a layer name only.
  ONNXScatterNDDynamic(const std::string &name);

  // Builds the plugin from `length` bytes of serialized state at `data`.
  // Note that `name` is taken by value here, unlike the constructor above.
  ONNXScatterNDDynamic(const std::string name, const void *data,
                       size_t length);

  ONNXScatterNDDynamic() = delete;

  // IPluginV2DynamicExt Methods
  nvinfer1::IPluginV2DynamicExt *clone() const override;
  nvinfer1::DimsExprs getOutputDimensions(
      int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
      nvinfer1::IExprBuilder &exprBuilder) override;
  bool supportsFormatCombination(int pos,
                                 const nvinfer1::PluginTensorDesc *inOut,
                                 int nbInputs, int nbOutputs) override;
  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc *out,
                       int nbOutputs) override;
  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc *outputs,
                          int nbOutputs) const override;
  // Runs the plugin on `stream` using the given tensor descriptors, input /
  // output pointers and workspace.
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
              const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace,
              cudaStream_t stream) override;

  // IPluginV2Ext Methods
  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType *inputTypes,
                                       int nbInputs) const override;

  // IPluginV2 Methods
  const char *getPluginType() const override;
  const char *getPluginVersion() const override;
  int getNbOutputs() const override;
  int initialize() override;
  void terminate() override;
  size_t getSerializationSize() const override;
  void serialize(void *buffer) const override;
  void destroy() override;
  void setPluginNamespace(const char *pluginNamespace) override;
  const char *getPluginNamespace() const override;

 private:
  const std::string mLayerName;
  std::string mNamespace;

 protected:
  // To prevent compiler warnings: these using-declarations bring the
  // base-class declarations of the same names into this class's scope.
  using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::configurePlugin;
  using nvinfer1::IPluginV2DynamicExt::enqueue;
  using nvinfer1::IPluginV2DynamicExt::getOutputDimensions;
  using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize;
  using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::supportsFormat;
};
// Serializer specialization whose member functions operate on
// NUL-terminated C strings (`const char*`).
// NOTE(review): the template argument list after `Serializer` and the target
// types of the casts below are missing in this copy of the header
// (angle-bracket contents appear to have been stripped) -- restore them from
// the upstream file before compiling.
template <>
struct Serializer {
  // Bytes needed for `value`, including the terminating NUL.
  static size_t serialized_size(const char* value) {
    return strlen(value) + 1;
  }

  // Copies `value` (with its NUL) to `*buffer` and advances `*buffer` past it.
  // The caller must have reserved at least serialized_size(value) bytes.
  static void serialize(void** buffer, const char* value) {
    ::strcpy(static_cast(*buffer), value);
    reinterpret_cast(*buffer) += strlen(value) + 1;
  }

  // Points `*value` at the string stored at `*buffer` (no copy is made, so
  // the result aliases the input buffer), then advances `*buffer` and shrinks
  // `*buffer_size` by the string length plus its NUL.
  static void deserialize(void const** buffer, size_t* buffer_size,
                          const char** value) {
    *value = static_cast(*buffer);
    // strnlen is bounded by the remaining bytes; the +1 accounts for the NUL.
    size_t data_size = strnlen(*value, *buffer_size) + 1;
    // NOTE(review): if no NUL is found within *buffer_size bytes, data_size
    // exceeds *buffer_size and this assert is the only guard; with asserts
    // disabled the subtraction below would wrap around.
    assert(*buffer_size >= data_size);
    reinterpret_cast(*buffer) += data_size;
    *buffer_size -= data_size;
  }
};
class DeformConv2dFunction(Function):
    """Autograd function that dispatches deformable 2D convolution to the
    compiled ``ext_module`` kernels.

    ``forward`` calls ``deform_conv_forward``; ``backward`` calls
    ``deform_conv_backward_input`` and ``deform_conv_backward_parameters``;
    ``symbolic`` emits the ``mmcv::MMCVDeformConv2d`` graph op.
    """

    @staticmethod
    def symbolic(g,
                 input,
                 offset,
                 weight,
                 stride,
                 padding,
                 dilation,
                 groups,
                 deform_groups,
                 bias=False,
                 im2col_step=32):
        """Emit the ``mmcv::MMCVDeformConv2d`` op with integer attributes."""
        return g.op(
            'mmcv::MMCVDeformConv2d',
            input,
            offset,
            weight,
            stride_i=stride,
            padding_i=padding,
            dilation_i=dilation,
            groups_i=groups,
            deform_groups_i=deform_groups,
            bias_i=bias,
            im2col_step_i=im2col_step)

    @staticmethod
    def _kernel_kwargs(ctx, weight):
        """Geometry keyword arguments shared by all three extension calls."""
        return dict(
            kW=weight.size(3),
            kH=weight.size(2),
            dW=ctx.stride[1],
            dH=ctx.stride[0],
            padW=ctx.padding[1],
            padH=ctx.padding[0],
            dilationW=ctx.dilation[1],
            dilationH=ctx.dilation[0],
            group=ctx.groups,
            deformable_group=ctx.deform_groups)

    @staticmethod
    def forward(ctx,
                input,
                offset,
                weight,
                stride=1,
                padding=0,
                dilation=1,
                groups=1,
                deform_groups=1,
                bias=False,
                im2col_step=32):
        """Run the forward kernel and stash what ``backward`` needs on ctx.

        Args:
            ctx: Autograd context; receives the convolution settings, the
                saved tensors and the scratch buffers ``bufs_``.
            input: Input tensor; must be 4D.
            offset: Sampling offsets. Its dtype decides the compute dtype.
            weight: Convolution weight.
            stride, padding, dilation: Int or pair, normalised with ``_pair``.
            groups, deform_groups: Group counts passed to the kernel.
            bias: Must be ``False``.
            im2col_step: Upper bound on samples handled per kernel call.

        Returns:
            The output tensor allocated with the size from ``_output_size``.

        Raises:
            ValueError: If ``input`` is not 4D or the output would be empty.
            AssertionError: If ``bias`` is not ``False`` or the batch size is
                not divisible by the effective im2col step.
        """
        if input is not None and input.dim() != 4:
            # Adjacent literals instead of a backslash continuation inside the
            # string, which used to embed the source indentation in the
            # message.
            raise ValueError(
                f'Expected 4D tensor as input, got {input.dim()}D tensor '
                'instead.')
        assert bias is False, 'Only support bias is False.'
        ctx.stride = _pair(stride)
        ctx.padding = _pair(padding)
        ctx.dilation = _pair(dilation)
        ctx.groups = groups
        ctx.deform_groups = deform_groups
        ctx.im2col_step = im2col_step

        # When pytorch version >= 1.6.0, amp is adopted for fp16 mode;
        # amp won't cast the type of model (float32), but "offset" is cast
        # to float16 by nn.Conv2d automatically, leading to the type
        # mismatch with input (when it is float32) or weight.
        # The flag for whether to use fp16 or amp is the type of "offset",
        # we cast weight and input to temporarily support fp16 and amp
        # whatever the pytorch version is.
        input = input.type_as(offset)
        weight = weight.type_as(input)
        ctx.save_for_backward(input, offset, weight)

        output = input.new_empty(
            DeformConv2dFunction._output_size(ctx, input, weight))

        ctx.bufs_ = [input.new_empty(0), input.new_empty(0)]  # columns, ones

        # Never ask the kernel for more samples per step than the batch holds.
        cur_im2col_step = min(ctx.im2col_step, input.size(0))
        assert (input.size(0) % cur_im2col_step
                ) == 0, 'batch size must be divisible by im2col_step'
        ext_module.deform_conv_forward(
            input,
            weight,
            offset,
            output,
            ctx.bufs_[0],
            ctx.bufs_[1],
            im2col_step=cur_im2col_step,
            **DeformConv2dFunction._kernel_kwargs(ctx, weight))
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Compute gradients for ``input``, ``offset`` and ``weight``.

        Returns a 10-tuple matching ``forward``'s arguments; the seven
        non-tensor settings receive ``None``.
        """
        input, offset, weight = ctx.saved_tensors

        grad_input = grad_offset = grad_weight = None

        cur_im2col_step = min(ctx.im2col_step, input.size(0))
        assert (input.size(0) % cur_im2col_step
                ) == 0, 'batch size must be divisible by im2col_step'

        grad_output = grad_output.contiguous()
        geometry = DeformConv2dFunction._kernel_kwargs(ctx, weight)

        # Input and offset gradients come from the same kernel call, so both
        # are computed when either one is required.
        if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
            grad_input = torch.zeros_like(input)
            grad_offset = torch.zeros_like(offset)
            ext_module.deform_conv_backward_input(
                input,
                offset,
                grad_output,
                grad_input,
                grad_offset,
                weight,
                ctx.bufs_[0],
                im2col_step=cur_im2col_step,
                **geometry)

        if ctx.needs_input_grad[2]:
            grad_weight = torch.zeros_like(weight)
            ext_module.deform_conv_backward_parameters(
                input,
                offset,
                grad_output,
                grad_weight,
                ctx.bufs_[0],
                ctx.bufs_[1],
                scale=1,
                im2col_step=cur_im2col_step,
                **geometry)

        return grad_input, grad_offset, grad_weight, \
            None, None, None, None, None, None, None

    @staticmethod
    def _output_size(ctx, input, weight):
        """Return the output shape ``(N, C_out, *spatial)`` as a tuple.

        Each spatial size is
        ``(in + 2 * pad - (dilation * (k - 1) + 1)) // stride + 1``.

        Raises:
            ValueError: If any resulting dimension is not positive.
        """
        channels = weight.size(0)
        output_size = (input.size(0), channels)
        for d in range(input.dim() - 2):
            in_size = input.size(d + 2)
            pad = ctx.padding[d]
            # Extent covered by the dilated kernel along this axis.
            kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1
            stride_ = ctx.stride[d]
            output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
        if not all(s > 0 for s in output_size):
            raise ValueError(
                'convolution input is too small (output would be ' +
                'x'.join(map(str, output_size)) + ')')
        return output_size
im2col_step (int): Number of samples processed by im2col_cuda_kernel per call. It will work when ``batch_size`` > ``im2col_step``, but ``batch_size`` must be divisible by ``im2col_step``. Default: 32. `New in version 1.3.17.` """ @deprecated_api_warning({'deformable_groups': 'deform_groups'}, cls_name='DeformConv2d') def __init__(self, in_channels: int, out_channels: int, kernel_size: Union[int, Tuple[int, ...]], stride: Union[int, Tuple[int, ...]] = 1, padding: Union[int, Tuple[int, ...]] = 0, dilation: Union[int, Tuple[int, ...]] = 1, groups: int = 1, deform_groups: int = 1, bias: bool = False, im2col_step: int = 32) -> None: super(DeformConv2d, self).__init__() assert not bias, \ f'bias={bias} is not supported in DeformConv2d.' assert in_channels % groups == 0, \ f'in_channels {in_channels} cannot be divisible by groups {groups}' assert out_channels % groups == 0, \ f'out_channels {out_channels} cannot be divisible by groups \ {groups}' self.in_channels = in_channels self.out_channels = out_channels self.kernel_size = _pair(kernel_size) self.stride = _pair(stride) self.padding = _pair(padding) self.dilation = _pair(dilation) self.groups = groups self.deform_groups = deform_groups self.im2col_step = im2col_step # enable compatibility with nn.Conv2d self.transposed = False self.output_padding = _single(0) # only weight, no bias self.weight = nn.Parameter( torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size)) self.reset_parameters() def reset_parameters(self): # switch the initialization of `self.weight` to the standard kaiming # method described in `Delving deep into rectifiers: Surpassing # human-level performance on ImageNet classification` - He, K. et al. # (2015), using a uniform distribution nn.init.kaiming_uniform_(self.weight, nonlinearity='relu') def forward(self, x: Tensor, offset: Tensor) -> Tensor: """Deformable Convolutional forward function. 
@CONV_LAYERS.register_module('DCN')
class DeformConv2dPack(DeformConv2d):
    """Deformable convolution that predicts its own offsets.

    A regular ``nn.Conv2d`` (``conv_offset``) is applied to the input to
    produce the offset tensor, which is then fed to ``deform_conv2d`` together
    with the input, so the layer can be called like a normal convolution.

    The offset tensor is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`.
    The spatial arrangement is like:

    .. code:: text

        (x0, y0) (x1, y1) (x2, y2)
        (x3, y3) (x4, y4) (x5, y5)
        (x6, y6) (x7, y7) (x8, y8)

    Args:
        *args: Positional arguments forwarded unchanged to the parent
            constructor.
        **kwargs: Keyword arguments forwarded unchanged to the parent
            constructor.
    """

    _version = 2

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Two offset values per kernel position and deformable group.
        offset_channels = (
            self.deform_groups * 2 * self.kernel_size[0] *
            self.kernel_size[1])
        self.conv_offset = nn.Conv2d(
            self.in_channels,
            offset_channels,
            kernel_size=self.kernel_size,
            stride=_pair(self.stride),
            padding=_pair(self.padding),
            dilation=_pair(self.dilation),
            bias=True)
        self.init_offset()

    def init_offset(self):
        """Zero the weight and bias of the offset-predicting convolution."""
        for param in (self.conv_offset.weight, self.conv_offset.bias):
            param.data.zero_()

    def forward(self, x):
        """Predict offsets from ``x`` and run the deformable convolution."""
        predicted_offset = self.conv_offset(x)
        return deform_conv2d(x, predicted_offset, self.weight, self.stride,
                             self.padding, self.dilation, self.groups,
                             self.deform_groups, False, self.im2col_step)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        """Rename legacy offset keys in ``state_dict`` before loading.

        When the checkpoint metadata has no version or a version below 2,
        ``<name>_offset.{weight,bias}`` entries are moved to
        ``<name>.conv_offset.{weight,bias}`` unless the new key already
        exists. Loading is then delegated to the parent implementation.
        """
        version = local_metadata.get('version', None)

        if version is None or version < 2:
            # the key is different in early versions
            # In version < 2, DeformConvPack loads previous benchmark models.
            legacy_stem = prefix[:-1]
            for suffix in ('weight', 'bias'):
                current_key = prefix + 'conv_offset.' + suffix
                legacy_key = legacy_stem + '_offset.' + suffix
                if (current_key not in state_dict
                        and legacy_key in state_dict):
                    state_dict[current_key] = state_dict.pop(legacy_key)

        if version is not None and version > 1:
            module_name = prefix.rstrip(".")
            print_log(
                f'DeformConv2dPack {module_name} is upgraded to version 2.',
                logger='root')

        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, missing_keys, unexpected_keys,
                                      error_msgs)
class DeformRoIPool(nn.Module):
    """Module wrapper around ``deform_roi_pool`` holding its settings.

    Args:
        output_size (int or tuple): Stored after ``_pair``.
        spatial_scale (float): Stored as ``float``. Default: 1.0.
        sampling_ratio (int): Stored as ``int``. Default: 0.
        gamma (float): Stored as ``float``. Default: 0.1.
    """

    def __init__(self,
                 output_size,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 gamma=0.1):
        super().__init__()
        # Coerce every setting once so forward() can pass them through as-is.
        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)
        self.gamma = float(gamma)

    def forward(self, input, rois, offset=None):
        """Pool ``input`` over ``rois`` with an optional ``offset``."""
        pool_settings = (self.output_size, self.spatial_scale,
                         self.sampling_ratio, self.gamma)
        return deform_roi_pool(input, rois, offset, *pool_settings)
class ModulatedDeformRoIPoolPack(DeformRoIPool):
    """Deformable RoI pooling with learned offsets and a learned mask.

    A first pooling pass without offsets produces features from which two
    fully connected heads predict a 2-channel offset map and a 1-channel
    sigmoid mask. A second pooling pass uses the offsets and its result is
    multiplied by the mask.

    Args:
        output_size (int or tuple): Passed to the parent constructor.
        output_channels (int): Expected channel count of the input feature.
        deform_fc_channels (int): Hidden width of both heads. Default: 1024.
        spatial_scale (float): Passed to the parent constructor.
        sampling_ratio (int): Passed to the parent constructor.
        gamma (float): Passed to the parent constructor.
    """

    def __init__(self,
                 output_size,
                 output_channels,
                 deform_fc_channels=1024,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 gamma=0.1):
        super().__init__(output_size, spatial_scale, sampling_ratio, gamma)

        self.output_channels = output_channels
        self.deform_fc_channels = deform_fc_channels

        num_bins = self.output_size[0] * self.output_size[1]
        flat_features = num_bins * self.output_channels
        hidden = self.deform_fc_channels

        # Offset head: the layers are created in the same order as before so
        # parameter initialisation consumes the RNG identically.
        offset_layers = [
            nn.Linear(flat_features, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, num_bins * 2),
        ]
        self.offset_fc = nn.Sequential(*offset_layers)
        final_offset_fc = self.offset_fc[-1]
        final_offset_fc.weight.data.zero_()
        final_offset_fc.bias.data.zero_()

        # Mask head: the last linear layer (index 2) is zeroed.
        mask_layers = [
            nn.Linear(flat_features, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, num_bins),
            nn.Sigmoid(),
        ]
        self.mask_fc = nn.Sequential(*mask_layers)
        final_mask_fc = self.mask_fc[2]
        final_mask_fc.weight.data.zero_()
        final_mask_fc.bias.data.zero_()

    def forward(self, input, rois):
        """Pool ``input`` over ``rois`` using predicted offsets and mask."""
        assert input.size(1) == self.output_channels
        pooled = deform_roi_pool(input, rois, None, self.output_size,
                                 self.spatial_scale, self.sampling_ratio,
                                 self.gamma)
        num_rois = rois.size(0)
        out_h = self.output_size[0]
        out_w = self.output_size[1]
        flat = pooled.view(num_rois, -1)

        offset = self.offset_fc(flat).view(num_rois, 2, out_h, out_w)
        mask = self.mask_fc(flat).view(num_rois, 1, out_h, out_w)

        deformed = deform_roi_pool(input, rois, offset, self.output_size,
                                   self.spatial_scale, self.sampling_ratio,
                                   self.gamma)
        return deformed * mask
class SigmoidFocalLossFunction(Function):
    """Autograd function wrapping the compiled sigmoid focal loss kernels."""

    @staticmethod
    def symbolic(g, input, target, gamma, alpha, weight, reduction):
        """Emit the ``mmcv::MMCVSigmoidFocalLoss`` graph op for export."""
        return g.op(
            'mmcv::MMCVSigmoidFocalLoss',
            input,
            target,
            gamma_f=gamma,
            alpha_f=alpha,
            weight_f=weight,
            reduction_s=reduction)

    @staticmethod
    def forward(ctx,
                input,
                target,
                gamma=2.0,
                alpha=0.25,
                weight=None,
                reduction='mean'):
        """Compute the loss.

        Args:
            input (torch.Tensor): 2-D tensor; dimension 0 must match
                ``target``.
            target (torch.Tensor): 1-D tensor of dtype ``torch.long``.
            gamma (float): Converted to ``float`` and passed to the kernel.
            alpha (float): Converted to ``float`` and passed to the kernel.
            weight (torch.Tensor, optional): 1-D tensor whose length equals
                ``input.size(1)``. An empty tensor is used when ``None``.
            reduction (str): One of ``'none'``, ``'mean'`` or ``'sum'``.

        Returns:
            torch.Tensor: The element-wise kernel output for ``'none'``; its
            sum divided by ``input.size(0)`` for ``'mean'``; its sum for
            ``'sum'``.
        """
        # Check the dtype directly rather than the legacy typed-tensor
        # classes (torch.LongTensor / torch.cuda.LongTensor): the class check
        # rejects int64 targets on any other backend.
        assert target.dtype == torch.long
        assert input.dim() == 2
        assert target.dim() == 1
        assert input.size(0) == target.size(0)
        if weight is None:
            weight = input.new_empty(0)
        else:
            assert weight.dim() == 1
            assert input.size(1) == weight.size(0)
        ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
        assert reduction in ctx.reduction_dict.keys()

        ctx.gamma = float(gamma)
        ctx.alpha = float(alpha)
        ctx.reduction = ctx.reduction_dict[reduction]

        output = input.new_zeros(input.size())

        ext_module.sigmoid_focal_loss_forward(
            input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha)
        if ctx.reduction == ctx.reduction_dict['mean']:
            output = output.sum() / input.size(0)
        elif ctx.reduction == ctx.reduction_dict['sum']:
            output = output.sum()
        ctx.save_for_backward(input, target, weight)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input``; other inputs get ``None``."""
        input, target, weight = ctx.saved_tensors

        grad_input = input.new_zeros(input.size())

        ext_module.sigmoid_focal_loss_backward(
            input,
            target,
            weight,
            grad_input,
            gamma=ctx.gamma,
            alpha=ctx.alpha)

        grad_input *= grad_output
        # Mirror the division applied by the 'mean' reduction in forward().
        if ctx.reduction == ctx.reduction_dict['mean']:
            grad_input /= input.size(0)
        return grad_input, None, None, None, None, None
class FurthestPointSampling(Function):
    """Uses iterative furthest point sampling to select a set of features
    whose corresponding points have the furthest distance."""

    @staticmethod
    def forward(ctx, points_xyz: torch.Tensor,
                num_points: int) -> torch.Tensor:
        """
        Args:
            points_xyz (torch.Tensor): (B, N, 3) where N > num_points.
                Must be contiguous.
            num_points (int): Number of points in the sampled set.

        Returns:
            torch.Tensor: (B, num_points) int32 indices of the sampled
            points, marked non-differentiable.
        """
        assert points_xyz.is_contiguous()

        B, N = points_xyz.size()[:2]
        # Allocate the buffers on the input's device, as
        # FurthestPointSamplingWithDist does. The legacy
        # torch.cuda.*Tensor constructors always use the current CUDA
        # device, which is wrong when the input lives on another one.
        output = points_xyz.new_zeros([B, num_points], dtype=torch.int32)
        temp = points_xyz.new_zeros([B, N], dtype=torch.float32).fill_(1e10)

        ext_module.furthest_point_sampling_forward(
            points_xyz,
            temp,
            output,
            b=B,
            n=N,
            m=num_points,
        )
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(output)
        return output

    @staticmethod
    def backward(xyz, a=None):
        """No gradient flows to either input of ``forward``."""
        return None, None
# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator # Augmentation (ADA) # ======================================================================= # 1. Definitions # "Licensor" means any person or entity that distributes its Work. # "Software" means the original work of authorship made available under # this License. # "Work" means the Software and any additions to or derivative works of # the Software that are made available under this License. # The terms "reproduce," "reproduction," "derivative works," and # "distribution" have the meaning as provided under U.S. copyright law; # provided, however, that for the purposes of this License, derivative # works shall not include works that remain separable from, or merely # link (or bind by name) to the interfaces of, the Work. # Works, including the Software, are "made available" under this License # by including in or with the Work either (a) a copyright notice # referencing the applicability of this License to the Work, or (b) a # copy of this License. # 2. License Grants # 2.1 Copyright Grant. Subject to the terms and conditions of this # License, each Licensor grants to you a perpetual, worldwide, # non-exclusive, royalty-free, copyright license to reproduce, # prepare derivative works of, publicly display, publicly perform, # sublicense and distribute its Work and any resulting derivative # works in any form. # 3. Limitations # 3.1 Redistribution. You may reproduce or distribute the Work only # if (a) you do so under this License, (b) you include a complete # copy of this License with your distribution, and (c) you retain # without modification any copyright, patent, trademark, or # attribution notices that are present in the Work. # 3.2 Derivative Works. 
You may specify that additional or different # terms apply to the use, reproduction, and distribution of your # derivative works of the Work ("Your Terms") only if (a) Your Terms # provide that the use limitation in Section 3.3 applies to your # derivative works, and (b) you identify the specific derivative # works that are subject to Your Terms. Notwithstanding Your Terms, # this License (including the redistribution requirements in Section # 3.1) will continue to apply to the Work itself. # 3.3 Use Limitation. The Work and any derivative works thereof only # may be used or intended for use non-commercially. Notwithstanding # the foregoing, NVIDIA and its affiliates may use the Work and any # derivative works commercially. As used herein, "non-commercially" # means for research or evaluation purposes only. # 3.4 Patent Claims. If you bring or threaten to bring a patent claim # against any Licensor (including any claim, cross-claim or # counterclaim in a lawsuit) to enforce any patents that you allege # are infringed by any Work, then your rights under this License from # such Licensor (including the grant in Section 2.1) will terminate # immediately. # 3.5 Trademarks. This License does not grant any rights to use any # Licensor’s or its affiliates’ names, logos, or trademarks, except # as necessary to reproduce the notices described in this License. # 3.6 Termination. If you violate any term of this License, then your # rights under this License (including the grant in Section 2.1) will # terminate immediately. # 4. Disclaimer of Warranty. # THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR # NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER # THIS LICENSE. # 5. Limitation of Liability. 
class FusedBiasLeakyReLUFunctionBackward(Function):
    """Backward pass of the fused bias leaky ReLU as an autograd function.

    Wrapping the first-order gradient computation in its own ``Function``
    gives it a ``backward`` of its own, so the second-order derivative of the
    fused operation can be computed.
    """

    @staticmethod
    def forward(ctx, grad_output, out, negative_slope, scale):
        """Return ``(grad_input, grad_bias)`` for the fused activation."""
        ctx.save_for_backward(out)
        ctx.negative_slope = negative_slope
        ctx.scale = scale

        # Zero-element placeholder passed in the bias slot of the kernel.
        placeholder = grad_output.new_empty(0)
        grad_input = ext_module.fused_bias_leakyrelu(
            grad_output,
            placeholder,
            out,
            act=3,
            grad=1,
            alpha=negative_slope,
            scale=scale)

        # Sum over every dimension except dimension 1 to get the bias
        # gradient.
        reduce_dims = [0] + list(range(2, grad_input.ndim))
        grad_bias = grad_input.sum(reduce_dims).detach()

        return grad_input, grad_bias

    @staticmethod
    def backward(ctx, gradgrad_input, gradgrad_bias):
        """Return the gradient w.r.t. ``grad_output`` of ``forward``."""
        saved_out, = ctx.saved_tensors

        # The second order derivative, in fact, contains two parts, while the
        # first part is zero. Thus, we directly consider the second part,
        # which is similar to the first order derivative in implementation.
        bias_term = gradgrad_bias.to(saved_out.dtype)
        gradgrad_out = ext_module.fused_bias_leakyrelu(
            gradgrad_input,
            bias_term,
            saved_out,
            act=3,
            grad=1,
            alpha=ctx.negative_slope,
            scale=ctx.scale)

        return gradgrad_out, None, None, None
def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2**0.5):
    """Reference bias + leaky ReLU + scale built from plain torch ops.

    Args:
        x (torch.Tensor): Input tensor; dimension 1 is matched against
            ``bias``.
        bias (torch.Tensor or None): 1-D tensor whose length equals
            ``x.shape[1]``, or ``None`` to skip the bias addition.
        negative_slope (float): Slope passed to ``F.leaky_relu``.
        scale (float): Multiplier applied after the activation; skipped when
            equal to 1.

    Returns:
        torch.Tensor: The activated (and possibly scaled) tensor.
    """
    if bias is not None:
        assert bias.ndim == 1
        assert bias.shape[0] == x.shape[1]
        # Shape (1, -1, 1, ..., 1): broadcast the bias along dimension 1.
        broadcast_shape = [1] * x.ndim
        broadcast_shape[1] = -1
        x = x + bias.reshape(broadcast_shape)

    activated = F.leaky_relu(x, negative_slope)
    if scale == 1:
        return activated
    return activated * scale
""" assert features.is_contiguous() assert indices.is_contiguous() B, npoint = indices.size() _, C, N = features.size() output = torch.cuda.FloatTensor(B, C, npoint) ext_module.gather_points_forward( features, indices, output, b=B, c=C, n=N, npoints=npoint) ctx.for_backwards = (indices, C, N) if torch.__version__ != 'parrots': ctx.mark_non_differentiable(indices) return output @staticmethod def backward(ctx, grad_out): idx, C, N = ctx.for_backwards B, npoint = idx.size() grad_features = torch.cuda.FloatTensor(B, C, N).zero_() grad_out_data = grad_out.data.contiguous() ext_module.gather_points_backward( grad_out_data, idx, grad_features.data, b=B, c=C, n=N, npoints=npoint) return grad_features, None gather_points = GatherPoints.apply ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/group_points.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from typing import Tuple import torch from torch import nn as nn from torch.autograd import Function from ..utils import ext_loader from .ball_query import ball_query from .knn import knn ext_module = ext_loader.load_ext( '_ext', ['group_points_forward', 'group_points_backward']) class QueryAndGroup(nn.Module): """Groups points with a ball query of radius. Args: max_radius (float): The maximum radius of the balls. If None is given, we will use kNN sampling instead of ball query. sample_num (int): Maximum number of features to gather in the ball. min_radius (float, optional): The minimum radius of the balls. Default: 0. use_xyz (bool, optional): Whether to use xyz. Default: True. return_grouped_xyz (bool, optional): Whether to return grouped xyz. Default: False. normalize_xyz (bool, optional): Whether to normalize xyz. Default: False. uniform_sample (bool, optional): Whether to sample uniformly. Default: False return_unique_cnt (bool, optional): Whether to return the count of unique samples. Default: False. 
return_grouped_idx (bool, optional): Whether to return grouped idx. Default: False. """ def __init__(self, max_radius, sample_num, min_radius=0, use_xyz=True, return_grouped_xyz=False, normalize_xyz=False, uniform_sample=False, return_unique_cnt=False, return_grouped_idx=False): super().__init__() self.max_radius = max_radius self.min_radius = min_radius self.sample_num = sample_num self.use_xyz = use_xyz self.return_grouped_xyz = return_grouped_xyz self.normalize_xyz = normalize_xyz self.uniform_sample = uniform_sample self.return_unique_cnt = return_unique_cnt self.return_grouped_idx = return_grouped_idx if self.return_unique_cnt: assert self.uniform_sample, \ 'uniform_sample should be True when ' \ 'returning the count of unique samples' if self.max_radius is None: assert not self.normalize_xyz, \ 'can not normalize grouped xyz when max_radius is None' def forward(self, points_xyz, center_xyz, features=None): """ Args: points_xyz (torch.Tensor): (B, N, 3) xyz coordinates of the points. center_xyz (torch.Tensor): (B, npoint, 3) coordinates of the centriods. features (torch.Tensor): (B, C, N) The features of grouped points. Returns: torch.Tensor: (B, 3 + C, npoint, sample_num) Grouped concatenated coordinates and features of points. 
""" # if self.max_radius is None, we will perform kNN instead of ball query # idx is of shape [B, npoint, sample_num] if self.max_radius is None: idx = knn(self.sample_num, points_xyz, center_xyz, False) idx = idx.transpose(1, 2).contiguous() else: idx = ball_query(self.min_radius, self.max_radius, self.sample_num, points_xyz, center_xyz) if self.uniform_sample: unique_cnt = torch.zeros((idx.shape[0], idx.shape[1])) for i_batch in range(idx.shape[0]): for i_region in range(idx.shape[1]): unique_ind = torch.unique(idx[i_batch, i_region, :]) num_unique = unique_ind.shape[0] unique_cnt[i_batch, i_region] = num_unique sample_ind = torch.randint( 0, num_unique, (self.sample_num - num_unique, ), dtype=torch.long) all_ind = torch.cat((unique_ind, unique_ind[sample_ind])) idx[i_batch, i_region, :] = all_ind xyz_trans = points_xyz.transpose(1, 2).contiguous() # (B, 3, npoint, sample_num) grouped_xyz = grouping_operation(xyz_trans, idx) grouped_xyz_diff = grouped_xyz - \ center_xyz.transpose(1, 2).unsqueeze(-1) # relative offsets if self.normalize_xyz: grouped_xyz_diff /= self.max_radius if features is not None: grouped_features = grouping_operation(features, idx) if self.use_xyz: # (B, C + 3, npoint, sample_num) new_features = torch.cat([grouped_xyz_diff, grouped_features], dim=1) else: new_features = grouped_features else: assert (self.use_xyz ), 'Cannot have not features and not use xyz as a feature!' new_features = grouped_xyz_diff ret = [new_features] if self.return_grouped_xyz: ret.append(grouped_xyz) if self.return_unique_cnt: ret.append(unique_cnt) if self.return_grouped_idx: ret.append(idx) if len(ret) == 1: return ret[0] else: return tuple(ret) class GroupAll(nn.Module): """Group xyz with feature. Args: use_xyz (bool): Whether to use xyz. 
""" def __init__(self, use_xyz: bool = True): super().__init__() self.use_xyz = use_xyz def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None): """ Args: xyz (Tensor): (B, N, 3) xyz coordinates of the features. new_xyz (Tensor): new xyz coordinates of the features. features (Tensor): (B, C, N) features to group. Returns: Tensor: (B, C + 3, 1, N) Grouped feature. """ grouped_xyz = xyz.transpose(1, 2).unsqueeze(2) if features is not None: grouped_features = features.unsqueeze(2) if self.use_xyz: # (B, 3 + C, 1, N) new_features = torch.cat([grouped_xyz, grouped_features], dim=1) else: new_features = grouped_features else: new_features = grouped_xyz return new_features class GroupingOperation(Function): """Group feature with given index.""" @staticmethod def forward(ctx, features: torch.Tensor, indices: torch.Tensor) -> torch.Tensor: """ Args: features (Tensor): (B, C, N) tensor of features to group. indices (Tensor): (B, npoint, nsample) the indices of features to group with. Returns: Tensor: (B, C, npoint, nsample) Grouped features. """ features = features.contiguous() indices = indices.contiguous() B, nfeatures, nsample = indices.size() _, C, N = features.size() output = torch.cuda.FloatTensor(B, C, nfeatures, nsample) ext_module.group_points_forward( features, indices, output, b=B, c=C, n=N, npoints=nfeatures, nsample=nsample) ctx.for_backwards = (indices, N) return output @staticmethod def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: """ Args: grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients of the output from forward. Returns: Tensor: (B, C, N) gradient of the features. 
def get_onnxruntime_op_path():
    """Locate the compiled ONNX Runtime custom-op library.

    Globs for ``_ext_ort.*.so`` in the directory one level above the
    directory that contains this file.

    Returns:
        str: The first matching path reported by ``glob``, or an empty string
        when no file matches.
    """
    package_dir = os.path.dirname(os.path.dirname(__file__))
    pattern = os.path.join(os.path.abspath(package_dir), '_ext_ort.*.so')
    matches = glob.glob(pattern)
    return matches[0] if matches else ''
def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None):
    """NMS function GPU implementation (for BEV boxes).

    The overlap of two boxes for IoU calculation is defined as the exact
    overlapping area of the two boxes. Candidates can additionally be
    truncated before NMS (``pre_max_size``) and after NMS
    (``post_max_size``).

    Args:
        boxes (torch.Tensor): Input boxes with the shape of [N, 5]
            ([x1, y1, x2, y2, ry]).
        scores (torch.Tensor): Scores of boxes with the shape of [N].
        thresh (float): Overlap threshold of NMS.
        pre_max_size (int, optional): Max size of boxes before NMS.
            Default: None.
        post_max_size (int, optional): Max size of boxes after NMS.
            Default: None.

    Returns:
        torch.Tensor: Indexes after NMS.
    """
    assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]'

    # Rank the candidates by score, best first.
    _, ranking = scores.sort(0, descending=True)
    if pre_max_size is not None:
        ranking = ranking[:pre_max_size]
    sorted_boxes = boxes[ranking].contiguous()

    # Output buffers filled in place by the extension: the kept positions
    # (relative to ``sorted_boxes``) and how many of them are valid.
    kept_buf = torch.zeros(sorted_boxes.size(0), dtype=torch.long)
    kept_count = torch.zeros(size=(), dtype=torch.long)
    ext_module.iou3d_nms_forward(
        sorted_boxes, kept_buf, kept_count, nms_overlap_thresh=thresh)

    # Map positions in the sorted list back to indices of the input boxes.
    selected = ranking[kept_buf[:kept_count].cuda(
        sorted_boxes.device)].contiguous()
    if post_max_size is None:
        return selected
    return selected[:post_max_size]
class KNN(Function):
    r"""KNN (CUDA) based on heap data structure.

    Modified from PAConv.

    Find k-nearest points.
    """

    @staticmethod
    def forward(ctx,
                k: int,
                xyz: torch.Tensor,
                center_xyz: torch.Tensor = None,
                transposed: bool = False) -> torch.Tensor:
        """
        Args:
            k (int): number of nearest neighbors.
            xyz (torch.Tensor): (B, N, 3) if transposed == False, else
                (B, 3, N). xyz coordinates of the features.
            center_xyz (torch.Tensor, optional): (B, npoint, 3) if transposed
                is False, else (B, 3, npoint). centers of the knn query.
                Default: None, in which case ``xyz`` is used as the centers.
            transposed (bool, optional): whether the input tensors are
                transposed. Should not explicitly use this keyword when
                calling knn (=KNN.apply), just add the fourth param.
                Default: False.

        Returns:
            torch.Tensor: (B, k, npoint) int32 tensor with the indices of the
            features that form k-nearest neighbours.
        """
        assert 0 < k < 100, 'k should be in range(0, 100)'

        if center_xyz is None:
            center_xyz = xyz

        if transposed:
            xyz = xyz.transpose(2, 1).contiguous()
            center_xyz = center_xyz.transpose(2, 1).contiguous()

        assert xyz.is_contiguous()  # [B, N, 3]
        assert center_xyz.is_contiguous()  # [B, npoint, 3]

        center_xyz_device = center_xyz.get_device()
        assert center_xyz_device == xyz.get_device(), \
            'center_xyz and xyz should be put on the same device'
        # Make the inputs' device the current CUDA device before calling
        # into the extension.
        if torch.cuda.current_device() != center_xyz_device:
            torch.cuda.set_device(center_xyz_device)

        B, npoint, _ = center_xyz.shape
        N = xyz.shape[1]

        # Output buffers filled in place by the extension.
        idx = center_xyz.new_zeros((B, npoint, k)).int()
        dist2 = center_xyz.new_zeros((B, npoint, k)).float()

        ext_module.knn_forward(
            xyz, center_xyz, idx, dist2, b=B, n=N, m=npoint, nsample=k)
        # idx shape to [B, k, npoint]
        idx = idx.transpose(2, 1).contiguous()
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(idx)
        return idx

    @staticmethod
    def backward(ctx, a=None):
        # One (absent) gradient per forward input: k, xyz, center_xyz and
        # transposed. autograd expects the count to match.
        return None, None, None, None
class MaskedConv2dFunction(Function):
    """Forward-only 2D convolution evaluated at the positions of a mask.

    Output positions are initialised to zero; only positions where
    ``mask > 0`` are handed to the extension kernels for computation.
    No gradient is implemented.
    """

    @staticmethod
    def symbolic(g, features, mask, weight, bias, padding, stride):
        return g.op(
            'mmcv::MMCVMaskedConv2d',
            features,
            mask,
            weight,
            bias,
            padding_i=padding,
            stride_i=stride)

    @staticmethod
    def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
        """
        Args:
            features (Tensor): Input of shape (1, C_in, H, W).
            mask (Tensor): Mask of shape (1, H, W); positions with a value
                greater than 0 are computed.
            weight (Tensor): Kernel of shape (C_out, C_in, kH, kW).
            bias (Tensor): Bias of shape (C_out, ).
            padding (int or tuple[int]): Zero padding. Default: 0.
            stride (int or tuple[int]): Only 1 is supported. Default: 1.

        Returns:
            Tensor: Output of shape (1, C_out, out_h, out_w).

        Raises:
            ValueError: If ``stride`` is not 1.
        """
        assert mask.dim() == 3 and mask.size(0) == 1
        assert features.dim() == 4 and features.size(0) == 1
        assert features.size()[2:] == mask.size()[1:]
        pad_h, pad_w = _pair(padding)
        stride_h, stride_w = _pair(stride)
        if stride_h != 1 or stride_w != 1:
            raise ValueError(
                'Stride could not only be 1 in masked_conv2d currently.')
        out_channel, in_channel, kernel_h, kernel_w = weight.size()

        batch_size = features.size(0)
        out_h = int(
            math.floor((features.size(2) + 2 * pad_h -
                        (kernel_h - 1) - 1) / stride_h + 1))
        # The width must be derived from the kernel *width*; using kernel_h
        # here gives a wrong output size for non-square kernels.
        out_w = int(
            math.floor((features.size(3) + 2 * pad_w -
                        (kernel_w - 1) - 1) / stride_w + 1))
        mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False)
        output = features.new_zeros(batch_size, out_channel, out_h, out_w)
        if mask_inds.numel() > 0:
            mask_h_idx = mask_inds[:, 0].contiguous()
            mask_w_idx = mask_inds[:, 1].contiguous()
            # One column per selected position.
            data_col = features.new_zeros(in_channel * kernel_h * kernel_w,
                                          mask_inds.size(0))
            ext_module.masked_im2col_forward(
                features,
                mask_h_idx,
                mask_w_idx,
                data_col,
                kernel_h=kernel_h,
                kernel_w=kernel_w,
                pad_h=pad_h,
                pad_w=pad_w)

            # bias + weight @ data_col (beta = alpha = 1). The positional
            # ``addmm(beta, input, alpha, m1, m2)`` overload is deprecated.
            masked_output = torch.addmm(bias[:, None],
                                        weight.view(out_channel, -1),
                                        data_col)
            ext_module.masked_col2im_forward(
                masked_output,
                mask_h_idx,
                mask_w_idx,
                output,
                height=out_h,
                width=out_w,
                channels=out_channel)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        # No gradient is implemented. One entry per forward input:
        # features, mask, weight, bias, padding, stride.
        return (None, ) * 6
class BaseMergeCell(nn.Module):
    """The basic class for cells used in NAS-FPN and NAS-FCOS.

    BaseMergeCell takes 2 inputs. After applying convolution
    on them, they are resized to the target size. Then,
    they go through binary_op, which depends on the type of cell.
    If with_out_conv is True, the result of output will go through
    another convolution layer.

    Args:
        fused_channels (int): number of input channels in out_conv layer.
            Default: 256.
        out_channels (int): number of output channels in out_conv layer.
            Default: 256.
        with_out_conv (bool): Whether to use out_conv layer. Default: True.
        out_conv_cfg (dict): Config dict for convolution layer, which should
            contain "groups", "kernel_size", "padding", "bias" to build
            out_conv layer.
        out_norm_cfg (dict): Config dict for normalization layer in out_conv.
        out_conv_order (tuple): The order of conv/norm/activation layers in
            out_conv.
        with_input1_conv (bool): Whether to use convolution on input1.
        with_input2_conv (bool): Whether to use convolution on input2.
        input_conv_cfg (dict): Config dict for building input1_conv layer and
            input2_conv layer, which is expected to contain the type of
            convolution.
            Default: None, which means using conv2d.
        input_norm_cfg (dict): Config dict for normalization layer in
            input1_conv and input2_conv layer. Default: None.
        upsample_mode (str): Interpolation method used to resize the output
            of input1_conv and input2_conv to target size. Currently, we
            support ['nearest', 'bilinear']. Default: 'nearest'.
    """

    def __init__(self,
                 fused_channels=256,
                 out_channels=256,
                 with_out_conv=True,
                 out_conv_cfg=dict(
                     groups=1, kernel_size=3, padding=1, bias=True),
                 out_norm_cfg=None,
                 out_conv_order=('act', 'conv', 'norm'),
                 with_input1_conv=False,
                 with_input2_conv=False,
                 input_conv_cfg=None,
                 input_norm_cfg=None,
                 upsample_mode='nearest'):
        super(BaseMergeCell, self).__init__()
        assert upsample_mode in ['nearest', 'bilinear']
        self.with_out_conv = with_out_conv
        self.with_input1_conv = with_input1_conv
        self.with_input2_conv = with_input2_conv
        self.upsample_mode = upsample_mode

        if self.with_out_conv:
            self.out_conv = ConvModule(
                fused_channels,
                out_channels,
                **out_conv_cfg,
                norm_cfg=out_norm_cfg,
                order=out_conv_order)

        # An empty nn.Sequential acts as the identity when no input conv is
        # requested.
        self.input1_conv = self._build_input_conv(
            out_channels, input_conv_cfg,
            input_norm_cfg) if with_input1_conv else nn.Sequential()
        self.input2_conv = self._build_input_conv(
            out_channels, input_conv_cfg,
            input_norm_cfg) if with_input2_conv else nn.Sequential()

    def _build_input_conv(self, channel, conv_cfg, norm_cfg):
        """Build a 3x3, channel-preserving ConvModule for one input."""
        return ConvModule(
            channel,
            channel,
            3,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            bias=True)

    @abstractmethod
    def _binary_op(self, x1, x2):
        """Merge the two resized inputs; implemented by subclasses."""
        pass

    def _resize(self, x, size):
        """Resize ``x`` so that its last two dims equal ``size``.

        Upsampling uses ``F.interpolate`` with ``self.upsample_mode``;
        downsampling uses max pooling and requires the input size to be an
        integer multiple of ``size`` in both dimensions.

        Args:
            x (Tensor): Input whose last two dims are (H, W).
            size (tuple[int]): Target (H, W).

        Returns:
            Tensor: The resized tensor (``x`` itself if already at ``size``).
        """
        # NOTE: torch.Size is a tuple, so ``<`` below is a lexicographic
        # comparison (height first, then width).
        if x.shape[-2:] == size:
            return x
        elif x.shape[-2:] < size:
            return F.interpolate(x, size=size, mode=self.upsample_mode)
        else:
            assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0
            # Pool with a per-dimension kernel so that the output matches
            # ``size`` even when the height and width ratios differ.
            kernel_size = (x.shape[-2] // size[-2], x.shape[-1] // size[-1])
            x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size)
            return x

    def forward(self, x1, x2, out_size=None):
        """Merge two feature maps.

        Args:
            x1 (Tensor): First input; must agree with ``x2`` on the first
                two dims.
            x2 (Tensor): Second input.
            out_size (tuple[int], optional): Target (H, W). Default: None,
                which selects the larger of the two input sizes.

        Returns:
            Tensor: The merged (and optionally convolved) feature map.
        """
        assert x1.shape[:2] == x2.shape[:2]
        assert out_size is None or len(out_size) == 2
        if out_size is None:  # resize to larger one
            out_size = max(x1.size()[2:], x2.size()[2:])

        x1 = self.input1_conv(x1)
        x2 = self.input2_conv(x2)

        x1 = self._resize(x1, out_size)
        x2 = self._resize(x2, out_size)

        x = self._binary_op(x1, x2)
        if self.with_out_conv:
            x = self.out_conv(x)
        return x
class ModulatedDeformConv2dFunction(Function):
    """Autograd function wrapping the modulated deformable conv kernels."""

    @staticmethod
    def symbolic(g, input, offset, mask, weight, bias, stride, padding,
                 dilation, groups, deform_groups):
        input_tensors = [input, offset, mask, weight]
        if bias is not None:
            input_tensors.append(bias)
        return g.op(
            'mmcv::MMCVModulatedDeformConv2d',
            *input_tensors,
            stride_i=stride,
            padding_i=padding,
            dilation_i=dilation,
            groups_i=groups,
            deform_groups_i=deform_groups)

    @staticmethod
    def forward(ctx,
                input,
                offset,
                mask,
                weight,
                bias=None,
                stride=1,
                padding=0,
                dilation=1,
                groups=1,
                deform_groups=1):
        """
        Args:
            input (Tensor): 4D input feature map.
            offset (Tensor): Sampling offsets; its dtype decides the dtype
                used for ``input``, ``weight`` and ``bias``.
            mask (Tensor): Modulation mask.
            weight (Tensor): Convolution kernel.
            bias (Tensor, optional): Convolution bias. Default: None.
            stride (int or tuple[int]): Default: 1.
            padding (int or tuple[int]): Default: 0.
            dilation (int or tuple[int]): Default: 1.
            groups (int): Default: 1.
            deform_groups (int): Default: 1.

        Returns:
            Tensor: The convolution output.

        Raises:
            ValueError: If ``input`` is not 4D, or the output would have a
                non-positive size.
        """
        if input is not None and input.dim() != 4:
            # Single-line message: a backslash continuation inside the
            # literal would embed the source indentation in the message.
            raise ValueError(
                f'Expected 4D tensor as input, got {input.dim()}D tensor '
                'instead.')
        ctx.stride = _pair(stride)
        ctx.padding = _pair(padding)
        ctx.dilation = _pair(dilation)
        ctx.groups = groups
        ctx.deform_groups = deform_groups
        ctx.with_bias = bias is not None
        if not ctx.with_bias:
            bias = input.new_empty(0)  # fake tensor
        # When pytorch version >= 1.6.0, amp is adopted for fp16 mode;
        # amp won't cast the type of model (float32), but "offset" is cast
        # to float16 by nn.Conv2d automatically, leading to the type
        # mismatch with input (when it is float32) or weight.
        # The flag for whether to use fp16 or amp is the type of "offset",
        # we cast weight and input to temporarily support fp16 and amp
        # whatever the pytorch version is.
        input = input.type_as(offset)
        weight = weight.type_as(input)
        bias = bias.type_as(input)
        ctx.save_for_backward(input, offset, mask, weight, bias)
        output = input.new_empty(
            ModulatedDeformConv2dFunction._output_size(ctx, input, weight))
        # Scratch buffers handed to the extension and reused in backward.
        ctx._bufs = [input.new_empty(0), input.new_empty(0)]
        ext_module.modulated_deform_conv_forward(
            input,
            weight,
            bias,
            ctx._bufs[0],
            offset,
            mask,
            output,
            ctx._bufs[1],
            kernel_h=weight.size(2),
            kernel_w=weight.size(3),
            stride_h=ctx.stride[0],
            stride_w=ctx.stride[1],
            pad_h=ctx.padding[0],
            pad_w=ctx.padding[1],
            dilation_h=ctx.dilation[0],
            dilation_w=ctx.dilation[1],
            group=ctx.groups,
            deformable_group=ctx.deform_groups,
            with_bias=ctx.with_bias)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        input, offset, mask, weight, bias = ctx.saved_tensors
        # Gradient buffers filled in place by the extension.
        grad_input = torch.zeros_like(input)
        grad_offset = torch.zeros_like(offset)
        grad_mask = torch.zeros_like(mask)
        grad_weight = torch.zeros_like(weight)
        grad_bias = torch.zeros_like(bias)
        grad_output = grad_output.contiguous()
        ext_module.modulated_deform_conv_backward(
            input,
            weight,
            bias,
            ctx._bufs[0],
            offset,
            mask,
            ctx._bufs[1],
            grad_input,
            grad_weight,
            grad_bias,
            grad_offset,
            grad_mask,
            grad_output,
            kernel_h=weight.size(2),
            kernel_w=weight.size(3),
            stride_h=ctx.stride[0],
            stride_w=ctx.stride[1],
            pad_h=ctx.padding[0],
            pad_w=ctx.padding[1],
            dilation_h=ctx.dilation[0],
            dilation_w=ctx.dilation[1],
            group=ctx.groups,
            deformable_group=ctx.deform_groups,
            with_bias=ctx.with_bias)
        if not ctx.with_bias:
            grad_bias = None

        # One entry per forward input; the five non-tensor hyper-parameters
        # get no gradient.
        return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias,
                None, None, None, None, None)

    @staticmethod
    def _output_size(ctx, input, weight):
        """Compute the output shape from ``ctx`` padding/dilation/stride.

        Returns:
            tuple[int]: (batch, out_channels, *spatial sizes).

        Raises:
            ValueError: If any entry of the resulting shape is not positive.
        """
        channels = weight.size(0)
        output_size = (input.size(0), channels)
        for d in range(input.dim() - 2):
            in_size = input.size(d + 2)
            pad = ctx.padding[d]
            # Effective extent of a dilated kernel.
            kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1
            stride_ = ctx.stride[d]
            output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
        if not all(s > 0 for s in output_size):
            raise ValueError(
                'convolution input is too small (output would be ' +
                'x'.join(map(str, output_size)) + ')')
        return output_size
stride (int): Same as nn.Conv2d, while tuple is not supported. padding (int): Same as nn.Conv2d, while tuple is not supported. dilation (int): Same as nn.Conv2d, while tuple is not supported. groups (int): Same as nn.Conv2d. bias (bool or str): If specified as `auto`, it will be decided by the norm_cfg. Bias will be set as True if norm_cfg is None, otherwise False. """ _version = 2 def __init__(self, *args, **kwargs): super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs) self.conv_offset = nn.Conv2d( self.in_channels, self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1], kernel_size=self.kernel_size, stride=self.stride, padding=self.padding, dilation=self.dilation, bias=True) self.init_weights() def init_weights(self): super(ModulatedDeformConv2dPack, self).init_weights() if hasattr(self, 'conv_offset'): self.conv_offset.weight.data.zero_() self.conv_offset.bias.data.zero_() def forward(self, x): out = self.conv_offset(x) o1, o2, mask = torch.chunk(out, 3, dim=1) offset = torch.cat((o1, o2), dim=1) mask = torch.sigmoid(mask) return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups, self.deform_groups) def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): version = local_metadata.get('version', None) if version is None or version < 2: # the key is different in early versions # In version < 2, ModulatedDeformConvPack # loads previous benchmark models. 
if (prefix + 'conv_offset.weight' not in state_dict and prefix[:-1] + '_offset.weight' in state_dict): state_dict[prefix + 'conv_offset.weight'] = state_dict.pop( prefix[:-1] + '_offset.weight') if (prefix + 'conv_offset.bias' not in state_dict and prefix[:-1] + '_offset.bias' in state_dict): state_dict[prefix + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + '_offset.bias') if version is not None and version > 1: print_log( f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to ' 'version 2.', logger='root') super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/multi_scale_deform_attn.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import math import warnings import torch import torch.nn as nn import torch.nn.functional as F from torch.autograd.function import Function, once_differentiable from mmcv import deprecated_api_warning from mmcv.cnn import constant_init, xavier_init from mmcv.cnn.bricks.registry import ATTENTION from mmcv.runner import BaseModule from ..utils import ext_loader ext_module = ext_loader.load_ext( '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward']) class MultiScaleDeformableAttnFunction(Function): @staticmethod def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step): """GPU version of multi-scale deformable attention. Args: value (torch.Tensor): The value has shape (bs, num_keys, mum_heads, embed_dims//num_heads) value_spatial_shapes (torch.Tensor): Spatial shape of each feature map, has shape (num_levels, 2), last dimension 2 represent (h, w) sampling_locations (torch.Tensor): The location of sampling points, has shape (bs ,num_queries, num_heads, num_levels, num_points, 2), the last dimension 2 represent (x, y). 
def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,
                                        sampling_locations, attention_weights):
    """CPU version of multi-scale deformable attention.

    Args:
        value (torch.Tensor): The value has shape
            (bs, num_keys, num_heads, embed_dims//num_heads)
        value_spatial_shapes (torch.Tensor): Spatial shape of
            each feature map, has shape (num_levels, 2),
            last dimension 2 represent (h, w)
        sampling_locations (torch.Tensor): The location of sampling points,
            has shape
            (bs ,num_queries, num_heads, num_levels, num_points, 2),
            the last dimension 2 represent (x, y).
        attention_weights (torch.Tensor): The weight of sampling points used
            when calculate the attention, has shape
            (bs ,num_queries, num_heads, num_levels, num_points),

    Returns:
        torch.Tensor: has shape (bs, num_queries, embed_dims)
    """
    bs, _, num_heads, embed_dims = value.shape
    _, num_queries, num_heads, num_levels, num_points, _ =\
        sampling_locations.shape

    # Cut the flattened keys back into one chunk per feature level.
    level_sizes = [H_ * W_ for H_, W_ in value_spatial_shapes]
    per_level_values = value.split(level_sizes, dim=1)

    # Rescale the locations by 2 * x - 1 before handing them to grid_sample.
    grids = 2 * sampling_locations - 1

    sampled = []
    for lvl, (H_, W_) in enumerate(value_spatial_shapes):
        # bs, H_*W_, num_heads, embed_dims ->
        # bs, H_*W_, num_heads*embed_dims ->
        # bs, num_heads*embed_dims, H_*W_ ->
        # bs*num_heads, embed_dims, H_, W_
        level_value = per_level_values[lvl].flatten(2).transpose(1, 2)
        level_value = level_value.reshape(bs * num_heads, embed_dims, H_, W_)

        # bs, num_queries, num_heads, num_points, 2 ->
        # bs, num_heads, num_queries, num_points, 2 ->
        # bs*num_heads, num_queries, num_points, 2
        level_grid = grids[:, :, :, lvl].transpose(1, 2).flatten(0, 1)

        # bs*num_heads, embed_dims, num_queries, num_points
        sampled.append(
            F.grid_sample(
                level_value,
                level_grid,
                mode='bilinear',
                padding_mode='zeros',
                align_corners=False))

    # (bs, num_queries, num_heads, num_levels, num_points) ->
    # (bs, num_heads, num_queries, num_levels, num_points) ->
    # (bs*num_heads, 1, num_queries, num_levels*num_points)
    weights = attention_weights.transpose(1, 2).reshape(
        bs * num_heads, 1, num_queries, num_levels * num_points)

    # (bs*num_heads, embed_dims, num_queries, num_levels*num_points)
    stacked = torch.stack(sampled, dim=-2).flatten(-2)
    merged = (stacked * weights).sum(-1)
    output = merged.view(bs, num_heads * embed_dims, num_queries)
    return output.transpose(1, 2).contiguous()
num_heads (int): Parallel attention heads. Default: 64. num_levels (int): The number of feature map used in Attention. Default: 4. num_points (int): The number of sampling points for each query in each head. Default: 4. im2col_step (int): The step used in image_to_column. Default: 64. dropout (float): A Dropout layer on `inp_identity`. Default: 0.1. batch_first (bool): Key, Query and Value are shape of (batch, n, embed_dim) or (n, batch, embed_dim). Default to False. norm_cfg (dict): Config dict for normalization layer. Default: None. init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. Default: None. """ def __init__(self, embed_dims=256, num_heads=8, num_levels=4, num_points=4, im2col_step=64, dropout=0.1, batch_first=False, norm_cfg=None, init_cfg=None): super().__init__(init_cfg) if embed_dims % num_heads != 0: raise ValueError(f'embed_dims must be divisible by num_heads, ' f'but got {embed_dims} and {num_heads}') dim_per_head = embed_dims // num_heads self.norm_cfg = norm_cfg self.dropout = nn.Dropout(dropout) self.batch_first = batch_first # you'd better set dim_per_head to a power of 2 # which is more efficient in the CUDA implementation def _is_power_of_2(n): if (not isinstance(n, int)) or (n < 0): raise ValueError( 'invalid input for _is_power_of_2: {} (type: {})'.format( n, type(n))) return (n & (n - 1) == 0) and n != 0 if not _is_power_of_2(dim_per_head): warnings.warn( "You'd better set embed_dims in " 'MultiScaleDeformAttention to make ' 'the dimension of each attention head a power of 2 ' 'which is more efficient in our CUDA implementation.') self.im2col_step = im2col_step self.embed_dims = embed_dims self.num_levels = num_levels self.num_heads = num_heads self.num_points = num_points self.sampling_offsets = nn.Linear( embed_dims, num_heads * num_levels * num_points * 2) self.attention_weights = nn.Linear(embed_dims, num_heads * num_levels * num_points) self.value_proj = nn.Linear(embed_dims, embed_dims) self.output_proj = 
nn.Linear(embed_dims, embed_dims) self.init_weights() def init_weights(self): """Default initialization for Parameters of Module.""" constant_init(self.sampling_offsets, 0.) thetas = torch.arange( self.num_heads, dtype=torch.float32) * (2.0 * math.pi / self.num_heads) grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view( self.num_heads, 1, 1, 2).repeat(1, self.num_levels, self.num_points, 1) for i in range(self.num_points): grid_init[:, :, i, :] *= i + 1 self.sampling_offsets.bias.data = grid_init.view(-1) constant_init(self.attention_weights, val=0., bias=0.) xavier_init(self.value_proj, distribution='uniform', bias=0.) xavier_init(self.output_proj, distribution='uniform', bias=0.) self._is_init = True @deprecated_api_warning({'residual': 'identity'}, cls_name='MultiScaleDeformableAttention') def forward(self, query, key=None, value=None, identity=None, query_pos=None, key_padding_mask=None, reference_points=None, spatial_shapes=None, level_start_index=None, **kwargs): """Forward Function of MultiScaleDeformAttention. Args: query (torch.Tensor): Query of Transformer with shape (num_query, bs, embed_dims). key (torch.Tensor): The key tensor with shape `(num_key, bs, embed_dims)`. value (torch.Tensor): The value tensor with shape `(num_key, bs, embed_dims)`. identity (torch.Tensor): The tensor used for addition, with the same shape as `query`. Default None. If None, `query` will be used. query_pos (torch.Tensor): The positional encoding for `query`. Default: None. key_pos (torch.Tensor): The positional encoding for `key`. Default None. reference_points (torch.Tensor): The normalized reference points with shape (bs, num_query, num_levels, 2), all elements is range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area. or (N, Length_{query}, num_levels, 4), add additional two dimensions is (w, h) to form reference boxes. 
key_padding_mask (torch.Tensor): ByteTensor for `query`, with shape [bs, num_key]. spatial_shapes (torch.Tensor): Spatial shape of features in different levels. With shape (num_levels, 2), last dimension represents (h, w). level_start_index (torch.Tensor): The start index of each level. A tensor has shape ``(num_levels, )`` and can be represented as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...]. Returns: torch.Tensor: forwarded results with shape [num_query, bs, embed_dims]. """ if value is None: value = query if identity is None: identity = query if query_pos is not None: query = query + query_pos if not self.batch_first: # change to (bs, num_query ,embed_dims) query = query.permute(1, 0, 2) value = value.permute(1, 0, 2) bs, num_query, _ = query.shape bs, num_value, _ = value.shape assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value value = self.value_proj(value) if key_padding_mask is not None: value = value.masked_fill(key_padding_mask[..., None], 0.0) value = value.view(bs, num_value, self.num_heads, -1) sampling_offsets = self.sampling_offsets(query).view( bs, num_query, self.num_heads, self.num_levels, self.num_points, 2) attention_weights = self.attention_weights(query).view( bs, num_query, self.num_heads, self.num_levels * self.num_points) attention_weights = attention_weights.softmax(-1) attention_weights = attention_weights.view(bs, num_query, self.num_heads, self.num_levels, self.num_points) if reference_points.shape[-1] == 2: offset_normalizer = torch.stack( [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1) sampling_locations = reference_points[:, :, None, :, None, :] \ + sampling_offsets \ / offset_normalizer[None, None, None, :, None, :] elif reference_points.shape[-1] == 4: sampling_locations = reference_points[:, :, None, :, None, :2] \ + sampling_offsets / self.num_points \ * reference_points[:, :, None, :, None, 2:] \ * 0.5 else: raise ValueError( f'Last dim of reference_points must be' f' 2 or 4, but get {reference_points.shape[-1]} 
class NMSop(torch.autograd.Function):
    """Autograd/ONNX wrapper around the compiled ``ext_module.nms`` kernel.

    ``forward`` runs the kernel eagerly; ``symbolic`` describes how the op is
    emitted when the model is exported to ONNX.
    """

    @staticmethod
    def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold,
                max_num):
        """Run NMS and return the indices of the kept boxes.

        Args:
            ctx: Autograd context (unused here).
            bboxes (torch.Tensor): Candidate boxes.
            scores (torch.Tensor): One score per box.
            iou_threshold (float): IoU threshold forwarded to the kernel.
            offset (int): Offset forwarded to the kernel.
            score_threshold (float): When ``> 0``, boxes whose score is not
                strictly greater than this value are dropped before NMS.
            max_num (int): When ``> 0``, keep at most this many indices.

        Returns:
            torch.Tensor: Indices of the kept boxes, expressed with respect
            to the *unfiltered* input ``bboxes``.
        """
        is_filtering_by_score = score_threshold > 0
        if is_filtering_by_score:
            valid_mask = scores > score_threshold
            bboxes, scores = bboxes[valid_mask], scores[valid_mask]
            # Positions of the surviving boxes in the original input; used
            # below to translate kernel indices back to input indices.
            valid_inds = torch.nonzero(
                valid_mask, as_tuple=False).squeeze(dim=1)

        # NOTE(review): the ordering of the returned indices is decided by the
        # compiled kernel (presumably descending score) - confirm there.
        inds = ext_module.nms(
            bboxes, scores, iou_threshold=float(iou_threshold), offset=offset)

        if max_num > 0:
            inds = inds[:max_num]
        if is_filtering_by_score:
            # Kernel indices refer to the filtered tensors; map them back.
            inds = valid_inds[inds]
        return inds

    @staticmethod
    def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold,
                 max_num):
        """Emit the ONNX graph nodes for this op.

        When mmcv's custom ONNX ops are loaded and the ``ONNX_BACKEND``
        environment variable is not ``'MMCVTensorRT'``, a single
        ``mmcv::NonMaxSuppression`` node is emitted (it receives only
        ``iou_threshold`` and ``offset``). Otherwise the graph falls back to
        the standard ``NonMaxSuppression`` op, which is not given ``offset``.
        """
        from ..onnx import is_custom_op_loaded
        has_custom_op = is_custom_op_loaded()
        # TensorRT nms plugin is aligned with original nms in ONNXRuntime
        is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT'
        if has_custom_op and (not is_trt_backend):
            return g.op(
                'mmcv::NonMaxSuppression',
                bboxes,
                scores,
                iou_threshold_f=float(iou_threshold),
                offset_i=int(offset))
        else:
            from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze
            from ..onnx.onnx_utils.symbolic_helper import _size_helper

            # Add a leading dim to the boxes and two leading dims to the
            # scores before handing them to the standard op.
            boxes = unsqueeze(g, bboxes, 0)
            scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)

            if max_num > 0:
                max_num = g.op(
                    'Constant',
                    value_t=torch.tensor(max_num, dtype=torch.long))
            else:
                # No explicit cap: use the size of dim 0 of ``bboxes``.
                dim = g.op('Constant', value_t=torch.tensor(0))
                max_num = _size_helper(g, bboxes, dim)
            max_output_per_class = max_num
            iou_threshold = g.op(
                'Constant',
                value_t=torch.tensor([iou_threshold], dtype=torch.float))
            score_threshold = g.op(
                'Constant',
                value_t=torch.tensor([score_threshold], dtype=torch.float))
            nms_out = g.op('NonMaxSuppression', boxes, scores,
                           max_output_per_class, iou_threshold,
                           score_threshold)
            # Select index 2 along dim 1 of the op output, then squeeze that
            # dim. NOTE(review): per the ONNX spec this column should be the
            # box index - confirm against the operator documentation.
            return squeeze(
                g,
                select(
                    g, nms_out, 1,
                    g.op(
                        'Constant',
                        value_t=torch.tensor([2], dtype=torch.long))), 1)
@deprecated_api_warning({'iou_thr': 'iou_threshold'})
def soft_nms(boxes,
             scores,
             iou_threshold=0.3,
             sigma=0.5,
             min_score=1e-3,
             method='linear',
             offset=0):
    """Run Soft-NMS on the CPU for torch tensors or numpy arrays.

    Inputs are moved to the CPU for the computation. The results are
    returned as numpy arrays when ``boxes`` was a numpy array, otherwise as
    tensors on the device of ``boxes``.

    Args:
        boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
        scores (torch.Tensor or np.ndarray): scores in shape (N, ).
        iou_threshold (float): IoU threshold for NMS.
        sigma (float): hyperparameter for gaussian method
        min_score (float): score filter threshold
        method (str): one of 'naive', 'linear' or 'gaussian'.
        offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).

    Returns:
        tuple: kept dets (boxes and scores, 5 columns) and indice, which
        always have the same data type as the input.

    Example:
        >>> boxes = np.array([[4., 3., 5., 3.],
        >>>                   [4., 3., 5., 4.],
        >>>                   [3., 1., 3., 1.],
        >>>                   [3., 1., 3., 1.],
        >>>                   [3., 1., 3., 1.],
        >>>                   [3., 1., 3., 1.]], dtype=np.float32)
        >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.4, 0.0], dtype=np.float32)
        >>> iou_threshold = 0.6
        >>> dets, inds = soft_nms(boxes, scores, iou_threshold, sigma=0.5)
        >>> assert len(inds) == len(dets) == 5
    """
    accepted_types = (torch.Tensor, np.ndarray)
    assert isinstance(boxes, accepted_types)
    assert isinstance(scores, accepted_types)

    # The output container type follows the type of ``boxes`` only.
    is_numpy = isinstance(boxes, np.ndarray)
    if is_numpy:
        boxes = torch.from_numpy(boxes)
    if isinstance(scores, np.ndarray):
        scores = torch.from_numpy(scores)

    assert boxes.size(1) == 4
    assert boxes.size(0) == scores.size(0)
    assert offset in (0, 1)

    method_dict = {'naive': 0, 'linear': 1, 'gaussian': 2}
    assert method in method_dict
    method_code = method_dict[method]

    if torch.__version__ == 'parrots':
        # The kernel writes its results into ``dets`` in place.
        dets = boxes.new_empty((boxes.size(0), 5), device='cpu')
        kernel_args = [boxes.cpu(), scores.cpu(), dets.cpu()]
        kernel_kwargs = {
            'iou_threshold': float(iou_threshold),
            'sigma': float(sigma),
            'min_score': min_score,
            'method': method_code,
            'offset': int(offset)
        }
        inds = ext_module.softnms(*kernel_args, **kernel_kwargs)
    else:
        dets, inds = SoftNMSop.apply(boxes.cpu(), scores.cpu(),
                                     float(iou_threshold), float(sigma),
                                     float(min_score), method_code,
                                     int(offset))

    # Only the first ``len(inds)`` rows of the buffer are kept.
    dets = dets[:inds.size(0)]

    if is_numpy:
        return dets.cpu().numpy(), inds.cpu().numpy()

    target_device = boxes.device
    return dets.to(device=target_device), inds.to(device=target_device)
def nms_match(dets, iou_threshold):
    """Group detections by NMS.

    Works like NMS, except that every suppressed box is recorded together
    with the kept box that suppressed it, forming one group per kept box.
    Inside a group the indices follow score order.

    Args:
        dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5).
        iou_threshold (float): IoU thresh for NMS.

    Returns:
        list[torch.Tensor | np.ndarray]: One entry per matched group; each
        entry holds the indices of that group in score order. The element
        type follows the type of ``dets``.
    """
    # Nothing to match: both container types yield an empty list.
    if dets.shape[0] == 0:
        return []

    assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \
                                f'but get {dets.shape}'

    is_tensor = isinstance(dets, torch.Tensor)
    # The kernel is fed a CPU tensor regardless of the input container.
    cpu_dets = dets.detach().cpu() if is_tensor else torch.from_numpy(dets)

    groups = ext_module.nms_match(cpu_dets, iou_threshold=float(iou_threshold))
    if torch.__version__ == 'parrots':
        groups = groups.tolist()

    if is_tensor:
        return [dets.new_tensor(group, dtype=torch.long) for group in groups]
    return [np.array(group, dtype=int) for group in groups]
def pixel_group(score, mask, embedding, kernel_label, kernel_contour,
                kernel_region_num, distance_threshold):
    """Group pixels into text instances, which is widely used in text
    detection methods.

    Numpy inputs are converted to tensors with ``torch.from_numpy`` before
    being handed to the compiled ``pixel_group`` kernel.

    Arguments:
        score (np.array or torch.Tensor): The foreground score with size hxw.
        mask (np.array or Tensor): The foreground mask with size hxw.
        embedding (np.array or torch.Tensor): The embedding with size hxwxc to
            distinguish instances.
        kernel_label (np.array or torch.Tensor): The instance kernel index with
            size hxw.
        kernel_contour (np.array or torch.Tensor): The kernel contour with
            size hxw.
        kernel_region_num (int): The instance kernel region number.
        distance_threshold (float): The embedding distance threshold between
            kernel and pixel in one instance.

    Returns:
        list[list[float]]: The instance coordinates and attributes list. Each
        element consists of averaged confidence, pixel number, and coordinates
        (x_i, y_i for all pixels) in order.

    Raises:
        AssertionError: If an argument has an unsupported type
            (``kernel_region_num`` must be ``int`` and ``distance_threshold``
            must be ``float``).
    """
    array_like = (torch.Tensor, np.ndarray)
    assert isinstance(score, array_like)
    assert isinstance(mask, array_like)
    assert isinstance(embedding, array_like)
    assert isinstance(kernel_label, array_like)
    assert isinstance(kernel_contour, array_like)
    assert isinstance(kernel_region_num, int)
    assert isinstance(distance_threshold, float)

    if isinstance(score, np.ndarray):
        score = torch.from_numpy(score)
    if isinstance(mask, np.ndarray):
        mask = torch.from_numpy(mask)
    if isinstance(embedding, np.ndarray):
        embedding = torch.from_numpy(embedding)
    if isinstance(kernel_label, np.ndarray):
        kernel_label = torch.from_numpy(kernel_label)
    if isinstance(kernel_contour, np.ndarray):
        kernel_contour = torch.from_numpy(kernel_contour)

    if torch.__version__ == 'parrots':
        label = ext_module.pixel_group(
            score,
            mask,
            embedding,
            kernel_label,
            kernel_contour,
            kernel_region_num=kernel_region_num,
            distance_threshold=distance_threshold)
        label = label.tolist()
        label = label[0]
        # The flat list starts with ``kernel_region_num`` segment lengths,
        # followed by the segments themselves laid out back to back.
        list_index = kernel_region_num
        pixel_assignment = []
        for x in range(kernel_region_num):
            segment_len = int(label[x])
            # ``np.float`` was only an alias of the builtin ``float`` and was
            # removed in NumPy 1.24; the builtin yields the same float64.
            pixel_assignment.append(
                np.array(
                    label[list_index:list_index + segment_len],
                    dtype=float))
            list_index = list_index + segment_len
    else:
        pixel_assignment = ext_module.pixel_group(score, mask, embedding,
                                                  kernel_label, kernel_contour,
                                                  kernel_region_num,
                                                  distance_threshold)
    return pixel_assignment
align_corners=False): """Given an input and a flow-field grid, computes the output using input values and pixel locations from grid. Supported only bilinear interpolation method to sample the input pixels. Args: im (torch.Tensor): Input feature map, shape (N, C, H, W) grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2) align_corners {bool}: If set to True, the extrema (-1 and 1) are considered as referring to the center points of the input’s corner pixels. If set to False, they are instead considered as referring to the corner points of the input’s corner pixels, making the sampling more resolution agnostic. Returns: torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg) """ n, c, h, w = im.shape gn, gh, gw, _ = grid.shape assert n == gn x = grid[:, :, :, 0] y = grid[:, :, :, 1] if align_corners: x = ((x + 1) / 2) * (w - 1) y = ((y + 1) / 2) * (h - 1) else: x = ((x + 1) * w - 1) / 2 y = ((y + 1) * h - 1) / 2 x = x.view(n, -1) y = y.view(n, -1) x0 = torch.floor(x).long() y0 = torch.floor(y).long() x1 = x0 + 1 y1 = y0 + 1 wa = ((x1 - x) * (y1 - y)).unsqueeze(1) wb = ((x1 - x) * (y - y0)).unsqueeze(1) wc = ((x - x0) * (y1 - y)).unsqueeze(1) wd = ((x - x0) * (y - y0)).unsqueeze(1) # Apply default for grid_sample function zero padding im_padded = F.pad(im, pad=[1, 1, 1, 1], mode='constant', value=0) padded_h = h + 2 padded_w = w + 2 # save points positions after padding x0, x1, y0, y1 = x0 + 1, x1 + 1, y0 + 1, y1 + 1 # Clip coordinates to padded image size x0 = torch.where(x0 < 0, torch.tensor(0), x0) x0 = torch.where(x0 > padded_w - 1, torch.tensor(padded_w - 1), x0) x1 = torch.where(x1 < 0, torch.tensor(0), x1) x1 = torch.where(x1 > padded_w - 1, torch.tensor(padded_w - 1), x1) y0 = torch.where(y0 < 0, torch.tensor(0), y0) y0 = torch.where(y0 > padded_h - 1, torch.tensor(padded_h - 1), y0) y1 = torch.where(y1 < 0, torch.tensor(0), y1) y1 = torch.where(y1 > padded_h - 1, torch.tensor(padded_h - 1), y1) im_padded = im_padded.view(n, c, -1) x0_y0 = (x0 
def generate_grid(num_grid, size, device):
    """Build a regular grid of points in the [0, 1] x [0, 1] square and
    share it across ``num_grid`` regions.

    Args:
        num_grid (int): The number of grids to sample, one for each region.
        size (tuple[int, int]): The side size of the regular grid.
        device (torch.device): Desired device of returned tensor.

    Returns:
        torch.Tensor: A tensor of shape (num_grid, size[0]*size[1], 2) that
        contains coordinates for the regular grids.
    """
    # Identity affine transform, so affine_grid returns the plain base grid.
    identity_theta = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]],
                                  device=device)
    target_shape = torch.Size((1, 1, *size))
    base_grid = F.affine_grid(
        identity_theta, target_shape, align_corners=False)
    unit_grid = normalize(base_grid)
    # Flatten the grid to a point list, then broadcast it over the regions
    # (expand creates a view, not a copy).
    flat_points = unit_grid.view(1, -1, 2)
    return flat_points.expand(num_grid, -1, -1)
def get_shape_from_feature_map(x):
    """Return the spatial size of a feature map as a (width, height) tensor,
    using a traceable shape op while exporting to ONNX.

    Args:
        x (torch.Tensor): Input tensor, shape (N, C, H, W)

    Returns:
        torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2)
    """
    if torch.onnx.is_in_onnx_export():
        # shape_as_tensor keeps the shape inside the exported graph.
        spatial_dims = shape_as_tensor(x)[2:]
    else:
        spatial_dims = torch.tensor(x.shape[2:])
    # (H, W) -> (W, H), reshaped so it broadcasts against (N, P, 2) points.
    width_height = spatial_dims.flip(0).view(1, 1, 2)
    return width_height.to(x.device).float()
def rel_roi_point_to_rel_img_point(rois,
                                   rel_roi_points,
                                   img,
                                   spatial_scale=1.):
    """Convert roi based relative point coordinates to image based relative
    point coordinates.

    The points are first mapped to absolute image coordinates and then
    rescaled relative to the image (or feature map) size.

    Args:
        rois (torch.Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
        rel_roi_points (torch.Tensor): Point coordinates inside RoI, relative
            to RoI, location, range (0, 1), shape (N, P, 2)
        img (tuple or torch.Tensor): (height, width) of image or feature map.
        spatial_scale (float, optional): Scale points by this factor.
            Default: 1.

    Returns:
        torch.Tensor: Image based relative point coordinates for sampling,
        shape (N, P, 2).
    """
    return abs_img_point_to_rel_img_point(
        rel_roi_point_to_abs_img_point(rois, rel_roi_points), img,
        spatial_scale)
class SimpleRoIAlign(nn.Module):
    """Point-sampling based RoI align used by PointRend."""

    def __init__(self, output_size, spatial_scale, aligned=True):
        """Simple RoI align in PointRend, faster than standard RoIAlign.

        Args:
            output_size (tuple[int]): h, w
            spatial_scale (float): scale the input boxes by this number
            aligned (bool): if False, use the legacy implementation in
                MMDetection, align_corners=True will be used in F.grid_sample.
                If True, align the results more perfectly.
        """

        super(SimpleRoIAlign, self).__init__()
        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)
        # to be consistent with other RoI ops
        self.use_torchvision = False
        self.aligned = aligned

    def forward(self, features, rois):
        """Sample a regular grid of points inside every RoI.

        Args:
            features (torch.Tensor): Feature maps; dim 0 is the image index
                and dim 1 the channel dim.
            rois (torch.Tensor): RoIs whose column 0 is the image index
                (assumes the usual (N, 5) layout - confirm with the caller).

        Returns:
            torch.Tensor: RoI features reshaped to
            (num_rois, channels, *output_size).
        """
        num_imgs = features.size(0)
        num_rois = rois.size(0)
        channels = features.size(1)
        exporting = torch.onnx.is_in_onnx_export()

        # With no RoIs the eager path would collect nothing and torch.cat
        # would raise on an empty list; return an empty result instead.
        if num_rois == 0 and not exporting:
            return features.new_zeros((0, channels, *self.output_size))

        rel_roi_points = generate_grid(
            num_rois, self.output_size, device=rois.device)

        if exporting:
            rel_img_points = rel_roi_point_to_rel_img_point(
                rois, rel_roi_points, features, self.spatial_scale)
            rel_img_points = rel_img_points.reshape(num_imgs, -1,
                                                    *rel_img_points.shape[1:])
            point_feats = point_sample(
                features, rel_img_points, align_corners=not self.aligned)
            point_feats = point_feats.transpose(1, 2)
        else:
            point_feats = []
            for batch_ind in range(num_imgs):
                # unravel batch dim
                feat = features[batch_ind].unsqueeze(0)
                inds = (rois[:, 0].long() == batch_ind)
                if inds.any():
                    rel_img_points = rel_roi_point_to_rel_img_point(
                        rois[inds], rel_roi_points[inds], feat,
                        self.spatial_scale).unsqueeze(0)
                    point_feat = point_sample(
                        feat, rel_img_points, align_corners=not self.aligned)
                    point_feat = point_feat.squeeze(0).transpose(0, 1)
                    point_feats.append(point_feat)

            # NOTE(review): results are concatenated image by image, so the
            # output rows follow input order only when ``rois`` is already
            # grouped by ascending image index - confirm callers ensure this.
            point_feats = torch.cat(point_feats, dim=0)

        roi_feats = point_feats.reshape(num_rois, channels, *self.output_size)
        return roi_feats

    def __repr__(self):
        # The closing parenthesis was previously missing from this string.
        format_str = self.__class__.__name__
        format_str += '(output_size={}, spatial_scale={})'.format(
            self.output_size, self.spatial_scale)
        return format_str
def points_in_boxes_all(points, boxes):
    """Find all boxes in which each point is (CUDA).

    Args:
        points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate
        boxes (torch.Tensor): [B, T, 7],
            num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz],
            (x, y, z) is the bottom center.

    Returns:
        torch.Tensor: Return the box indices of points with the shape of
        (B, M, T). Default background = 0.

    Raises:
        AssertionError: If the batch sizes differ, the last dimensions are
            not 3 (points) and 7 (boxes), or the tensors are on different
            devices.
    """
    # Report the points batch size first and the boxes batch size second
    # (previously the boxes batch size was printed twice).
    assert boxes.shape[0] == points.shape[0], \
        'Points and boxes should have the same batch size, ' \
        f'but got {points.shape[0]} and {boxes.shape[0]}'
    assert boxes.shape[2] == 7, \
        'boxes dimension should be 7, ' \
        f'but got unexpected shape {boxes.shape[2]}'
    assert points.shape[2] == 3, \
        'points dimension should be 3, ' \
        f'but got unexpected shape {points.shape[2]}'
    batch_size, num_points, _ = points.shape
    num_boxes = boxes.shape[1]

    # Background value is 0, which new_zeros already provides.
    box_idxs_of_pts = points.new_zeros((batch_size, num_points, num_boxes),
                                       dtype=torch.int)

    # If the tensors live on a device that is not the current CUDA device,
    # the cuda op creates temporaries on the current device and the output
    # is incorrect, so the current device is switched to the tensors' device.
    points_device = points.get_device()
    assert points_device == boxes.get_device(), \
        'Points and boxes should be put on the same device'
    if torch.cuda.current_device() != points_device:
        torch.cuda.set_device(points_device)

    ext_module.points_in_boxes_all_forward(boxes.contiguous(),
                                           points.contiguous(),
                                           box_idxs_of_pts)

    return box_idxs_of_pts
def calc_square_dist(point_feat_a, point_feat_b, norm=True):
    """Pairwise squared distance between two batched sets of feature vectors.

    Uses the expansion ``|a - b|^2 = |a|^2 + |b|^2 - 2 * a.b`` so the cross
    term is a single batched matrix product.

    Args:
        point_feat_a (torch.Tensor): (B, N, C) Feature vector of each point.
        point_feat_b (torch.Tensor): (B, M, C) Feature vector of each point.
        norm (bool, optional): When True, the result is the square root of
            the squared distance divided by the channel count C.
            Default: True.

    Returns:
        torch.Tensor: (B, N, M) Distance between each point pair.
    """
    channels = point_feat_a.shape[-1]

    # Squared norms, shaped for broadcasting: (B, N, 1) and (B, 1, M).
    sq_norm_a = point_feat_a.unsqueeze(dim=2).pow(2).sum(dim=-1)
    sq_norm_b = point_feat_b.unsqueeze(dim=1).pow(2).sum(dim=-1)

    # Inner products between every pair of points: (B, N, M).
    cross = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2))

    dist = sq_norm_a + sq_norm_b - 2 * cross
    if not norm:
        return dist
    return torch.sqrt(dist) / channels
class PointsSampler(nn.Module):
    """Points sampling.

    Args:
        num_point (list[int]): Number of sample points.
        fps_mod_list (list[str], optional): Type of FPS method, valid mod
            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].
            F-FPS: using feature distances for FPS.
            D-FPS: using Euclidean distances of points for FPS.
            FS: using F-FPS and D-FPS simultaneously.
        fps_sample_range_list (list[int], optional):
            Range of points to apply FPS. Each entry is the (exclusive) end
            index, along the point dimension, of the segment handled by the
            corresponding sampler; -1 means "up to the last point".
            Default: [-1].
    """

    def __init__(self,
                 num_point: List[int],
                 fps_mod_list: List[str] = ['D-FPS'],
                 fps_sample_range_list: List[int] = [-1]):
        super().__init__()
        # FPS would be applied to different fps_mod in the list,
        # so the length of the num_point should be equal to
        # fps_mod_list and fps_sample_range_list.
        assert len(num_point) == len(fps_mod_list) == len(
            fps_sample_range_list)
        self.num_point = num_point
        self.fps_sample_range_list = fps_sample_range_list
        self.samplers = nn.ModuleList()
        for fps_mod in fps_mod_list:
            self.samplers.append(get_sampler_cls(fps_mod)())
        self.fp16_enabled = False

    @force_fp32()
    def forward(self, points_xyz, features):
        """
        Args:
            points_xyz (torch.Tensor): (B, N, 3) xyz coordinates of
                the points.
            features (torch.Tensor): (B, C, N) features of the points.
                May be None when no sampler needs features.

        Returns:
            torch.Tensor: Indices of sampled points; the per-sampler index
            tensors are concatenated along dim 1.
        """
        indices = []
        last_fps_end_index = 0
        for fps_sample_range, sampler, npoint in zip(
                self.fps_sample_range_list, self.samplers, self.num_point):
            assert fps_sample_range < points_xyz.shape[1]

            # -1 selects everything from the previous end to the last point.
            end_index = None if fps_sample_range == -1 else fps_sample_range
            sample_points_xyz = points_xyz[:, last_fps_end_index:end_index]
            if features is not None:
                sample_features = features[:, :,
                                           last_fps_end_index:end_index]
            else:
                sample_features = None

            fps_idx = sampler(sample_points_xyz.contiguous(), sample_features,
                              npoint)

            # Indices are relative to the slice; shift them back so they
            # index into the full point set.
            indices.append(fps_idx + last_fps_end_index)
            # ``fps_sample_range`` is an absolute end index (it is used as
            # the slice stop above), so the next segment starts exactly
            # there. Accumulating it with ``+=`` would skip points from the
            # third segment onwards.
            last_fps_end_index = fps_sample_range
        indices = torch.cat(indices, dim=1)

        return indices
class PSAMaskFunction(Function):
    """Autograd wrapper around the ``psamask`` forward/backward extension ops.

    ``forward`` takes ``(input, psa_type, mask_size)``; only ``input``
    receives a gradient.
    """

    @staticmethod
    def symbolic(g, input, psa_type, mask_size):
        """Emit the custom ``mmcv::MMCVPSAMask`` node for ONNX export."""
        return g.op(
            'mmcv::MMCVPSAMask',
            input,
            psa_type_i=psa_type,
            mask_size_i=mask_size)

    @staticmethod
    def forward(ctx, input, psa_type, mask_size):
        """Compute the PSA mask.

        Args:
            input (torch.Tensor): 4-D tensor whose channel count must equal
                ``h_mask * w_mask``.
            psa_type (int): Mode flag forwarded to the extension.
            mask_size (int or tuple): Mask size, expanded with ``_pair``.

        Returns:
            torch.Tensor: Tensor of shape
            (batch, h_feature * w_feature, h_feature, w_feature).
        """
        ctx.psa_type = psa_type
        ctx.mask_size = _pair(mask_size)
        ctx.save_for_backward(input)

        h_mask, w_mask = ctx.mask_size
        batch_size, channels, h_feature, w_feature = input.size()
        assert channels == h_mask * w_mask
        output = input.new_zeros(
            (batch_size, h_feature * w_feature, h_feature, w_feature))

        ext_module.psamask_forward(
            input,
            output,
            psa_type=psa_type,
            num_=batch_size,
            h_feature=h_feature,
            w_feature=w_feature,
            h_mask=h_mask,
            w_mask=w_mask,
            half_h_mask=(h_mask - 1) // 2,
            half_w_mask=(w_mask - 1) // 2)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Back-propagate through the PSA mask.

        Returns:
            tuple: ``(grad_input, None, None)`` - one entry per forward
            argument; ``psa_type`` and ``mask_size`` are not differentiable.
        """
        input = ctx.saved_tensors[0]
        psa_type = ctx.psa_type
        h_mask, w_mask = ctx.mask_size
        batch_size, channels, h_feature, w_feature = input.size()
        grad_input = grad_output.new_zeros(
            (batch_size, channels, h_feature, w_feature))
        ext_module.psamask_backward(
            grad_output,
            grad_input,
            psa_type=psa_type,
            num_=batch_size,
            h_feature=h_feature,
            w_feature=w_feature,
            h_mask=h_mask,
            w_mask=w_mask,
            half_h_mask=(h_mask - 1) // 2,
            half_w_mask=(w_mask - 1) // 2)
        # forward() has exactly three inputs, so return exactly three grads.
        return grad_input, None, None
class RiRoIAlignRotatedFunction(Function):
    """Autograd wrapper around the ``riroi_align_rotated`` extension ops."""

    @staticmethod
    def forward(ctx,
                features,
                rois,
                out_size,
                spatial_scale,
                num_samples=0,
                num_orientations=8,
                clockwise=False):
        """Pool ``features`` over ``rois``.

        ``out_size`` is either a single int (square output) or a 2-tuple of
        ints ``(h, w)``; anything else raises ``TypeError``. The result has
        shape (num_rois, C, out_h, out_w).
        """
        if isinstance(out_size, int):
            out_h = out_w = out_size
        elif is_tuple_of(out_size, int):
            assert len(out_size) == 2
            out_h, out_w = out_size
        else:
            raise TypeError(
                f'"out_size" should be an integer or tuple of integers,'
                f' but got {out_size}')

        # Everything backward() needs besides the saved tensor.
        ctx.spatial_scale = spatial_scale
        ctx.num_samples = num_samples
        ctx.num_orientations = num_orientations
        ctx.clockwise = clockwise
        ctx.save_for_backward(rois)
        ctx.feature_size = features.size()

        _, num_channels, _, _ = features.size()
        pooled = features.new_zeros(
            rois.size(0), num_channels, out_h, out_w)

        ext_module.riroi_align_rotated_forward(
            features,
            rois,
            pooled,
            pooled_height=out_h,
            pooled_width=out_w,
            spatial_scale=spatial_scale,
            num_samples=num_samples,
            num_orientations=num_orientations,
            clockwise=clockwise)
        return pooled

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``features``; all other inputs get None."""
        rois = ctx.saved_tensors[0]
        feature_size = ctx.feature_size
        assert feature_size is not None
        batch_size, num_channels, feature_h, feature_w = feature_size

        # Pooled size is recovered from the incoming gradient's shape.
        pooled_h = grad_output.size(2)
        pooled_w = grad_output.size(3)

        grad_input = None
        grad_rois = None
        if ctx.needs_input_grad[0]:
            grad_input = rois.new_zeros(batch_size, num_channels, feature_h,
                                        feature_w)
            ext_module.riroi_align_rotated_backward(
                grad_output.contiguous(),
                rois,
                grad_input,
                pooled_height=pooled_h,
                pooled_width=pooled_w,
                spatial_scale=ctx.spatial_scale,
                num_samples=ctx.num_samples,
                num_orientations=ctx.num_orientations,
                clockwise=ctx.clockwise)

        return grad_input, grad_rois, None, None, None, None, None
""" def __init__(self, out_size, spatial_scale, num_samples=0, num_orientations=8, clockwise=False): super(RiRoIAlignRotated, self).__init__() self.out_size = out_size self.spatial_scale = float(spatial_scale) self.num_samples = int(num_samples) self.num_orientations = int(num_orientations) self.clockwise = clockwise def forward(self, features, rois): return RiRoIAlignRotatedFunction.apply(features, rois, self.out_size, self.spatial_scale, self.num_samples, self.num_orientations, self.clockwise) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/roi_align.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import torch import torch.nn as nn from torch.autograd import Function from torch.autograd.function import once_differentiable from torch.nn.modules.utils import _pair from ..utils import deprecated_api_warning, ext_loader ext_module = ext_loader.load_ext('_ext', ['roi_align_forward', 'roi_align_backward']) class RoIAlignFunction(Function): @staticmethod def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio, pool_mode, aligned): from ..onnx import is_custom_op_loaded has_custom_op = is_custom_op_loaded() if has_custom_op: return g.op( 'mmcv::MMCVRoiAlign', input, rois, output_height_i=output_size[0], output_width_i=output_size[1], spatial_scale_f=spatial_scale, sampling_ratio_i=sampling_ratio, mode_s=pool_mode, aligned_i=aligned) else: from torch.onnx.symbolic_opset9 import sub, squeeze from torch.onnx.symbolic_helper import _slice_helper from torch.onnx import TensorProtoDataType # batch_indices = rois[:, 0].long() batch_indices = _slice_helper( g, rois, axes=[1], starts=[0], ends=[1]) batch_indices = squeeze(g, batch_indices, 1) batch_indices = g.op( 'Cast', batch_indices, to_i=TensorProtoDataType.INT64) # rois = rois[:, 1:] rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5]) if aligned: # rois -= 0.5/spatial_scale aligned_offset = g.op( 
'Constant', value_t=torch.tensor([0.5 / spatial_scale], dtype=torch.float32)) rois = sub(g, rois, aligned_offset) # roi align return g.op( 'RoiAlign', input, rois, batch_indices, output_height_i=output_size[0], output_width_i=output_size[1], spatial_scale_f=spatial_scale, sampling_ratio_i=max(0, sampling_ratio), mode_s=pool_mode) @staticmethod def forward(ctx, input, rois, output_size, spatial_scale=1.0, sampling_ratio=0, pool_mode='avg', aligned=True): ctx.output_size = _pair(output_size) ctx.spatial_scale = spatial_scale ctx.sampling_ratio = sampling_ratio assert pool_mode in ('max', 'avg') ctx.pool_mode = 0 if pool_mode == 'max' else 1 ctx.aligned = aligned ctx.input_shape = input.size() assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' output_shape = (rois.size(0), input.size(1), ctx.output_size[0], ctx.output_size[1]) output = input.new_zeros(output_shape) if ctx.pool_mode == 0: argmax_y = input.new_zeros(output_shape) argmax_x = input.new_zeros(output_shape) else: argmax_y = input.new_zeros(0) argmax_x = input.new_zeros(0) ext_module.roi_align_forward( input, rois, output, argmax_y, argmax_x, aligned_height=ctx.output_size[0], aligned_width=ctx.output_size[1], spatial_scale=ctx.spatial_scale, sampling_ratio=ctx.sampling_ratio, pool_mode=ctx.pool_mode, aligned=ctx.aligned) ctx.save_for_backward(rois, argmax_y, argmax_x) return output @staticmethod @once_differentiable def backward(ctx, grad_output): rois, argmax_y, argmax_x = ctx.saved_tensors grad_input = grad_output.new_zeros(ctx.input_shape) # complex head architecture may cause grad_output uncontiguous. 
class RoIAlign(nn.Module):
    """RoI align pooling layer.

    Args:
        output_size (tuple): h, w
        spatial_scale (float): scale the input boxes by this number
        sampling_ratio (int): number of inputs samples to take for each
            output sample. 0 to take samples densely for current models.
        pool_mode (str, 'avg' or 'max'): pooling mode in each bin.
        aligned (bool): if False, use the legacy implementation in
            MMDetection. If True, align the results more perfectly.
        use_torchvision (bool): whether to use roi_align from torchvision.

    Note:
        The implementation of RoIAlign when aligned=True is modified from
        https://github.com/facebookresearch/detectron2/

        The meaning of aligned=True:

        Given a continuous coordinate c, its two neighboring pixel
        indices (in our pixel model) are computed by floor(c - 0.5) and
        ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete
        indices [0] and [1] (which are sampled from the underlying signal
        at continuous coordinates 0.5 and 1.5). But the original roi_align
        (aligned=False) does not subtract the 0.5 when computing
        neighboring pixel indices and therefore it uses pixels with a
        slightly incorrect alignment (relative to our pixel model) when
        performing bilinear interpolation.

        With `aligned=True`,
        we first appropriately scale the ROI and then shift it by -0.5
        prior to calling roi_align. This produces the correct neighbors;

        The difference does not make a difference to the model's
        performance if ROIAlign is used together with conv layers.
    """

    @deprecated_api_warning(
        {
            'out_size': 'output_size',
            'sample_num': 'sampling_ratio'
        },
        cls_name='RoIAlign')
    def __init__(self,
                 output_size,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 pool_mode='avg',
                 aligned=True,
                 use_torchvision=False):
        super(RoIAlign, self).__init__()

        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)
        self.pool_mode = pool_mode
        self.aligned = aligned
        self.use_torchvision = use_torchvision

    def forward(self, input, rois):
        """
        Args:
            input: NCHW images
            rois: Bx5 boxes. First column is the index into N.
                The other 4 columns are xyxy.

        Returns:
            The pooled features, from torchvision's ``roi_align`` when
            ``use_torchvision`` is set and from the mmcv op otherwise.
        """
        if self.use_torchvision:
            from torchvision.ops import roi_align as tv_roi_align
            if 'aligned' in tv_roi_align.__code__.co_varnames:
                return tv_roi_align(input, rois, self.output_size,
                                    self.spatial_scale, self.sampling_ratio,
                                    self.aligned)
            else:
                # Older torchvision has no ``aligned`` argument, so emulate
                # it by shifting the box coordinates. Build a new tensor
                # instead of using ``-=`` so the caller's ``rois`` is not
                # modified as a side effect of the forward pass.
                if self.aligned:
                    rois = rois - rois.new_tensor(
                        [0.] + [0.5 / self.spatial_scale] * 4)
                return tv_roi_align(input, rois, self.output_size,
                                    self.spatial_scale, self.sampling_ratio)
        else:
            return roi_align(input, rois, self.output_size, self.spatial_scale,
                             self.sampling_ratio, self.pool_mode, self.aligned)

    def __repr__(self):
        s = self.__class__.__name__
        s += f'(output_size={self.output_size}, '
        s += f'spatial_scale={self.spatial_scale}, '
        s += f'sampling_ratio={self.sampling_ratio}, '
        s += f'pool_mode={self.pool_mode}, '
        s += f'aligned={self.aligned}, '
        s += f'use_torchvision={self.use_torchvision})'
        return s
class RoIAlignRotatedFunction(Function):
    """Autograd wrapper around the ``roi_align_rotated`` extension ops."""

    @staticmethod
    def _unpack_out_size(out_size):
        """Normalize ``out_size`` (int or 2-tuple of ints) to ``(h, w)``."""
        if isinstance(out_size, int):
            return out_size, out_size
        if isinstance(out_size, tuple):
            assert len(out_size) == 2
            assert isinstance(out_size[0], int)
            assert isinstance(out_size[1], int)
            return out_size
        raise TypeError(
            '"out_size" must be an integer or tuple of integers')

    @staticmethod
    def symbolic(g, features, rois, out_size, spatial_scale, sample_num,
                 aligned, clockwise):
        """Emit the custom ``mmcv::MMCVRoIAlignRotated`` node for ONNX export.

        Raises:
            TypeError: If ``out_size`` is neither an int nor a tuple.
        """
        out_h, out_w = RoIAlignRotatedFunction._unpack_out_size(out_size)
        return g.op(
            'mmcv::MMCVRoIAlignRotated',
            features,
            rois,
            output_height_i=out_h,
            # Width must come from out_w; using out_h here exported a wrong
            # graph for non-square output sizes.
            output_width_i=out_w,
            spatial_scale_f=spatial_scale,
            sampling_ratio_i=sample_num,
            aligned_i=aligned,
            clockwise_i=clockwise)

    @staticmethod
    def forward(ctx,
                features,
                rois,
                out_size,
                spatial_scale,
                sample_num=0,
                aligned=True,
                clockwise=False):
        """Pool ``features`` over rotated ``rois``.

        Returns:
            torch.Tensor: Tensor of shape (num_rois, C, out_h, out_w).

        Raises:
            TypeError: If ``out_size`` is neither an int nor a tuple.
        """
        out_h, out_w = RoIAlignRotatedFunction._unpack_out_size(out_size)
        ctx.spatial_scale = spatial_scale
        ctx.sample_num = sample_num
        ctx.aligned = aligned
        ctx.clockwise = clockwise
        ctx.save_for_backward(rois)
        ctx.feature_size = features.size()

        batch_size, num_channels, data_height, data_width = features.size()
        num_rois = rois.size(0)

        output = features.new_zeros(num_rois, num_channels, out_h, out_w)
        ext_module.roi_align_rotated_forward(
            features,
            rois,
            output,
            pooled_height=out_h,
            pooled_width=out_w,
            spatial_scale=spatial_scale,
            sample_num=sample_num,
            aligned=aligned,
            clockwise=clockwise)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``features``; all other inputs get None."""
        feature_size = ctx.feature_size
        spatial_scale = ctx.spatial_scale
        aligned = ctx.aligned
        clockwise = ctx.clockwise
        sample_num = ctx.sample_num
        rois = ctx.saved_tensors[0]
        assert feature_size is not None
        batch_size, num_channels, data_height, data_width = feature_size

        # Pooled size is recovered from the incoming gradient's shape.
        out_w = grad_output.size(3)
        out_h = grad_output.size(2)

        grad_input = grad_rois = None

        if ctx.needs_input_grad[0]:
            grad_input = rois.new_zeros(batch_size, num_channels, data_height,
                                        data_width)
            ext_module.roi_align_rotated_backward(
                grad_output.contiguous(),
                rois,
                grad_input,
                pooled_height=out_h,
                pooled_width=out_w,
                spatial_scale=spatial_scale,
                sample_num=sample_num,
                aligned=aligned,
                clockwise=clockwise)
        return grad_input, grad_rois, None, None, None, None, None
class RoIPoolFunction(Function):
    """Autograd wrapper around the ``roi_pool`` extension ops."""

    @staticmethod
    def symbolic(g, input, rois, output_size, spatial_scale):
        """Emit a ``MaxRoiPool`` node for ONNX export."""
        return g.op(
            'MaxRoiPool',
            input,
            rois,
            pooled_shape_i=output_size,
            spatial_scale_f=spatial_scale)

    @staticmethod
    def forward(ctx, input, rois, output_size, spatial_scale=1.0):
        """Max-pool ``input`` over ``rois``.

        Args:
            input (torch.Tensor): Feature map; dims 0 and 1 are read as
                batch and channels.
            rois (torch.Tensor): (K, 5) boxes as (idx, x1, y1, x2, y2).
            output_size (int or tuple): Pooled size, expanded with ``_pair``.
            spatial_scale (float): Scale forwarded to the extension.

        Returns:
            torch.Tensor: Tensor of shape (K, C, out_h, out_w).
        """
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = spatial_scale
        ctx.input_shape = input.size()

        assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'

        output_shape = (rois.size(0), input.size(1), ctx.output_size[0],
                        ctx.output_size[1])
        output = input.new_zeros(output_shape)
        # Integer buffer filled by the extension and reused in backward().
        argmax = input.new_zeros(output_shape, dtype=torch.int)

        ext_module.roi_pool_forward(
            input,
            rois,
            output,
            argmax,
            pooled_height=ctx.output_size[0],
            pooled_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale)

        ctx.save_for_backward(rois, argmax)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input``; other inputs get None."""
        rois, argmax = ctx.saved_tensors
        grad_input = grad_output.new_zeros(ctx.input_shape)

        # The incoming gradient may be non-contiguous (e.g. after a
        # transpose in the head); hand the extension a contiguous buffer,
        # as RoIAlignFunction.backward does.
        grad_output = grad_output.contiguous()

        ext_module.roi_pool_backward(
            grad_output,
            rois,
            argmax,
            grad_input,
            pooled_height=ctx.output_size[0],
            pooled_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale)

        return grad_input, None, None, None
class RoIAwarePool3dFunction(Function):
    """Autograd wrapper around the ``roiaware_pool3d`` extension ops."""

    @staticmethod
    def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel,
                mode):
        """Pool point features into a voxel grid for each RoI.

        Args:
            rois (torch.Tensor): [N, 7], in LiDAR coordinate,
                (x, y, z) is the bottom center of rois.
            pts (torch.Tensor): [npoints, 3], coordinates of input points.
            pts_feature (torch.Tensor): [npoints, C], features of input points.
            out_size (int or tuple): The size of output features. Either a
                single int n (cubic grid) or a 3-tuple of ints.
            max_pts_per_voxel (int): The maximum number of points per voxel.
            mode (int): Pooling method of RoIAware, 0 (max pool) or 1 (average
                pool).

        Returns:
            torch.Tensor: Pooled features whose shape is
            [N, out_x, out_y, out_z, C].
        """
        if isinstance(out_size, int):
            grid = (out_size, out_size, out_size)
        else:
            assert len(out_size) == 3
            assert mmcv.is_tuple_of(out_size, int)
            dim_x, dim_y, dim_z = out_size
            grid = (dim_x, dim_y, dim_z)

        num_rois = rois.shape[0]
        num_channels = pts_feature.shape[-1]
        num_pts = pts.shape[0]

        # All three buffers share the leading (N, out_x, out_y, out_z) dims.
        leading = (num_rois, ) + grid
        pooled_features = pts_feature.new_zeros(leading + (num_channels, ))
        argmax = pts_feature.new_zeros(
            leading + (num_channels, ), dtype=torch.int)
        pts_idx_of_voxels = pts_feature.new_zeros(
            leading + (max_pts_per_voxel, ), dtype=torch.int)

        ext_module.roiaware_pool3d_forward(
            rois,
            pts,
            pts_feature,
            argmax,
            pts_idx_of_voxels,
            pooled_features,
            pool_method=mode)

        # Stashed as a plain tuple on ctx for backward().
        ctx.roiaware_pool3d_for_backward = (pts_idx_of_voxels, argmax, mode,
                                            num_pts, num_channels)
        return pooled_features

    @staticmethod
    def backward(ctx, grad_out):
        """Return the gradient w.r.t. ``pts_feature``; other inputs get None."""
        (pts_idx_of_voxels, argmax, mode, num_pts,
         num_channels) = ctx.roiaware_pool3d_for_backward

        grad_in = grad_out.new_zeros((num_pts, num_channels))
        ext_module.roiaware_pool3d_backward(
            pts_idx_of_voxels,
            argmax,
            grad_out.contiguous(),
            grad_in,
            pool_method=mode)

        return None, None, grad_in, None, None, None
class RoIPointPool3dFunction(Function):
    """Forward-only autograd wrapper around ``roipoint_pool3d_forward``."""

    @staticmethod
    def forward(ctx, points, point_features, boxes3d, num_sampled_points=512):
        """Gather a fixed number of points (and their features) per box.

        Args:
            points (torch.Tensor): Input points whose shape is (B, N, 3);
                the last dimension is asserted to be 3.
            point_features (torch.Tensor): Features of input points whose shape
                is (B, N, C).
            boxes3d (torch.Tensor): Input bounding boxes whose shape is
                (B, M, 7).
            num_sampled_points (int, optional): The num of sampled points.
                Default: 512.

        Returns:
            tuple[torch.Tensor]: A tuple contains two elements. The first one
            is the pooled features whose shape is
            (B, M, num_sampled_points, 3 + C). The second is an empty flag
            whose shape is (B, M).
        """
        assert len(points.shape) == 3 and points.shape[2] == 3

        batch_size = points.shape[0]
        boxes_num = boxes3d.shape[1]
        feature_len = point_features.shape[2]

        boxes_flat = boxes3d.view(batch_size, -1, 7)
        pooled_features = point_features.new_zeros(
            (batch_size, boxes_num, num_sampled_points, 3 + feature_len))
        pooled_empty_flag = point_features.new_zeros(
            (batch_size, boxes_num)).int()

        ext_module.roipoint_pool3d_forward(points.contiguous(),
                                           boxes_flat.contiguous(),
                                           point_features.contiguous(),
                                           pooled_features, pooled_empty_flag)

        return pooled_features, pooled_empty_flag

    @staticmethod
    def backward(ctx, grad_out):
        """Not implemented: this op does not support back-propagation."""
        raise NotImplementedError
class RotatedFeatureAlignFunction(Function):
    """Using the feature interpolation to obtain the position information
    corresponding to the refined rotated anchors and reconstruct the feature
    maps in a pixel-wise manner to achieve feature alignment.

    The details are described in the paper `R3Det: Refined Single-Stage
    Detector with Feature Refinement for Rotating Object`.
    """

    @staticmethod
    def forward(ctx, features, best_rbboxes, spatial_scale, points):
        """
        Args:
            features (torch.Tensor): Input features with shape [N,C,H,W].
            best_rbboxes (torch.Tensor): Refined rotate anchors with shape
                [N,H,W,5]. Coordinate format (cx,cy,h,w,a).
            spatial_scale (float): The scale of feature map size and
                input image size.
            points (int): The number of sample points. Only 1 and 5 are
                supported.

        Returns:
            torch.Tensor: Refined features with shape [N,C,H,W].
        """
        # Stash everything backward needs before validating ``points``.
        ctx.spatial_scale = spatial_scale
        ctx.points = points
        ctx.save_for_backward(best_rbboxes)
        assert points in [1, 5]

        aligned = torch.zeros_like(features)
        ext_module.rotated_feature_align_forward(
            features,
            best_rbboxes,
            aligned,
            spatial_scale=spatial_scale,
            points=points)
        return aligned

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """
        Args:
            grad_output (torch.Tensor): The gradient of output features
                with shape [N,C,H,W].

        Returns:
            tuple: The gradient of input features with shape [N,C,H,W]
            (or ``None`` when it is not required), followed by ``None`` for
            the three non-differentiable arguments.
        """
        if not ctx.needs_input_grad[0]:
            return None, None, None, None

        rbboxes = ctx.saved_tensors[0]
        grad_features = torch.zeros_like(grad_output)
        ext_module.rotated_feature_align_backward(
            grad_output.contiguous(),
            rbboxes,
            grad_features,
            spatial_scale=ctx.spatial_scale,
            points=ctx.points)
        return grad_features, None, None, None


def rotated_feature_align(features,
                          best_rbboxes,
                          spatial_scale=1 / 8,
                          points=1):
    """Functional entry point for :class:`RotatedFeatureAlignFunction`.

    Args:
        features (torch.Tensor): Input features with shape [N,C,H,W].
        best_rbboxes (torch.Tensor): Refined rotate anchors with shape
            [N,H,W,5].
        spatial_scale (float, optional): Defaults to 1 / 8.
        points (int, optional): Number of sample points, 1 or 5.
            Defaults to 1.

    Returns:
        torch.Tensor: Refined features with shape [N,C,H,W].
    """
    return RotatedFeatureAlignFunction.apply(features, best_rbboxes,
                                             spatial_scale, points)
def forward(self, x):
    """Apply switchable atrous convolution to ``x``.

    The input is convolved twice with (almost) the same weight: once with
    the configured padding/dilation (``out_s``) and once with both tripled
    and ``weight_diff`` added to the weight (``out_l``). A per-location
    switch blends the two results. Global-context terms are added before
    and after the convolution.

    Args:
        x (torch.Tensor): Input feature map. It is fed to
            ``adaptive_avg_pool2d`` and 2D convolutions.

    Returns:
        torch.Tensor: The blended convolution output with the post-context
        term added.
    """
    # pre-context: add a globally pooled, 1x1-projected summary to the input
    avg_x = F.adaptive_avg_pool2d(x, output_size=1)
    avg_x = self.pre_context(avg_x)
    avg_x = avg_x.expand_as(x)
    x = x + avg_x

    # switch: 5x5 average over the reflect-padded map, then a 1x1 conv
    avg_x = F.pad(x, pad=(2, 2, 2, 2), mode='reflect')
    avg_x = F.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0)
    switch = self.switch(avg_x)

    # sac
    weight = self._get_weight(self.weight)
    zero_bias = torch.zeros(
        self.out_channels, device=weight.device, dtype=weight.dtype)
    # Zero-argument ``super()`` cannot be used inside the nested helper, so
    # resolve the parent proxy once here.
    parent = super()

    def _conv(inp, w, offset_name):
        """Run one convolution with the *current* padding/dilation."""
        if self.use_deform:
            offset = getattr(self, offset_name)(avg_x)
            return deform_conv2d(inp, offset, w, self.stride, self.padding,
                                 self.dilation, self.groups, 1)
        if (TORCH_VERSION == 'parrots'
                or digit_version(TORCH_VERSION) < digit_version('1.5.0')):
            return parent.conv2d_forward(inp, w)
        if digit_version(TORCH_VERSION) >= digit_version('1.8.0'):
            # bias is a required argument of _conv_forward in torch 1.8.0
            return parent._conv_forward(inp, w, zero_bias)
        return parent._conv_forward(inp, w)

    out_s = _conv(x, weight, 'offset_s')

    # The parent conv reads ``self.padding``/``self.dilation``, so they are
    # tripled temporarily. Restore them even if the convolution raises,
    # otherwise the module would be left permanently misconfigured.
    ori_p = self.padding
    ori_d = self.dilation
    try:
        self.padding = tuple(3 * p for p in self.padding)
        self.dilation = tuple(3 * d for d in self.dilation)
        weight = weight + self.weight_diff
        out_l = _conv(x, weight, 'offset_l')
    finally:
        self.padding = ori_p
        self.dilation = ori_d

    out = switch * out_s + (1 - switch) * out_l

    # post-context: same global summary trick on the output
    avg_x = F.adaptive_avg_pool2d(out, output_size=1)
    avg_x = self.post_context(avg_x)
    avg_x = avg_x.expand_as(out)
    out = out + avg_x
    return out
class DynamicScatter(nn.Module):
    """Scatters points into voxels, used in the voxel encoder with dynamic
    voxelization.

    Note:
        The CPU and GPU implementation get the same output, but have numerical
        difference after summation and division (e.g., 5e-7).

    Args:
        voxel_size (list): list [x, y, z] size of three dimension.
        point_cloud_range (list): The coordinate range of points, [x_min,
            y_min, z_min, x_max, y_max, z_max].
        average_points (bool): whether to use avg pooling to scatter points
            into voxel.
    """

    def __init__(self, voxel_size, point_cloud_range, average_points: bool):
        super().__init__()
        self.voxel_size = voxel_size
        self.point_cloud_range = point_cloud_range
        self.average_points = average_points

    def forward_single(self, points, coors):
        """Scatter the points of one sample into voxels.

        Args:
            points (torch.Tensor): Points to be reduced into voxels.
            coors (torch.Tensor): Corresponding voxel coordinates (specifically
                multi-dim voxel index) of each points.

        Returns:
            tuple[torch.Tensor]: Whatever ``dynamic_scatter`` returns: the
            reduced voxel features and the voxel coordinates.
        """
        if self.average_points:
            reduce_op = 'mean'
        else:
            reduce_op = 'max'
        return dynamic_scatter(points.contiguous(), coors.contiguous(),
                               reduce_op)

    def forward(self, points, coors):
        """Scatter points/features into voxels.

        When ``coors`` has exactly three columns it is handled as a single
        sample. Otherwise column 0 is read as the batch index, each sample is
        scattered on its own and the batch index is prepended again to the
        resulting voxel coordinates.

        Args:
            points (torch.Tensor): Points to be reduced into voxels.
            coors (torch.Tensor): Corresponding voxel coordinates (specifically
                multi-dim voxel index) of each points.

        Returns:
            tuple[torch.Tensor]: A tuple contains two elements. The first one
            is the voxel features, the second is the voxel coordinates.
        """
        if coors.size(-1) == 3:
            return self.forward_single(points, coors)

        # The batch size is derived from the batch index of the last row.
        batch_size = coors[-1, 0] + 1
        feats_per_sample = []
        coors_per_sample = []
        for batch_idx in range(batch_size):
            selected = torch.where(coors[:, 0] == batch_idx)
            sample_feats, sample_coors = self.forward_single(
                points[selected], coors[selected][:, 1:])
            feats_per_sample.append(sample_feats)
            coors_per_sample.append(
                F.pad(sample_coors, (1, 0), mode='constant', value=batch_idx))
        return (torch.cat(feats_per_sample, dim=0),
                torch.cat(coors_per_sample, dim=0))

    def __repr__(self):
        parts = [
            self.__class__.__name__, '(',
            'voxel_size=', str(self.voxel_size),
            ', point_cloud_range=', str(self.point_cloud_range),
            ', average_points=', str(self.average_points),
            ')',
        ]
        return ''.join(parts)
class SyncBatchNormFunction(Function):
    """Autograd function implementing synchronized batch normalization.

    Per-channel statistics are computed locally by the compiled extension,
    combined across the process group with ``dist.all_reduce`` and then used
    to normalize the input.
    """

    @staticmethod
    def symbolic(g, input, running_mean, running_var, weight, bias, momentum,
                 eps, group, group_size, stats_mode):
        # ONNX export hook: emits a single custom ``mmcv::MMCVSyncBatchNorm``
        # node that carries the hyper-parameters as attributes.
        return g.op(
            'mmcv::MMCVSyncBatchNorm',
            input,
            running_mean,
            running_var,
            weight,
            bias,
            momentum_f=momentum,
            eps_f=eps,
            group_i=group,
            group_size_i=group_size,
            stats_mode=stats_mode)

    @staticmethod
    def forward(self, input, running_mean, running_var, weight, bias, momentum,
                eps, group, group_size, stats_mode):
        """Normalize ``input`` with statistics shared across the group.

        Note that the first argument is the autograd context even though it
        is named ``self``.

        Args:
            input (torch.Tensor): Half or float tensor (CPU or CUDA) with at
                least 3 dimensions; dimension 1 is treated as channels.
            running_mean: Passed through to ``sync_bn_forward_output``.
            running_var: Passed through to ``sync_bn_forward_output``.
            weight: Passed to the extension and saved for backward.
            bias: Passed through to ``sync_bn_forward_output``.
            momentum (float): Momentum for the running statistics; it is
                multiplied by 0 when the total batch over all ranks is empty.
            eps (float): Passed through to ``sync_bn_forward_output``.
            group: Process group used for ``dist.all_reduce``.
            group_size (int): Number of processes in ``group``.
            stats_mode (str): ``'default'`` divides the reduced statistics by
                ``group_size``; ``'N'`` weights each rank by its batch size
                and divides by the total batch size.

        Returns:
            torch.Tensor: Output with the same shape as ``input``.

        Raises:
            NotImplementedError: If ``stats_mode`` is neither ``'default'``
                nor ``'N'``.
        """
        self.momentum = momentum
        self.eps = eps
        self.group = group
        self.group_size = group_size
        self.stats_mode = stats_mode

        assert isinstance(
                   input, (torch.HalfTensor, torch.FloatTensor,
                           torch.cuda.HalfTensor, torch.cuda.FloatTensor)), \
               f'only support Half or Float Tensor, but {input.type()}'
        output = torch.zeros_like(input)
        # Collapse all trailing dimensions; ``output3d`` is a view of
        # ``output`` so writes by the extension land in ``output``.
        input3d = input.flatten(start_dim=2)
        output3d = output.view_as(input3d)
        num_channels = input3d.size(1)

        # ensure mean/var/norm/std are initialized as zeros
        # ``torch.empty()`` does not guarantee that
        mean = torch.zeros(
            num_channels, dtype=torch.float, device=input3d.device)
        var = torch.zeros(
            num_channels, dtype=torch.float, device=input3d.device)
        norm = torch.zeros_like(
            input3d, dtype=torch.float, device=input3d.device)
        std = torch.zeros(
            num_channels, dtype=torch.float, device=input3d.device)

        batch_size = input3d.size(0)
        if batch_size > 0:
            ext_module.sync_bn_forward_mean(input3d, mean)
            batch_flag = torch.ones([1], device=mean.device, dtype=mean.dtype)
        else:
            # skip updating mean and leave it as zeros when the input is empty
            batch_flag = torch.zeros([1], device=mean.device, dtype=mean.dtype)

        # synchronize mean and the batch flag
        # (the flag rides along as one extra element so a single all_reduce
        # yields both the summed means and the batch count)
        vec = torch.cat([mean, batch_flag])
        if self.stats_mode == 'N':
            vec *= batch_size
        if self.group_size > 1:
            dist.all_reduce(vec, group=self.group)
        total_batch = vec[-1].detach()
        mean = vec[:num_channels]

        if self.stats_mode == 'default':
            mean = mean / self.group_size
        elif self.stats_mode == 'N':
            # clamp avoids a division by zero when every rank is empty
            mean = mean / total_batch.clamp(min=1)
        else:
            raise NotImplementedError

        # leave var as zeros when the input is empty
        if batch_size > 0:
            ext_module.sync_bn_forward_var(input3d, mean, var)

        if self.stats_mode == 'N':
            var *= batch_size
        if self.group_size > 1:
            dist.all_reduce(var, group=self.group)

        if self.stats_mode == 'default':
            var /= self.group_size
        elif self.stats_mode == 'N':
            var /= total_batch.clamp(min=1)
        else:
            raise NotImplementedError

        # if the total batch size over all the ranks is zero,
        # we should not update the statistics in the current batch
        update_flag = total_batch.clamp(max=1)
        momentum = update_flag * self.momentum
        ext_module.sync_bn_forward_output(
            input3d,
            mean,
            var,
            weight,
            bias,
            running_mean,
            running_var,
            norm,
            std,
            output3d,
            eps=self.eps,
            momentum=momentum,
            group_size=self.group_size)
        self.save_for_backward(norm, std, weight)
        return output

    @staticmethod
    @once_differentiable
    def backward(self, grad_output):
        """Compute gradients for ``input``, ``weight`` and ``bias``.

        Args:
            grad_output (torch.Tensor): Gradient w.r.t. the forward output.

        Returns:
            tuple: One entry per forward argument; only the entries for
            ``input``, ``weight`` and ``bias`` are tensors, the rest are
            ``None``.
        """
        norm, std, weight = self.saved_tensors
        grad_weight = torch.zeros_like(weight)
        grad_bias = torch.zeros_like(weight)
        grad_input = torch.zeros_like(grad_output)
        grad_output3d = grad_output.flatten(start_dim=2)
        grad_input3d = grad_input.view_as(grad_output3d)

        batch_size = grad_input3d.size(0)
        if batch_size > 0:
            ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight,
                                              grad_bias)

        # all reduce
        # (performed even for an empty local batch so every rank takes part
        # in the collective)
        if self.group_size > 1:
            dist.all_reduce(grad_weight, group=self.group)
            dist.all_reduce(grad_bias, group=self.group)
            grad_weight /= self.group_size
            grad_bias /= self.group_size

        if batch_size > 0:
            ext_module.sync_bn_backward_data(grad_output3d, weight,
                                             grad_weight, grad_bias, norm, std,
                                             grad_input3d)

        return grad_input, None, None, grad_weight, grad_bias, \
            None, None, None, None, None
def __init__(self,
             num_features,
             eps=1e-5,
             momentum=0.1,
             affine=True,
             track_running_stats=True,
             group=None,
             stats_mode='default'):
    """Store the hyper-parameters and create parameters and buffers.

    ``group`` falls back to ``dist.group.WORLD`` when it is ``None`` and the
    group size is queried with ``dist.get_world_size``. ``stats_mode`` must
    be ``'default'`` or ``'N'``.
    """
    super(SyncBatchNorm, self).__init__()
    self.num_features = num_features
    self.eps = eps
    self.momentum = momentum
    self.affine = affine
    self.track_running_stats = track_running_stats

    if group is None:
        group = dist.group.WORLD
    self.group = group
    self.group_size = dist.get_world_size(group)

    assert stats_mode in ['default', 'N'], \
        f'"stats_mode" only accepts "default" and "N", got "{stats_mode}"'
    self.stats_mode = stats_mode

    # Affine parameters are created uninitialised and filled in by
    # ``reset_parameters`` below.
    if not self.affine:
        self.register_parameter('weight', None)
        self.register_parameter('bias', None)
    else:
        self.weight = Parameter(torch.Tensor(num_features))
        self.bias = Parameter(torch.Tensor(num_features))

    if not self.track_running_stats:
        for buffer_name in ('running_mean', 'running_var',
                            'num_batches_tracked'):
            self.register_buffer(buffer_name, None)
    else:
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))
        self.register_buffer('num_batches_tracked',
                             torch.tensor(0, dtype=torch.long))

    self.reset_parameters()


def reset_running_stats(self):
    """Reset the running mean to 0, the running variance to 1 and the batch
    counter to 0. Does nothing when running statistics are not tracked."""
    if not self.track_running_stats:
        return
    self.running_mean.zero_()
    self.running_var.fill_(1)
    self.num_batches_tracked.zero_()


def reset_parameters(self):
    """Reset the running statistics and, for affine layers, re-initialise
    ``weight`` uniformly in [0, 1) and ``bias`` to zero."""
    self.reset_running_stats()
    if not self.affine:
        return
    self.weight.data.uniform_()  # pytorch use ones_()
    self.bias.data.zero_()
class ThreeInterpolate(Function):
    """Performs weighted linear interpolation on 3 features.

    Please refer to `Paper of PointNet++` for more details.
    """

    @staticmethod
    def forward(ctx, features: torch.Tensor, indices: torch.Tensor,
                weight: torch.Tensor) -> torch.Tensor:
        """
        Args:
            features (torch.Tensor): (B, C, M) Features descriptors to be
                interpolated.
            indices (torch.Tensor): (B, n, 3) indices of three nearest
                neighbor features for the target features.
            weight (torch.Tensor): (B, n, 3) weights of three nearest
                neighbor features for the target features.

        Returns:
            torch.Tensor: (B, C, n) tensor of the interpolated features. It
            is allocated on the same device and with the same dtype as
            ``features``.
        """
        assert features.is_contiguous()
        assert indices.is_contiguous()
        assert weight.is_contiguous()

        B, c, m = features.size()
        n = indices.size(1)
        ctx.three_interpolate_for_backward = (indices, weight, m)
        # Allocate next to the input instead of on the *current* CUDA device
        # (``torch.cuda.FloatTensor``), which is wrong when the input lives
        # on another GPU and unusable without CUDA.
        output = features.new_empty((B, c, n))

        ext_module.three_interpolate_forward(
            features, indices, weight, output, b=B, c=c, m=m, n=n)
        return output

    @staticmethod
    def backward(
        ctx, grad_out: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Args:
            grad_out (torch.Tensor): (B, C, n) tensor with gradients of
                outputs.

        Returns:
            tuple: (B, C, M) tensor with gradients of features, followed by
            ``None`` for ``indices`` and ``weight``.
        """
        idx, weight, m = ctx.three_interpolate_for_backward
        B, c, n = grad_out.size()

        # Zero-initialised because the kernel accumulates into it; created on
        # the device of the incoming gradient.
        grad_features = grad_out.new_zeros((B, c, m))
        grad_out_data = grad_out.data.contiguous()

        ext_module.three_interpolate_backward(
            grad_out_data, idx, weight, grad_features.data, b=B, c=c, n=n, m=m)
        return grad_features, None, None


three_interpolate = ThreeInterpolate.apply
class TINShiftFunction(Function):
    """Autograd function around the ``tin_shift`` extension kernels."""

    @staticmethod
    def forward(ctx, input, shift):
        """Shift ``input`` according to ``shift``.

        Args:
            input (torch.Tensor): Feature map; dimension 0 is the batch and
                dimension 2 is read as ``C``.
            shift (torch.Tensor): Shift tensor; dimension 0 is the batch and
                dimension 1 is read as ``num_segments``.

        Returns:
            torch.Tensor: Tensor with the same shape as ``input``.

        Raises:
            ValueError: If the batch sizes of ``input`` and ``shift`` differ,
                or if ``C`` is not a positive multiple of ``num_segments``.
        """
        # Validate the batch dimension up front: the kernel indexes ``shift``
        # per batch element, so a mismatch must not reach it.
        if input.size(0) != shift.size(0):
            raise ValueError(
                'The first dim (batch) of `input` and `shift` should be '
                f'same, but got {input.size(0)} and {shift.size(0)}.')
        C = input.size(2)
        num_segments = shift.size(1)
        if C // num_segments <= 0 or C % num_segments != 0:
            raise ValueError('C should be a multiple of num_segments, '
                             f'but got C={C} and num_segments={num_segments}.')

        ctx.save_for_backward(shift)

        out = torch.zeros_like(input)
        ext_module.tin_shift_forward(input, shift, out)

        return out

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input`` and an all-zero gradient for
        ``shift``."""
        shift = ctx.saved_tensors[0]
        data_grad_input = grad_output.new(*grad_output.size()).zero_()
        shift_grad_input = shift.new(*shift.size()).zero_()
        ext_module.tin_shift_backward(grad_output, shift, data_grad_input)

        return data_grad_input, shift_grad_input


tin_shift = TINShiftFunction.apply


class TINShift(nn.Module):
    """Temporal Interlace Shift.

    Temporal Interlace shift is a differentiable temporal-wise frame shifting
    which is proposed in "Temporal Interlacing Network"

    Please refer to `Temporal Interlacing Network` for more details.

    Code is modified from https://github.com/mit-han-lab/temporal-shift-module
    """

    def forward(self, input, shift):
        """Perform temporal interlace shift.

        Args:
            input (torch.Tensor): Feature map with shape
                [N, num_segments, C, H * W].
            shift (torch.Tensor): Shift tensor with shape [N, num_segments].

        Returns:
            Feature map after temporal interlace shift.
        """
        return tin_shift(input, shift)
Subject to the terms and conditions of this # License, each Licensor grants to you a perpetual, worldwide, # non-exclusive, royalty-free, copyright license to reproduce, # prepare derivative works of, publicly display, publicly perform, # sublicense and distribute its Work and any resulting derivative # works in any form. # 3. Limitations # 3.1 Redistribution. You may reproduce or distribute the Work only # if (a) you do so under this License, (b) you include a complete # copy of this License with your distribution, and (c) you retain # without modification any copyright, patent, trademark, or # attribution notices that are present in the Work. # 3.2 Derivative Works. You may specify that additional or different # terms apply to the use, reproduction, and distribution of your # derivative works of the Work ("Your Terms") only if (a) Your Terms # provide that the use limitation in Section 3.3 applies to your # derivative works, and (b) you identify the specific derivative # works that are subject to Your Terms. Notwithstanding Your Terms, # this License (including the redistribution requirements in Section # 3.1) will continue to apply to the Work itself. # 3.3 Use Limitation. The Work and any derivative works thereof only # may be used or intended for use non-commercially. Notwithstanding # the foregoing, NVIDIA and its affiliates may use the Work and any # derivative works commercially. As used herein, "non-commercially" # means for research or evaluation purposes only. # 3.4 Patent Claims. If you bring or threaten to bring a patent claim # against any Licensor (including any claim, cross-claim or # counterclaim in a lawsuit) to enforce any patents that you allege # are infringed by any Work, then your rights under this License from # such Licensor (including the grant in Section 2.1) will terminate # immediately. # 3.5 Trademarks. 
This License does not grant any rights to use any # Licensor’s or its affiliates’ names, logos, or trademarks, except # as necessary to reproduce the notices described in this License. # 3.6 Termination. If you violate any term of this License, then your # rights under this License (including the grant in Section 2.1) will # terminate immediately. # 4. Disclaimer of Warranty. # THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR # NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER # THIS LICENSE. # 5. Limitation of Liability. # EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL # THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE # SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, # INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF # OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK # (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, # LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER # COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF # THE POSSIBILITY OF SUCH DAMAGES. 
class UpFirDn2dBackward(Function):
    """Backward pass of :class:`UpFirDn2d`, itself written as an autograd
    function so that it can be differentiated again."""

    @staticmethod
    def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad,
                in_size, out_size):
        """Compute the gradient w.r.t. the input of ``UpFirDn2d``.

        Args:
            grad_output (torch.Tensor): Gradient w.r.t. the forward output.
            kernel (torch.Tensor): Forward kernel, saved for double backward.
            grad_kernel (torch.Tensor): Kernel used for this gradient pass.
            up (tuple[int]): ``(up_x, up_y)`` of the forward pass.
            down (tuple[int]): ``(down_x, down_y)`` of the forward pass.
            pad (tuple[int]): ``(pad_x0, pad_x1, pad_y0, pad_y1)`` of the
                forward pass.
            g_pad (tuple[int]): Padding to use for this gradient pass.
            in_size: Shape of the forward input (4 entries are read).
            out_size: ``(out_h, out_w)`` of the forward output.

        Returns:
            torch.Tensor: Gradient viewed with the shape ``in_size``.
        """

        up_x, up_y = up
        down_x, down_y = down
        g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad

        grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1)

        # Same op as the forward pass with the up/down factors swapped and
        # the gradient padding applied.
        grad_input = upfirdn2d_ext.upfirdn2d(
            grad_output,
            grad_kernel,
            up_x=down_x,
            up_y=down_y,
            down_x=up_x,
            down_y=up_y,
            pad_x0=g_pad_x0,
            pad_x1=g_pad_x1,
            pad_y0=g_pad_y0,
            pad_y1=g_pad_y1)
        grad_input = grad_input.view(in_size[0], in_size[1], in_size[2],
                                     in_size[3])

        ctx.save_for_backward(kernel)

        pad_x0, pad_x1, pad_y0, pad_y1 = pad

        # Keep the original forward configuration for the double backward.
        ctx.up_x = up_x
        ctx.up_y = up_y
        ctx.down_x = down_x
        ctx.down_y = down_y
        ctx.pad_x0 = pad_x0
        ctx.pad_x1 = pad_x1
        ctx.pad_y0 = pad_y0
        ctx.pad_y1 = pad_y1
        ctx.in_size = in_size
        ctx.out_size = out_size

        return grad_input

    @staticmethod
    def backward(ctx, gradgrad_input):
        """Double backward: re-applies the op with the forward kernel and the
        forward up/down/pad configuration to ``gradgrad_input``."""
        kernel, = ctx.saved_tensors

        gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2],
                                                ctx.in_size[3], 1)

        gradgrad_out = upfirdn2d_ext.upfirdn2d(
            gradgrad_input,
            kernel,
            up_x=ctx.up_x,
            up_y=ctx.up_y,
            down_x=ctx.down_x,
            down_y=ctx.down_y,
            pad_x0=ctx.pad_x0,
            pad_x1=ctx.pad_x1,
            pad_y0=ctx.pad_y0,
            pad_y1=ctx.pad_y1)
        # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0],
        #                                  ctx.out_size[1], ctx.in_size[3])
        gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.in_size[1],
                                         ctx.out_size[0], ctx.out_size[1])

        # Only ``grad_output`` (first forward argument) receives a gradient.
        return gradgrad_out, None, None, None, None, None, None, None, None


class UpFirDn2d(Function):
    """Autograd function calling the compiled ``upfirdn2d`` op."""

    @staticmethod
    def forward(ctx, input, kernel, up, down, pad):
        """Run the extension op on a 4-D ``input``.

        Args:
            input (torch.Tensor): 4-D tensor; its shape is unpacked as
                ``(batch, channel, in_h, in_w)``.
            kernel (torch.Tensor): 2-D filter kernel.
            up (tuple[int]): ``(up_x, up_y)``.
            down (tuple[int]): ``(down_x, down_y)``.
            pad (tuple[int]): ``(pad_x0, pad_x1, pad_y0, pad_y1)``.

        Returns:
            torch.Tensor: Output viewed as ``(-1, channel, out_h, out_w)``.
        """
        up_x, up_y = up
        down_x, down_y = down
        pad_x0, pad_x1, pad_y0, pad_y1 = pad

        kernel_h, kernel_w = kernel.shape
        batch, channel, in_h, in_w = input.shape
        ctx.in_size = input.shape

        # Fold batch and channel together and add a trailing axis of size 1.
        input = input.reshape(-1, in_h, in_w, 1)

        # The flipped kernel is the one handed to the gradient pass.
        ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1]))

        out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
        out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1
        ctx.out_size = (out_h, out_w)

        ctx.up = (up_x, up_y)
        ctx.down = (down_x, down_y)
        ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1)

        # Padding used by UpFirDn2dBackward for the gradient pass.
        g_pad_x0 = kernel_w - pad_x0 - 1
        g_pad_y0 = kernel_h - pad_y0 - 1
        g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1
        g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1

        ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1)

        out = upfirdn2d_ext.upfirdn2d(
            input,
            kernel,
            up_x=up_x,
            up_y=up_y,
            down_x=down_x,
            down_y=down_y,
            pad_x0=pad_x0,
            pad_x1=pad_x1,
            pad_y0=pad_y0,
            pad_y1=pad_y1)
        # out = out.view(major, out_h, out_w, minor)
        out = out.view(-1, channel, out_h, out_w)

        return out

    @staticmethod
    def backward(ctx, grad_output):
        """Delegate to :class:`UpFirDn2dBackward`; only ``input`` receives a
        gradient."""
        kernel, grad_kernel = ctx.saved_tensors

        grad_input = UpFirDn2dBackward.apply(
            grad_output,
            kernel,
            grad_kernel,
            ctx.up,
            ctx.down,
            ctx.pad,
            ctx.g_pad,
            ctx.in_size,
            ctx.out_size,
        )

        return grad_input, None, None, None, None
def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1,
                     pad_y0, pad_y1):
    """Pure PyTorch implementation of upsample, FIR filter and downsample.

    Args:
        input (torch.Tensor): Tensor with shape of (n, c, h, w).
        kernel (torch.Tensor): 2-D filter kernel.
        up_x (int): Upsampling factor along the width.
        up_y (int): Upsampling factor along the height.
        down_x (int): Downsampling factor along the width.
        down_y (int): Downsampling factor along the height.
        pad_x0 (int): Padding before the first column. Negative values crop.
        pad_x1 (int): Padding after the last column. Negative values crop.
        pad_y0 (int): Padding before the first row. Negative values crop.
        pad_y1 (int): Padding after the last row. Negative values crop.

    Returns:
        torch.Tensor: Tensor with shape of (n, c, out_h, out_w).
    """
    _, channel, in_h, in_w = input.shape
    planes = input.reshape(-1, in_h, in_w, 1)
    minor = planes.shape[3]
    kernel_h, kernel_w = kernel.shape

    # Upsample by zero insertion: pad the two singleton axes that follow the
    # height and width axes, then merge them back.
    stretched = planes.view(-1, in_h, 1, in_w, 1, minor)
    stretched = F.pad(stretched, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
    stretched = stretched.view(-1, in_h * up_y, in_w * up_x, minor)

    # Positive paddings add zeros, negative paddings are applied as crops.
    grow = [0, 0] + [max(p, 0) for p in (pad_x0, pad_x1, pad_y0, pad_y1)]
    stretched = F.pad(stretched, grow)
    top, bottom = max(-pad_y0, 0), max(-pad_y1, 0)
    left, right = max(-pad_x0, 0), max(-pad_x1, 0)
    stretched = stretched[:, top:stretched.shape[1] - bottom,
                          left:stretched.shape[2] - right, :]

    padded_h = in_h * up_y + pad_y0 + pad_y1
    padded_w = in_w * up_x + pad_x0 + pad_x1

    # ``conv2d`` is a cross-correlation, so the kernel is flipped to obtain a
    # true convolution.
    images = stretched.permute(0, 3, 1, 2).reshape(
        [-1, 1, padded_h, padded_w])
    flipped = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
    filtered = F.conv2d(images, flipped)
    filtered = filtered.reshape(-1, minor, padded_h - kernel_h + 1,
                                padded_w - kernel_w + 1)

    # Downsample by striding.
    filtered = filtered.permute(0, 2, 3, 1)
    filtered = filtered[:, ::down_y, ::down_x, :]

    out_h = (padded_h - kernel_h) // down_y + 1
    out_w = (padded_w - kernel_w) // down_x + 1
    return filtered.view(-1, channel, out_h, out_w)
import torch
from torch import nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair

from ..utils import ext_loader

# Compiled extension that provides the two voxelization kernels used below.
ext_module = ext_loader.load_ext(
    '_ext', ['dynamic_voxelize_forward', 'hard_voxelize_forward'])


class _Voxelization(Function):
    """Autograd ``Function`` wrapping the voxelization kernels of ``_ext``.

    Only ``forward`` is defined here; no ``backward`` is provided.
    """

    @staticmethod
    def forward(ctx,
                points,
                voxel_size,
                coors_range,
                max_points=35,
                max_voxels=20000):
        """Convert kitti points(N, >=3) to voxels.

        Args:
            points (torch.Tensor): [N, ndim]. Points[:, :3] contain xyz points
                and points[:, 3:] contain other information like reflectivity.
            voxel_size (tuple or float): The size of voxel with the shape of
                [3].
            coors_range (tuple or float): The coordinate range of voxel with
                the shape of [6].
            max_points (int, optional): maximum points contained in a voxel. if
                max_points=-1, it means using dynamic_voxelize. Default: 35.
            max_voxels (int, optional): maximum voxels this function create.
                for second, 20000 is a good choice. Users should shuffle points
                before call this function because max_voxels may drop points.
                A value of -1 also selects dynamic_voxelize. Default: 20000.

        Returns:
            torch.Tensor | tuple[torch.Tensor]: In the dynamic case
            (``max_points == -1`` or ``max_voxels == -1``) only the integer
            coordinate tensor of shape [N, 3] is returned. Otherwise a tuple
            of three elements is returned. The first one is the output voxels
            with the shape of [M, max_points, n_dim]. The second is the voxel
            coordinates with shape of [M, 3]. The last is number of point per
            voxel with the shape of [M].
        """
        if max_points == -1 or max_voxels == -1:
            # Dynamic voxelization: one coordinate row per input point,
            # filled in place by the extension.
            coors = points.new_zeros(size=(points.size(0), 3), dtype=torch.int)
            ext_module.dynamic_voxelize_forward(
                points,
                torch.tensor(voxel_size, dtype=torch.float),
                torch.tensor(coors_range, dtype=torch.float),
                coors,
                NDim=3)
            return coors
        else:
            # Hard voxelization: pre-allocate buffers sized for the maximum
            # number of voxels; the extension writes into them.
            voxels = points.new_zeros(
                size=(max_voxels, max_points, points.size(1)))
            coors = points.new_zeros(size=(max_voxels, 3), dtype=torch.int)
            num_points_per_voxel = points.new_zeros(
                size=(max_voxels, ), dtype=torch.int)
            # 0-dim counter passed to the extension and used below to slice
            # the buffers. Presumably filled with the number of voxels
            # actually produced -- confirm against the extension source.
            voxel_num = torch.zeros(size=(), dtype=torch.long)
            ext_module.hard_voxelize_forward(
                points,
                torch.tensor(voxel_size, dtype=torch.float),
                torch.tensor(coors_range, dtype=torch.float),
                voxels,
                coors,
                num_points_per_voxel,
                voxel_num,
                max_points=max_points,
                max_voxels=max_voxels,
                NDim=3)
            # select the valid voxels
            voxels_out = voxels[:voxel_num]
            coors_out = coors[:voxel_num]
            num_points_per_voxel_out = num_points_per_voxel[:voxel_num]
            return voxels_out, coors_out, num_points_per_voxel_out


# Functional entry point used by the ``Voxelization`` module below.
voxelization = _Voxelization.apply


class Voxelization(nn.Module):
    """Convert kitti points(N, >=3) to voxels.

    Please refer to the paper "Point-Voxel CNN for Efficient 3D Deep
    Learning" for more details.

    Args:
        voxel_size (tuple or float): The size of voxel with the shape of [3].
        point_cloud_range (tuple or float): The coordinate range of voxel with
            the shape of [6].
        max_num_points (int): maximum points contained in a voxel. if
            max_points=-1, it means using dynamic_voxelize.
        max_voxels (int | tuple[int], optional): maximum voxels this function
            create. A single value is expanded to a pair; the first entry is
            used in training mode and the second otherwise.
            for second, 20000 is a good choice. Users should shuffle points
            before call this function because max_voxels may drop points.
            Default: 20000.
    """

    def __init__(self,
                 voxel_size,
                 point_cloud_range,
                 max_num_points,
                 max_voxels=20000):
        super().__init__()

        self.voxel_size = voxel_size
        self.point_cloud_range = point_cloud_range
        self.max_num_points = max_num_points
        # Store a (training, testing) pair; see ``forward``.
        if isinstance(max_voxels, tuple):
            self.max_voxels = max_voxels
        else:
            self.max_voxels = _pair(max_voxels)

        point_cloud_range = torch.tensor(
            point_cloud_range, dtype=torch.float32)
        voxel_size = torch.tensor(voxel_size, dtype=torch.float32)
        # Number of voxels along each axis: (upper - lower) / voxel size,
        # rounded to the nearest integer.
        grid_size = (point_cloud_range[3:] -
                     point_cloud_range[:3]) / voxel_size
        grid_size = torch.round(grid_size).long()
        input_feat_shape = grid_size[:2]
        self.grid_size = grid_size
        # the origin shape is as [x-len, y-len, z-len]
        # [w, h, d] -> [d, h, w]
        self.pcd_shape = [*input_feat_shape, 1][::-1]

    def forward(self, input):
        """Voxelize ``input`` using the limit that matches the current mode.

        ``self.max_voxels[0]`` is used when ``self.training`` is true and
        ``self.max_voxels[1]`` otherwise.
        """
        if self.training:
            max_voxels = self.max_voxels[0]
        else:
            max_voxels = self.max_voxels[1]

        return voxelization(input, self.voxel_size, self.point_cloud_range,
                            self.max_num_points, max_voxels)

    def __repr__(self):
        """Return a string listing the constructor arguments."""
        s = self.__class__.__name__ + '('
        s += 'voxel_size=' + str(self.voxel_size)
        s += ', point_cloud_range=' + str(self.point_cloud_range)
        s += ', max_num_points=' + str(self.max_num_points)
        s += ', max_voxels=' + str(self.max_voxels)
        s += ')'
        return s

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .collate import collate from .data_container import DataContainer from .data_parallel import MMDataParallel from .distributed import MMDistributedDataParallel from .registry import MODULE_WRAPPERS from .scatter_gather import scatter, scatter_kwargs from .utils import is_module_wrapper __all__ = [ 'collate', 'DataContainer', 'MMDataParallel', 'MMDistributedDataParallel', 'scatter', 'scatter_kwargs', 'is_module_wrapper', 'MODULE_WRAPPERS' ] ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/_functions.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import torch from torch.nn.parallel._functions import _get_stream def scatter(input, devices, streams=None): """Scatters tensor across multiple GPUs.""" if streams is None: streams = [None] * len(devices) if isinstance(input, list): chunk_size = (len(input) - 1) // len(devices) + 1 outputs = [ scatter(input[i], [devices[i // chunk_size]], [streams[i // chunk_size]]) for i in range(len(input)) ] return outputs elif isinstance(input, torch.Tensor): output = input.contiguous() # TODO: copy to a pinned buffer first (if copying from CPU) stream = streams[0] if output.numel() > 0 else None if devices != [-1]: with torch.cuda.device(devices[0]), torch.cuda.stream(stream): output = output.cuda(devices[0], non_blocking=True) return output else: raise Exception(f'Unknown type {type(input)}.') def synchronize_stream(output, devices, streams): if isinstance(output, list): chunk_size = len(output) // len(devices) for i in range(len(devices)): for j in range(chunk_size): synchronize_stream(output[i * chunk_size + j], [devices[i]], [streams[i]]) elif isinstance(output, torch.Tensor): if output.numel() != 0: with torch.cuda.device(devices[0]): main_stream = torch.cuda.current_stream() main_stream.wait_stream(streams[0]) output.record_stream(main_stream) else: raise Exception(f'Unknown type {type(output)}.') def 
get_input_device(input): if isinstance(input, list): for item in input: input_device = get_input_device(item) if input_device != -1: return input_device return -1 elif isinstance(input, torch.Tensor): return input.get_device() if input.is_cuda else -1 else: raise Exception(f'Unknown type {type(input)}.') class Scatter: @staticmethod def forward(target_gpus, input): input_device = get_input_device(input) streams = None if input_device == -1 and target_gpus != [-1]: # Perform CPU to GPU copies in a background stream streams = [_get_stream(device) for device in target_gpus] outputs = scatter(input, target_gpus, streams) # Synchronize with the copy stream if streams is not None: synchronize_stream(outputs, target_gpus, streams) return tuple(outputs) if isinstance(outputs, list) else (outputs, ) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/collate.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from collections.abc import Mapping, Sequence import torch import torch.nn.functional as F from torch.utils.data.dataloader import default_collate from .data_container import DataContainer def collate(batch, samples_per_gpu=1): """Puts each data field into a tensor/DataContainer with outer dimension batch size. Extend default_collate to add support for :type:`~mmcv.parallel.DataContainer`. There are 3 cases. 1. cpu_only = True, e.g., meta data 2. cpu_only = False, stack = True, e.g., images tensors 3. 
cpu_only = False, stack = False, e.g., gt bboxes """ if not isinstance(batch, Sequence): raise TypeError(f'{batch.dtype} is not supported.') if isinstance(batch[0], DataContainer): stacked = [] if batch[0].cpu_only: for i in range(0, len(batch), samples_per_gpu): stacked.append( [sample.data for sample in batch[i:i + samples_per_gpu]]) return DataContainer( stacked, batch[0].stack, batch[0].padding_value, cpu_only=True) elif batch[0].stack: for i in range(0, len(batch), samples_per_gpu): assert isinstance(batch[i].data, torch.Tensor) if batch[i].pad_dims is not None: ndim = batch[i].dim() assert ndim > batch[i].pad_dims max_shape = [0 for _ in range(batch[i].pad_dims)] for dim in range(1, batch[i].pad_dims + 1): max_shape[dim - 1] = batch[i].size(-dim) for sample in batch[i:i + samples_per_gpu]: for dim in range(0, ndim - batch[i].pad_dims): assert batch[i].size(dim) == sample.size(dim) for dim in range(1, batch[i].pad_dims + 1): max_shape[dim - 1] = max(max_shape[dim - 1], sample.size(-dim)) padded_samples = [] for sample in batch[i:i + samples_per_gpu]: pad = [0 for _ in range(batch[i].pad_dims * 2)] for dim in range(1, batch[i].pad_dims + 1): pad[2 * dim - 1] = max_shape[dim - 1] - sample.size(-dim) padded_samples.append( F.pad( sample.data, pad, value=sample.padding_value)) stacked.append(default_collate(padded_samples)) elif batch[i].pad_dims is None: stacked.append( default_collate([ sample.data for sample in batch[i:i + samples_per_gpu] ])) else: raise ValueError( 'pad_dims should be either None or integers (1-3)') else: for i in range(0, len(batch), samples_per_gpu): stacked.append( [sample.data for sample in batch[i:i + samples_per_gpu]]) return DataContainer(stacked, batch[0].stack, batch[0].padding_value) elif isinstance(batch[0], Sequence): transposed = zip(*batch) return [collate(samples, samples_per_gpu) for samples in transposed] elif isinstance(batch[0], Mapping): return { key: collate([d[key] for d in batch], samples_per_gpu) for key in batch[0] } 
else: return default_collate(batch) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/data_container.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import functools import torch def assert_tensor_type(func): @functools.wraps(func) def wrapper(*args, **kwargs): if not isinstance(args[0].data, torch.Tensor): raise AttributeError( f'{args[0].__class__.__name__} has no attribute ' f'{func.__name__} for type {args[0].datatype}') return func(*args, **kwargs) return wrapper class DataContainer: """A container for any type of objects. Typically tensors will be stacked in the collate function and sliced along some dimension in the scatter function. This behavior has some limitations. 1. All tensors have to be the same size. 2. Types are limited (numpy array or Tensor). We design `DataContainer` and `MMDataParallel` to overcome these limitations. The behavior can be either of the following. - copy to GPU, pad all tensors to the same size and stack them - copy to GPU without stacking - leave the objects as is and pass it to the model - pad_dims specifies the number of last few dimensions to do padding """ def __init__(self, data, stack=False, padding_value=0, cpu_only=False, pad_dims=2): self._data = data self._cpu_only = cpu_only self._stack = stack self._padding_value = padding_value assert pad_dims in [None, 1, 2, 3] self._pad_dims = pad_dims def __repr__(self): return f'{self.__class__.__name__}({repr(self.data)})' def __len__(self): return len(self._data) @property def data(self): return self._data @property def datatype(self): if isinstance(self.data, torch.Tensor): return self.data.type() else: return type(self.data) @property def cpu_only(self): return self._cpu_only @property def stack(self): return self._stack @property def padding_value(self): return self._padding_value @property def pad_dims(self): return self._pad_dims @assert_tensor_type def size(self, *args, 
**kwargs): return self.data.size(*args, **kwargs) @assert_tensor_type def dim(self): return self.data.dim() ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/data_parallel.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from itertools import chain from torch.nn.parallel import DataParallel from .scatter_gather import scatter_kwargs class MMDataParallel(DataParallel): """The DataParallel module that supports DataContainer. MMDataParallel has two main differences with PyTorch DataParallel: - It supports a custom type :class:`DataContainer` which allows more flexible control of input data during both GPU and CPU inference. - It implement two more APIs ``train_step()`` and ``val_step()``. .. warning:: MMDataParallel only supports single GPU training, if you need to train with multiple GPUs, please use MMDistributedDataParallel instead. If you have multiple GPUs and you just want to use MMDataParallel, you can set the environment variable ``CUDA_VISIBLE_DEVICES=0`` or instantiate ``MMDataParallel`` with ``device_ids=[0]``. Args: module (:class:`nn.Module`): Module to be encapsulated. device_ids (list[int]): Device IDS of modules to be scattered to. Defaults to None when GPU is not available. output_device (str | int): Device ID for output. Defaults to None. dim (int): Dimension used to scatter the data. Defaults to 0. """ def __init__(self, *args, dim=0, **kwargs): super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs) self.dim = dim if isinstance(self.module, dict) and len(self.device_ids) == 1: for name, m in self.module.items(): self.module[name] = m.to(self.src_device_obj) def forward(self, *inputs, **kwargs): """Override the original forward function. The main difference lies in the CPU inference where the data in :class:`DataContainers` will still be gathered. 
""" if not self.device_ids: # We add the following line thus the module could gather and # convert data containers as those in GPU inference inputs, kwargs = self.scatter(inputs, kwargs, [-1]) return self.module(*inputs[0], **kwargs[0]) else: return super().forward(*inputs, **kwargs) def scatter(self, inputs, kwargs, device_ids): return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) def train_step(self, *inputs, **kwargs): if not self.device_ids: # We add the following line thus the module could gather and # convert data containers as those in GPU inference inputs, kwargs = self.scatter(inputs, kwargs, [-1]) return self.module.train_step(*inputs[0], **kwargs[0]) assert len(self.device_ids) == 1, \ ('MMDataParallel only supports single GPU training, if you need to' ' train with multiple GPUs, please use MMDistributedDataParallel' ' instead.') for t in chain(self.module.parameters(), self.module.buffers()): if t.device != self.src_device_obj: raise RuntimeError( 'module must have its parameters and buffers ' f'on device {self.src_device_obj} (device_ids[0]) but ' f'found one of them on device: {t.device}') inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) return self.module.train_step(*inputs[0], **kwargs[0]) def val_step(self, *inputs, **kwargs): if not self.device_ids: # We add the following line thus the module could gather and # convert data containers as those in GPU inference inputs, kwargs = self.scatter(inputs, kwargs, [-1]) return self.module.val_step(*inputs[0], **kwargs[0]) assert len(self.device_ids) == 1, \ ('MMDataParallel only supports single GPU training, if you need to' ' train with multiple GPUs, please use MMDistributedDataParallel' ' instead.') for t in chain(self.module.parameters(), self.module.buffers()): if t.device != self.src_device_obj: raise RuntimeError( 'module must have its parameters and buffers ' f'on device {self.src_device_obj} (device_ids[0]) but ' f'found one of them on device: {t.device}') inputs, kwargs = 
self.scatter(inputs, kwargs, self.device_ids) return self.module.val_step(*inputs[0], **kwargs[0]) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/distributed.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import torch from torch.nn.parallel.distributed import (DistributedDataParallel, _find_tensors) from mmcv import print_log from mmcv.utils import TORCH_VERSION, digit_version from .scatter_gather import scatter_kwargs class MMDistributedDataParallel(DistributedDataParallel): """The DDP module that supports DataContainer. MMDDP has two main differences with PyTorch DDP: - It supports a custom type :class:`DataContainer` which allows more flexible control of input data. - It implement two APIs ``train_step()`` and ``val_step()``. """ def to_kwargs(self, inputs, kwargs, device_id): # Use `self.to_kwargs` instead of `self.scatter` in pytorch1.8 # to move all tensors to device_id return scatter_kwargs(inputs, kwargs, [device_id], dim=self.dim) def scatter(self, inputs, kwargs, device_ids): return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) def train_step(self, *inputs, **kwargs): """train_step() API for module wrapped by DistributedDataParallel. This method is basically the same as ``DistributedDataParallel.forward()``, while replacing ``self.module.forward()`` with ``self.module.train_step()``. It is compatible with PyTorch 1.1 - 1.5. """ # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the # end of backward to the beginning of forward. 
if ('parrots' not in TORCH_VERSION and digit_version(TORCH_VERSION) >= digit_version('1.7') and self.reducer._rebuild_buckets()): print_log( 'Reducer buckets have been rebuilt in this iteration.', logger='mmcv') if getattr(self, 'require_forward_param_sync', True): self._sync_params() if self.device_ids: inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) if len(self.device_ids) == 1: output = self.module.train_step(*inputs[0], **kwargs[0]) else: outputs = self.parallel_apply( self._module_copies[:len(inputs)], inputs, kwargs) output = self.gather(outputs, self.output_device) else: output = self.module.train_step(*inputs, **kwargs) if torch.is_grad_enabled() and getattr( self, 'require_backward_grad_sync', True): if self.find_unused_parameters: self.reducer.prepare_for_backward(list(_find_tensors(output))) else: self.reducer.prepare_for_backward([]) else: if ('parrots' not in TORCH_VERSION and digit_version(TORCH_VERSION) > digit_version('1.2')): self.require_forward_param_sync = False return output def val_step(self, *inputs, **kwargs): """val_step() API for module wrapped by DistributedDataParallel. This method is basically the same as ``DistributedDataParallel.forward()``, while replacing ``self.module.forward()`` with ``self.module.val_step()``. It is compatible with PyTorch 1.1 - 1.5. """ # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the # end of backward to the beginning of forward. 
if ('parrots' not in TORCH_VERSION and digit_version(TORCH_VERSION) >= digit_version('1.7') and self.reducer._rebuild_buckets()): print_log( 'Reducer buckets have been rebuilt in this iteration.', logger='mmcv') if getattr(self, 'require_forward_param_sync', True): self._sync_params() if self.device_ids: inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) if len(self.device_ids) == 1: output = self.module.val_step(*inputs[0], **kwargs[0]) else: outputs = self.parallel_apply( self._module_copies[:len(inputs)], inputs, kwargs) output = self.gather(outputs, self.output_device) else: output = self.module.val_step(*inputs, **kwargs) if torch.is_grad_enabled() and getattr( self, 'require_backward_grad_sync', True): if self.find_unused_parameters: self.reducer.prepare_for_backward(list(_find_tensors(output))) else: self.reducer.prepare_for_backward([]) else: if ('parrots' not in TORCH_VERSION and digit_version(TORCH_VERSION) > digit_version('1.2')): self.require_forward_param_sync = False return output ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/distributed_deprecated.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
import torch
import torch.distributed as dist
import torch.nn as nn
from torch._utils import (_flatten_dense_tensors, _take_tensors,
                          _unflatten_dense_tensors)

from mmcv.utils import TORCH_VERSION, digit_version
from .registry import MODULE_WRAPPERS
from .scatter_gather import scatter_kwargs


@MODULE_WRAPPERS.register_module()
class MMDistributedDataParallel(nn.Module):
    """Deprecated distributed wrapper built directly on ``nn.Module``.

    On construction the wrapped module's state (and optionally its buffers)
    is broadcast from rank 0; inputs are scattered to the current CUDA
    device before each call.

    Args:
        module (nn.Module): Module to wrap.
        dim (int): Dimension passed to ``scatter_kwargs``. Defaults to 0.
        broadcast_buffers (bool): Whether to also broadcast the module's
            buffers in ``_sync_params``. Defaults to True.
        bucket_cap_mb (int): Broadcast bucket size in MiB. Defaults to 25.
    """

    def __init__(self,
                 module,
                 dim=0,
                 broadcast_buffers=True,
                 bucket_cap_mb=25):
        super(MMDistributedDataParallel, self).__init__()
        self.module = module
        self.dim = dim
        self.broadcast_buffers = broadcast_buffers

        # Convert MiB to bytes.
        self.broadcast_bucket_size = bucket_cap_mb * 1024 * 1024
        self._sync_params()

    def _dist_broadcast_coalesced(self, tensors, buffer_size):
        """Broadcast ``tensors`` from rank 0 in buckets of ``buffer_size``.

        Each bucket is flattened, broadcast, and copied back in place.
        """
        # NOTE: the loop variable intentionally reuses the name ``tensors``;
        # the iterable is created from the argument before rebinding.
        for tensors in _take_tensors(tensors, buffer_size):
            flat_tensors = _flatten_dense_tensors(tensors)
            dist.broadcast(flat_tensors, 0)
            for tensor, synced in zip(
                    tensors, _unflatten_dense_tensors(flat_tensors, tensors)):
                tensor.copy_(synced)

    def _sync_params(self):
        """Broadcast the module's state dict values and, if enabled, buffers."""
        module_states = list(self.module.state_dict().values())
        if len(module_states) > 0:
            self._dist_broadcast_coalesced(module_states,
                                           self.broadcast_bucket_size)
        if self.broadcast_buffers:
            if (TORCH_VERSION != 'parrots'
                    and digit_version(TORCH_VERSION) < digit_version('1.0')):
                buffers = [b.data for b in self.module._all_buffers()]
            else:
                buffers = [b.data for b in self.module.buffers()]
            if len(buffers) > 0:
                self._dist_broadcast_coalesced(buffers,
                                               self.broadcast_bucket_size)

    def scatter(self, inputs, kwargs, device_ids):
        return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)

    def forward(self, *inputs, **kwargs):
        """Scatter to the current CUDA device and call the wrapped module."""
        inputs, kwargs = self.scatter(inputs, kwargs,
                                      [torch.cuda.current_device()])
        return self.module(*inputs[0], **kwargs[0])

    def train_step(self, *inputs, **kwargs):
        """Scatter to the current CUDA device and call ``module.train_step``."""
        inputs, kwargs = self.scatter(inputs, kwargs,
                                      [torch.cuda.current_device()])
        output = self.module.train_step(*inputs[0], **kwargs[0])
        return output

    def val_step(self, *inputs, **kwargs):
        """Scatter to the current CUDA device and call ``module.val_step``."""
        inputs, kwargs = self.scatter(inputs, kwargs,
                                      [torch.cuda.current_device()])
        output = self.module.val_step(*inputs[0], **kwargs[0])
        return output

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/registry.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from torch.nn.parallel import DataParallel, DistributedDataParallel

from mmcv.utils import Registry

# Registry of classes treated as module wrappers; the two PyTorch wrappers
# are registered up front.
MODULE_WRAPPERS = Registry('module wrapper')
MODULE_WRAPPERS.register_module(module=DataParallel)
MODULE_WRAPPERS.register_module(module=DistributedDataParallel)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/scatter_gather.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.nn.parallel._functions import Scatter as OrigScatter

from ._functions import Scatter
from .data_container import DataContainer


def scatter(inputs, target_gpus, dim=0):
    """Scatter inputs to target gpus.

    The only difference from original :func:`scatter` is to add support for
    :type:`~mmcv.parallel.DataContainer`.

    Args:
        inputs: Object to scatter; tensors, ``DataContainer`` instances and
            (nested) tuples, lists and dicts of them are handled specially,
            anything else is replicated once per target.
        target_gpus (list[int]): Target device ids; ``[-1]`` selects the
            CPU path.
        dim (int): Dimension along which tensors are split. Defaults to 0.
    """

    def scatter_map(obj):
        if isinstance(obj, torch.Tensor):
            if target_gpus != [-1]:
                return OrigScatter.apply(target_gpus, None, dim, obj)
            else:
                # for CPU inference we use self-implemented scatter
                return Scatter.forward(target_gpus, obj)
        if isinstance(obj, DataContainer):
            if obj.cpu_only:
                return obj.data
            else:
                return Scatter.forward(target_gpus, obj.data)
        # Containers: scatter every element, then transpose so the outer
        # index is the target device.
        if isinstance(obj, tuple) and len(obj) > 0:
            return list(zip(*map(scatter_map, obj)))
        if isinstance(obj, list) and len(obj) > 0:
            out = list(map(list, zip(*map(scatter_map, obj))))
            return out
        if isinstance(obj, dict) and len(obj) > 0:
            out = list(map(type(obj), zip(*map(scatter_map, obj.items()))))
            return out
        # Anything else (including empty containers) is replicated as is.
        return [obj for targets in target_gpus]

    # After scatter_map is called, a scatter_map cell will exist. This cell
    # has a reference to the actual function scatter_map, which has references
    # to a closure that has a reference to the scatter_map cell (because the
    # fn is recursive). To avoid this reference cycle, we set the function to
    # None, clearing the cell
    try:
        return scatter_map(inputs)
    finally:
        scatter_map = None


def scatter_kwargs(inputs, kwargs, target_gpus, dim=0):
    """Scatter with support for kwargs dictionary.

    Returns:
        tuple: ``(inputs, kwargs)`` as two tuples of equal length; the
        shorter one is padded with empty tuples / dicts.
    """
    inputs = scatter(inputs, target_gpus, dim) if inputs else []
    kwargs = scatter(kwargs, target_gpus, dim) if kwargs else []
    if len(inputs) < len(kwargs):
        inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
    elif len(kwargs) < len(inputs):
        kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
    inputs = tuple(inputs)
    kwargs = tuple(kwargs)
    return inputs, kwargs

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/utils.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .registry import MODULE_WRAPPERS


def is_module_wrapper(module):
    """Check if a module is a module wrapper.

    The following 3 modules in MMCV (and their subclasses) are regarded as
    module wrappers: DataParallel, DistributedDataParallel,
    MMDistributedDataParallel (the deprecated version). You may add your own
    module wrapper by registering it to mmcv.parallel.MODULE_WRAPPERS.

    Args:
        module (nn.Module): The module to be checked.

    Returns:
        bool: True if the input module is a module wrapper.
    """
    module_wrappers = tuple(MODULE_WRAPPERS.module_dict.values())
    return isinstance(module, module_wrappers)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/readme.md
================================================
test

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .base_module import BaseModel, BaseModule, ModuleDict, ModuleList, Sequential, BaseBackbone, BaseNecks, BaseLosses, BaseNecksV2 from .base_runner import BaseRunner from .builder import RUNNERS, build_runner from .checkpoint import (CheckpointLoader, _load_checkpoint, _load_checkpoint_with_prefix, load_checkpoint, load_state_dict, save_checkpoint, weights_to_cpu) from .default_constructor import DefaultRunnerConstructor from .dist_utils import (allreduce_grads, allreduce_params, get_dist_info, init_dist, master_only) from .epoch_based_runner import EpochBasedRunner, Runner from .fp16_utils import LossScaler, auto_fp16, force_fp32, wrap_fp16_model from .hooks import (HOOKS, CheckpointHook, ClosureHook, DistEvalHook, DistSamplerSeedHook, DvcliveLoggerHook, EMAHook, EvalHook, Fp16OptimizerHook, GradientCumulativeFp16OptimizerHook, GradientCumulativeOptimizerHook, Hook, IterTimerHook, LoggerHook, MlflowLoggerHook, NeptuneLoggerHook, OptimizerHook, PaviLoggerHook, SyncBuffersHook, TensorboardLoggerHook, TextLoggerHook, WandbLoggerHook) from .hooks.lr_updater import StepLrUpdaterHook # noqa from .hooks.lr_updater import (CosineAnnealingLrUpdaterHook, CosineRestartLrUpdaterHook, CyclicLrUpdaterHook, ExpLrUpdaterHook, FixedLrUpdaterHook, FlatCosineAnnealingLrUpdaterHook, InvLrUpdaterHook, LrUpdaterHook, OneCycleLrUpdaterHook, PolyLrUpdaterHook) from .hooks.momentum_updater import (CosineAnnealingMomentumUpdaterHook, CyclicMomentumUpdaterHook, MomentumUpdaterHook, OneCycleMomentumUpdaterHook, StepMomentumUpdaterHook) from .iter_based_runner import IterBasedRunner, IterLoader from .log_buffer import LogBuffer from .optimizer import (OPTIMIZER_BUILDERS, OPTIMIZERS, DefaultOptimizerConstructor, build_optimizer, build_optimizer_constructor) from .priority import Priority, get_priority from .utils import get_host_info, get_time_str, obj_from_dict, set_random_seed from .record import MetricLogger from .hooks.nni_hook import NNIHook from .misc import find_latest_checkpoint 
__all__ = [ 'BaseRunner', 'Runner', 'EpochBasedRunner', 'IterBasedRunner', 'LogBuffer', 'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook', 'FixedLrUpdaterHook', 'StepLrUpdaterHook', 'ExpLrUpdaterHook', 'PolyLrUpdaterHook', 'InvLrUpdaterHook', 'CosineAnnealingLrUpdaterHook', 'FlatCosineAnnealingLrUpdaterHook', 'CosineRestartLrUpdaterHook', 'CyclicLrUpdaterHook', 'OneCycleLrUpdaterHook', 'MomentumUpdaterHook', 'StepMomentumUpdaterHook', 'CosineAnnealingMomentumUpdaterHook', 'CyclicMomentumUpdaterHook', 'OneCycleMomentumUpdaterHook', 'OptimizerHook', 'IterTimerHook', 'DistSamplerSeedHook', 'LoggerHook', 'PaviLoggerHook', 'TextLoggerHook', 'TensorboardLoggerHook', 'NeptuneLoggerHook', 'WandbLoggerHook', 'MlflowLoggerHook', 'DvcliveLoggerHook', '_load_checkpoint', 'load_state_dict', 'load_checkpoint', 'weights_to_cpu', 'save_checkpoint', 'Priority', 'get_priority', 'get_host_info', 'get_time_str', 'obj_from_dict', 'init_dist', 'get_dist_info', 'master_only', 'OPTIMIZER_BUILDERS', 'OPTIMIZERS', 'DefaultOptimizerConstructor', 'build_optimizer', 'build_optimizer_constructor', 'IterLoader', 'set_random_seed', 'auto_fp16', 'force_fp32', 'wrap_fp16_model', 'Fp16OptimizerHook', 'SyncBuffersHook', 'EMAHook', 'build_runner', 'RUNNERS', 'allreduce_grads', 'allreduce_params', 'LossScaler', 'CheckpointLoader', 'BaseModule', 'BaseBackbone', 'BaseNecks', 'BaseLosses', 'BaseNecksV2', '_load_checkpoint_with_prefix', 'EvalHook', 'DistEvalHook', 'Sequential', 'ModuleDict', 'ModuleList', 'GradientCumulativeOptimizerHook', 'GradientCumulativeFp16OptimizerHook', 'DefaultRunnerConstructor', 'find_latest_checkpoint' ] ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/base_module.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
import copy import warnings from abc import ABCMeta from collections import defaultdict from logging import FileHandler import torch.nn as nn from mmcv.runner.dist_utils import master_only from mmcv.utils.logging import print_log, get_logger, logger_initialized class BaseModel(nn.Module): _task = {} def __init_subclass__(cls, name='', **kwargs): if name != '': # if name in cls._taskhead.keys(): # raise ValueError(f'Got name={name} existed' # f'in{cls._taskhead.keys()}') # else: cls._task[name] = cls cls._name = name else: # if cls.__name__ in cls._taskhead.keys(): # raise ValueError(f'Got cls.__name__={cls.__name__} existed ' # f'in{cls._taskhead.keys()}') # else: # warnings.warn(f'Creating a subclass of MetaModel {cls.__name__} with no name.') cls._task[cls.__name__] = cls cls._name = cls.__name__ @classmethod def build_model(cls, *args, **kwargs): # if cls is StreroSRModel: model = kwargs.pop('model') try: cls = cls._models[model] # print(cls) except KeyError: raise ValueError(f'Got model={model} but expected ' f'one of {cls._models.keys()}') return cls(None, None) @classmethod def new(cls, *args, **kwargs): task = kwargs.pop('task') try: cls = cls._task[task] except KeyError: raise ValueError(f'Got task={task} but expected ' f'one of {cls._task.keys()}') return cls(*args, **kwargs) class BaseModule(BaseModel, name='BaseModule'):#nn.Module, metaclass=ABCMeta """Base module for all modules in openmmlab. ``BaseModule`` is a wrapper of ``torch.nn.Module`` with additional functionality of parameter initialization. Compared with ``torch.nn.Module``, ``BaseModule`` mainly adds three attributes. - ``init_cfg``: the config to control the initialization. - ``init_weights``: The function of parameter initialization and recording initialization information. - ``_params_init_info``: Used to track the parameter initialization information. This attribute only exists during executing the ``init_weights``. Args: init_cfg (dict, optional): Initialization config dict. 
""" # _task = {} # # def __init_subclass__(cls, name='', **kwargs): # if name != '': # # if name in cls._taskhead.keys(): # # raise ValueError(f'Got name={name} existed' # # f'in{cls._taskhead.keys()}') # # else: # cls._task[name] = cls # cls._name = name # else: # # if cls.__name__ in cls._taskhead.keys(): # # raise ValueError(f'Got cls.__name__={cls.__name__} existed ' # # f'in{cls._taskhead.keys()}') # # else: # # warnings.warn(f'Creating a subclass of MetaModel {cls.__name__} with no name.') # cls._task[cls.__name__] = cls # cls._name = cls.__name__ # # # @classmethod # def new(cls, *args, **kwargs): # task = kwargs.pop('task') # try: # cls = cls._task[task] # except KeyError: # raise ValueError(f'Got task={task} but expected ' # f'one of {cls._task.keys()}') # # return cls def __init__(self, init_cfg=None): """Initialize BaseModule, inherited from `torch.nn.Module`""" # NOTE init_cfg can be defined in different levels, but init_cfg # in low levels has a higher priority. super(BaseModule, self).__init__() # define default value of init_cfg instead of hard code # in init_weights() function self._is_init = False self.init_cfg = copy.deepcopy(init_cfg) # Backward compatibility in derived classes # if pretrained is not None: # warnings.warn('DeprecationWarning: pretrained is a deprecated \ # key, please consider using init_cfg') # self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) @property def is_init(self): return self._is_init def init_weights(self): """Initialize the weights.""" is_top_level_module = False # check if it is top-level module if not hasattr(self, '_params_init_info'): # The `_params_init_info` is used to record the initialization # information of the parameters # the key should be the obj:`nn.Parameter` of model and the value # should be a dict containing # - init_info (str): The string that describes the initialization. 
# - tmp_mean_value (FloatTensor): The mean of the parameter, # which indicates whether the parameter has been modified. # this attribute would be deleted after all parameters # is initialized. self._params_init_info = defaultdict(dict) is_top_level_module = True # Initialize the `_params_init_info`, # When detecting the `tmp_mean_value` of # the corresponding parameter is changed, update related # initialization information for name, param in self.named_parameters(): self._params_init_info[param][ 'init_info'] = f'The value is the same before and ' \ f'after calling `init_weights` ' \ f'of {self.__class__.__name__} ' self._params_init_info[param][ 'tmp_mean_value'] = param.data.mean() # pass `params_init_info` to all submodules # All submodules share the same `params_init_info`, # so it will be updated when parameters are # modified at any level of the model. for sub_module in self.modules(): sub_module._params_init_info = self._params_init_info # Get the initialized logger, if not exist, # create a logger named `mmcv` logger_names = list(logger_initialized.keys()) logger_name = logger_names[0] if logger_names else 'mmcv' from ..cnn import initialize from ..cnn.utils.weight_init import update_init_info module_name = self.__class__.__name__ if not self._is_init: if self.init_cfg: print_log( f'initialize {module_name} with init_cfg {self.init_cfg}', logger=logger_name) initialize(self, self.init_cfg) if isinstance(self.init_cfg, dict): # prevent the parameters of # the pre-trained model # from being overwritten by # the `init_weights` if self.init_cfg['type'] == 'Pretrained': return for m in self.children(): if hasattr(m, 'init_weights'): m.init_weights() # users may overload the `init_weights` update_init_info( m, init_info=f'Initialized by ' f'user-defined `init_weights`' f' in {m.__class__.__name__} ') self._is_init = True else: warnings.warn(f'init_weights of {self.__class__.__name__} has ' f'been called more than once.') if is_top_level_module: 
self._dump_init_info(logger_name) for sub_module in self.modules(): del sub_module._params_init_info @master_only def _dump_init_info(self, logger_name): """Dump the initialization information to a file named `initialization.log.json` in workdir. Args: logger_name (str): The name of logger. """ logger = get_logger(logger_name) with_file_handler = False # dump the information to the logger file if there is a `FileHandler` for handler in logger.handlers: if isinstance(handler, FileHandler): handler.stream.write( 'Name of parameter - Initialization information\n') for name, param in self.named_parameters(): handler.stream.write( f'\n{name} - {param.shape}: ' f"\n{self._params_init_info[param]['init_info']} \n") handler.stream.flush() with_file_handler = True if not with_file_handler: for name, param in self.named_parameters(): print_log( f'\n{name} - {param.shape}: ' f"\n{self._params_init_info[param]['init_info']} \n ", logger=logger_name) def __repr__(self): s = super().__repr__() if self.init_cfg: s += f'\ninit_cfg={self.init_cfg}' return s class Sequential(BaseModule, nn.Sequential, name='Sequential'): """Sequential module in openmmlab. Args: init_cfg (dict, optional): Initialization config dict. """ def __init__(self, *args, init_cfg=None): BaseModule.__init__(self, init_cfg) nn.Sequential.__init__(self, *args) class ModuleList(BaseModule, nn.ModuleList, name='ModuleList'): """ModuleList in openmmlab. Args: modules (iterable, optional): an iterable of modules to add. init_cfg (dict, optional): Initialization config dict. """ def __init__(self, modules=None, init_cfg=None): BaseModule.__init__(self, init_cfg) nn.ModuleList.__init__(self, modules) class ModuleDict(BaseModule, nn.ModuleDict, name='ModuleDict'): """ModuleDict in openmmlab. Args: modules (dict, optional): a mapping (dictionary) of (string: module) or an iterable of key-value pairs of type (string, module). init_cfg (dict, optional): Initialization config dict. 
""" def __init__(self, modules=None, init_cfg=None): BaseModule.__init__(self, init_cfg) nn.ModuleDict.__init__(self, modules) class BaseBackbone(BaseModule, name='BaseBackbone'): _models = {} def __init_subclass__(cls, name='', **kwargs): if name != '': # if name in cls._models.keys(): # raise ValueError(f'Got name={name} existed' # f'in{cls._models.keys()}') # else: cls._models[name] = cls cls._name = name else: # if cls.__name__ in cls._models.keys(): # raise ValueError(f'Got cls.__name__={cls.__name__} existed' # f'in{cls._models.keys()}') # else: # warnings.warn(f'Creating a subclass of MetaModel {cls.__name__} with no name.') cls._models[cls.__name__] = cls cls._name = cls.__name__ @classmethod def build_model(cls, *args, **kwargs): model = kwargs.pop('model') try: cls = cls._models[model] except KeyError: raise ValueError(f'Got models={model} but expected ' f'one of {cls._models.keys()}') return cls class BaseLosses(nn.Module): _models = {} def __init_subclass__(cls, name='', **kwargs): # print(name, cls) if name != '': # if name in cls._models.keys(): # raise ValueError(f'Got name={name} existed' # f'in{cls._models.keys()}') # else: cls._models[name] = cls cls._name = name else: # if cls.__name__ in cls._models.keys(): # raise ValueError(f'Got cls.__name__={cls.__name__} existed' # f'in{cls._models.keys()}') # else: # warnings.warn(f'Creating a subclass of MetaModel {cls.__name__} with no name.') cls._models[cls.__name__] = cls cls._name = cls.__name__ @classmethod def build_model(cls, *args, **kwargs): model = kwargs.pop('model') try: cls = cls._models[model] except KeyError: raise ValueError(f'Got models={model} but expected ' f'one of {cls._models.keys()}') return cls class BaseNecks(nn.Module): _models = {} def __init_subclass__(cls, name='', **kwargs): # print(name, cls) if name != '': # if name in cls._models.keys(): # raise ValueError(f'Got name={name} existed' # f'in{cls._models.keys()}') # else: cls._models[name] = cls cls._name = name else: # if 
cls.__name__ in cls._models.keys(): # raise ValueError(f'Got cls.__name__={cls.__name__} existed' # f'in{cls._models.keys()}') # else: # warnings.warn(f'Creating a subclass of MetaModel {cls.__name__} with no name.') cls._models[cls.__name__] = cls cls._name = cls.__name__ @classmethod def build_model(cls, *args, **kwargs): model = kwargs.pop('model') try: cls = cls._models[model] except KeyError: raise ValueError(f'Got models={model} but expected ' f'one of {cls._models.keys()}') return cls class BaseNecksV2(BaseModule, BaseNecks, name='BaseNecksV2'): ''' 父类的_models, __init_subclass__都会被继承 ''' ... ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/base_runner.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import copy import logging import os.path as osp import warnings from abc import ABCMeta, abstractmethod import torch from torch.optim import Optimizer import mmcv from ..parallel import is_module_wrapper from .checkpoint import load_checkpoint from .dist_utils import get_dist_info from .hooks import HOOKS, Hook from .log_buffer import LogBuffer from .priority import Priority, get_priority from .utils import get_time_str from .record import MetricLogger class BaseRunner(metaclass=ABCMeta): """The base class of Runner, a training helper for PyTorch. All subclasses should implement the following APIs: - ``run()`` - ``train()`` - ``val()`` - ``save_checkpoint()`` Args: model (:obj:`torch.nn.Module`): The model to be run. batch_processor (callable): A callable method that process a data batch. The interface of this method should be `batch_processor(model, data, train_mode) -> dict` optimizer (dict or :obj:`torch.optim.Optimizer`): It can be either an optimizer (in most cases) or a dict of optimizers (in models that requires more than one optimizer, e.g., GAN). work_dir (str, optional): The working directory to save checkpoints and logs. Defaults to None. 
logger (:obj:`logging.Logger`): Logger used during training. Defaults to None. (The default value is just for backward compatibility) meta (dict | None): A dict records some import information such as environment info and seed, which will be logged in logger hook. Defaults to None. max_epochs (int, optional): Total training epochs. max_iters (int, optional): Total training iterations. """ def __init__(self, model, batch_processor=None, optimizer=None, work_dir=None, logger=None, meta=None, max_iters=None, max_epochs=None, opt_cfg=None): if batch_processor is not None: if not callable(batch_processor): raise TypeError('batch_processor must be callable, ' f'but got {type(batch_processor)}') warnings.warn( 'batch_processor is deprecated, please implement ' 'train_step() and val_step() in the model instead.', DeprecationWarning) # raise an error is `batch_processor` is not None and # `model.train_step()` exists. if is_module_wrapper(model): _model = model.module else: _model = model if hasattr(_model, 'train_step') or hasattr(_model, 'val_step'): raise RuntimeError( 'batch_processor and model.train_step()/model.val_step() ' 'cannot be both available.') # else: # assert hasattr(model, 'train_step') # check the type of `optimizer` if isinstance(optimizer, dict): for name, optim in optimizer.items(): if not isinstance(optim, Optimizer): raise TypeError( f'optimizer must be a dict of torch.optim.Optimizers, ' f'but optimizer["{name}"] is a {type(optim)}') elif not isinstance(optimizer, Optimizer) and optimizer is not None: raise TypeError( f'optimizer must be a torch.optim.Optimizer object ' f'or dict or None, but got {type(optimizer)}') # check the type of `logger` if not isinstance(logger, logging.Logger): raise TypeError(f'logger must be a logging.Logger object, ' f'but got {type(logger)}') # check the type of `meta` if meta is not None and not isinstance(meta, dict): raise TypeError( f'meta must be a dict or None, but got {type(meta)}') self.model = model 
self.batch_processor = batch_processor self.optimizer = optimizer self.logger = logger self.meta = meta self.opt_cfg = opt_cfg self.earlyStop = False # create work_dir save_dir = opt_cfg['save_dir'] if mmcv.is_str(work_dir): self.work_dir = osp.abspath(work_dir) self.save_dir = osp.abspath(save_dir) mmcv.mkdir_or_exist(self.save_dir) mmcv.mkdir_or_exist(self.work_dir) elif work_dir is None: self.work_dir = None self.save_dir = None else: raise TypeError(f'"work_dir: {work_dir}" must be a str or None') # get model name from the model class if hasattr(self.model, 'module'): self._model_name = self.model.module.__class__.__name__ else: self._model_name = self.model.__class__.__name__ self._rank, self._world_size = get_dist_info() self.timestamp = get_time_str() self.mode = None self._hooks = [] self._epoch = 0 self._iter = 0 self._inner_iter = 0 self.outputs = {} if max_epochs is not None and max_iters is not None: raise ValueError( 'Only one of `max_epochs` or `max_iters` can be set.') self._max_epochs = max_epochs self._max_iters = max_iters # TODO: Redesign LogBuffer, it is not flexible and elegant enough self.log_buffer = MetricLogger(logger=logger, delimiter=" ") # LogBuffer() @property def model_name(self): """str: Name of the model, usually the module class name.""" return self._model_name @property def rank(self): """int: Rank of current process. (distributed training)""" return self._rank @property def world_size(self): """int: Number of processes participating in the job. 
(distributed training)""" return self._world_size @property def hooks(self): """list[:obj:`Hook`]: A list of registered hooks.""" return self._hooks @property def epoch(self): """int: Current epoch.""" return self._epoch @property def iter(self): """int: Current iteration.""" return self._iter @property def inner_iter(self): """int: Iteration in an epoch.""" return self._inner_iter @property def max_epochs(self): """int: Maximum training epochs.""" return self._max_epochs @property def max_iters(self): """int: Maximum training iterations.""" return self._max_iters @abstractmethod def train(self): pass @abstractmethod def val(self): pass @abstractmethod def run(self, data_loaders, workflow, **kwargs): pass @abstractmethod def save_checkpoint(self, out_dir, filename_tmpl, save_optimizer=True, meta=None, create_symlink=True): pass def current_lr(self): """Get current learning rates. Returns: list[float] | dict[str, list[float]]: Current learning rates of all param groups. If the runner has a dict of optimizers, this method will return a dict. """ if isinstance(self.optimizer, torch.optim.Optimizer): lr = [group['lr'] for group in self.optimizer.param_groups] elif isinstance(self.optimizer, dict): lr = dict() for name, optim in self.optimizer.items(): lr[name] = [group['lr'] for group in optim.param_groups] else: raise RuntimeError( 'lr is not applicable because optimizer does not exist.') return lr def current_momentum(self): """Get current momentums. Returns: list[float] | dict[str, list[float]]: Current momentums of all param groups. If the runner has a dict of optimizers, this method will return a dict. 
""" def _get_momentum(optimizer): momentums = [] for group in optimizer.param_groups: if 'momentum' in group.keys(): momentums.append(group['momentum']) elif 'betas' in group.keys(): momentums.append(group['betas'][0]) else: momentums.append(0) return momentums if self.optimizer is None: raise RuntimeError( 'momentum is not applicable because optimizer does not exist.') elif isinstance(self.optimizer, torch.optim.Optimizer): momentums = _get_momentum(self.optimizer) elif isinstance(self.optimizer, dict): momentums = dict() for name, optim in self.optimizer.items(): momentums[name] = _get_momentum(optim) return momentums def register_hook(self, hook, priority='NORMAL'): """Register a hook into the hook list. The hook will be inserted into a priority queue, with the specified priority (See :class:`Priority` for details of priorities). For hooks with the same priority, they will be triggered in the same order as they are registered. Args: hook (:obj:`Hook`): The hook to be registered. priority (int or str or :obj:`Priority`): Hook priority. Lower value means higher priority. """ assert isinstance(hook, Hook) if hasattr(hook, 'priority'): raise ValueError('"priority" is a reserved attribute for hooks') priority = get_priority(priority) hook.priority = priority # insert the hook to a sorted list inserted = False for i in range(len(self._hooks) - 1, -1, -1): if priority >= self._hooks[i].priority: self._hooks.insert(i + 1, hook) inserted = True break if not inserted: self._hooks.insert(0, hook) def register_hook_from_cfg(self, hook_cfg): """Register a hook from its cfg. Args: hook_cfg (dict): Hook config. It should have at least keys 'type' and 'priority' indicating its type and priority. Note: The specific hook class to register should not use 'type' and 'priority' arguments during initialization. 
""" hook_cfg = hook_cfg.copy() priority = hook_cfg.pop('priority', 'NORMAL') hook = mmcv.build_from_cfg(hook_cfg, HOOKS) self.register_hook(hook, priority=priority) def call_hook(self, fn_name): """Call all hooks. Args: fn_name (str): The function name in each hook to be called, such as "before_train_epoch". """ for hook in self._hooks: getattr(hook, fn_name)(self) def get_hook_info(self): # Get hooks info in each stage stage_hook_map = {stage: [] for stage in Hook.stages} for hook in self.hooks: try: priority = Priority(hook.priority).name except ValueError: priority = hook.priority classname = hook.__class__.__name__ hook_info = f'({priority:<12}) {classname:<35}' for trigger_stage in hook.get_triggered_stages(): stage_hook_map[trigger_stage].append(hook_info) stage_hook_infos = [] for stage in Hook.stages: hook_infos = stage_hook_map[stage] if len(hook_infos) > 0: info = f'{stage}:\n' info += '\n'.join(hook_infos) info += '\n -------------------- ' stage_hook_infos.append(info) return '\n'.join(stage_hook_infos) def load_checkpoint(self, filename, resume_mode, map_location='cpu', strict=False, revise_keys=[(r'^module.', '')]): return load_checkpoint( resume_mode, self.work_dir, self.model, filename, map_location, strict, self.logger, revise_keys=revise_keys) def resume(self, resume, resume_mode, reset_lr, lr, resume_optimizer=True, map_location='default'): if map_location == 'default': if torch.cuda.is_available(): device_id = torch.cuda.current_device() checkpoint = self.load_checkpoint( resume, resume_mode, map_location=lambda storage, loc: storage.cuda(device_id)) else: checkpoint = self.load_checkpoint(resume, resume_mode) else: checkpoint = self.load_checkpoint( resume, resume_mode, map_location=map_location) self._epoch = checkpoint['meta']['epoch'] if self.opt_cfg['eval']: self._max_epochs = self._epoch self._iter = checkpoint['meta']['iter'] if self.meta is None: self.meta = {} self.meta.setdefault('hook_msgs', {}) # load `last_ckpt`, `best_score`, 
`best_ckpt`, etc. for hook messages self.meta['hook_msgs'].update(checkpoint['meta'].get('hook_msgs', {})) # Re-calculate the number of iterations when resuming # models with different number of GPUs if 'config' in checkpoint['meta']: config = mmcv.Config.fromstring( checkpoint['meta']['config'], file_format='.py') previous_gpu_ids = config.get('gpu_ids', None) if previous_gpu_ids and len(previous_gpu_ids) > 0 and len( previous_gpu_ids) != self.world_size: self._iter = int(self._iter * len(previous_gpu_ids) / self.world_size) self.logger.info('the iteration number is changed due to ' 'change of GPU number') # resume meta information meta self.meta = checkpoint['meta'] # if optimizer is not None: # if checkpoint.get('optimizer') is not None: # optimizer.load_state_dict(checkpoint['optimizer']) # # if lr > 0 and reset_lr: # for param_group in optimizer.param_groups: # param_group['lr'] = lr # print_log("loaded checkpoint.optimizer") if 'optimizer' in checkpoint and resume_optimizer: if isinstance(self.optimizer, Optimizer): self.optimizer.load_state_dict(checkpoint['optimizer']) if lr > 0 and reset_lr: for param_group in self.optimizer.param_groups: param_group['lr'] = lr self.logger.info("loaded checkpoint.optimizer") elif isinstance(self.optimizer, dict): for k in self.optimizer.keys(): self.optimizer[k].load_state_dict( checkpoint['optimizer'][k]) if lr > 0 and reset_lr: for param_group in self.optimizer[k].param_groups: param_group['lr'] = lr self.logger.info("loaded checkpoint.optimizer") else: raise TypeError( 'Optimizer should be dict or torch.optim.Optimizer ' f'but got {type(self.optimizer)}') self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) def register_lr_hook(self, lr_config): if lr_config is None: return elif isinstance(lr_config, dict): assert 'policy' in lr_config policy_type = lr_config.pop('policy') # If the type of policy is all in lower case, e.g., 'cyclic', # then its first letter will be capitalized, e.g., to be 'Cyclic'. 
# This is for the convenient usage of Lr updater. # Since this is not applicable for ` # CosineAnnealingLrUpdater`, # the string will not be changed if it contains capital letters. if policy_type == policy_type.lower(): policy_type = policy_type.title() hook_type = policy_type + 'LrUpdaterHook' lr_config['type'] = hook_type hook = mmcv.build_from_cfg(lr_config, HOOKS) else: hook = lr_config self.register_hook(hook, priority='VERY_HIGH') def register_momentum_hook(self, momentum_config): if momentum_config is None: return if isinstance(momentum_config, dict): assert 'policy' in momentum_config policy_type = momentum_config.pop('policy') # If the type of policy is all in lower case, e.g., 'cyclic', # then its first letter will be capitalized, e.g., to be 'Cyclic'. # This is for the convenient usage of momentum updater. # Since this is not applicable for # `CosineAnnealingMomentumUpdater`, # the string will not be changed if it contains capital letters. if policy_type == policy_type.lower(): policy_type = policy_type.title() hook_type = policy_type + 'MomentumUpdaterHook' momentum_config['type'] = hook_type hook = mmcv.build_from_cfg(momentum_config, HOOKS) else: hook = momentum_config self.register_hook(hook, priority='HIGH') def register_optimizer_hook(self, optimizer_config): if optimizer_config is None: return if isinstance(optimizer_config, dict): optimizer_config.setdefault('type', 'OptimizerHook') hook = mmcv.build_from_cfg(optimizer_config, HOOKS) else: hook = optimizer_config self.register_hook(hook, priority='ABOVE_NORMAL') def register_checkpoint_hook(self, checkpoint_config): if checkpoint_config is None: return if isinstance(checkpoint_config, dict): checkpoint_config.setdefault('type', 'CheckpointHook') hook = mmcv.build_from_cfg(checkpoint_config, HOOKS) else: hook = checkpoint_config self.register_hook(hook, priority='NORMAL') def register_logger_hooks(self, log_config): if log_config is None: return log_interval = log_config['interval'] for info in 
log_config['hooks']: logger_hook = mmcv.build_from_cfg( info, HOOKS, default_args=dict(interval=log_interval)) self.register_hook(logger_hook, priority='VERY_LOW') def register_timer_hook(self, timer_config): if timer_config is None: return if isinstance(timer_config, dict): timer_config_ = copy.deepcopy(timer_config) hook = mmcv.build_from_cfg(timer_config_, HOOKS) else: hook = timer_config self.register_hook(hook, priority='LOW') def register_custom_hooks(self, custom_config): if custom_config is None: return if not isinstance(custom_config, list): custom_config = [custom_config] for item in custom_config: if isinstance(item, dict): self.register_hook_from_cfg(item) else: self.register_hook(item, priority='NORMAL') def register_profiler_hook(self, profiler_config): if profiler_config is None: return if isinstance(profiler_config, dict): profiler_config.setdefault('type', 'ProfilerHook') hook = mmcv.build_from_cfg(profiler_config, HOOKS) else: hook = profiler_config self.register_hook(hook) def register_training_hooks(self, lr_config, optimizer_config=None, checkpoint_config=None, log_config=None, momentum_config=None, timer_config=dict(type='IterTimerHook'), custom_hooks_config=None): """Register default and custom hooks for training. 
Default and custom hooks include: +----------------------+-------------------------+ | Hooks | Priority | +======================+=========================+ | LrUpdaterHook | VERY_HIGH (10) | +----------------------+-------------------------+ | MomentumUpdaterHook | HIGH (30) | +----------------------+-------------------------+ | OptimizerStepperHook | ABOVE_NORMAL (40) | +----------------------+-------------------------+ | CheckpointSaverHook | NORMAL (50) | +----------------------+-------------------------+ | IterTimerHook | LOW (70) | +----------------------+-------------------------+ | LoggerHook(s) | VERY_LOW (90) | +----------------------+-------------------------+ | CustomHook(s) | defaults to NORMAL (50) | +----------------------+-------------------------+ If custom hooks have same priority with default hooks, custom hooks will be triggered after default hooks. """ self.register_lr_hook(lr_config) self.register_momentum_hook(momentum_config) self.register_optimizer_hook(optimizer_config) self.register_checkpoint_hook(checkpoint_config) self.register_timer_hook(timer_config) self.register_logger_hooks(log_config) self.register_custom_hooks(custom_hooks_config) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/builder.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
import copy from ..utils import Registry RUNNERS = Registry('runner') RUNNER_BUILDERS = Registry('runner builder') def build_runner_constructor(cfg): return RUNNER_BUILDERS.build(cfg) def build_runner(cfg, default_args=None): runner_cfg = copy.deepcopy(cfg) constructor_type = runner_cfg.pop('constructor', 'DefaultRunnerConstructor') runner_constructor = build_runner_constructor( dict( type=constructor_type, runner_cfg=runner_cfg, default_args=default_args)) runner = runner_constructor() return runner ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/checkpoint.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import io import os import os.path as osp import pkgutil import re import time import warnings from collections import OrderedDict from importlib import import_module from tempfile import TemporaryDirectory from glob import glob import torch import torchvision from torch.optim import Optimizer import mmcv from ..fileio import FileClient from ..fileio import load as load_file from ..parallel import is_module_wrapper from ..utils import load_url, mkdir_or_exist, print_log from .dist_utils import get_dist_info ENV_MMCV_HOME = 'MMCV_HOME' ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' DEFAULT_CACHE_DIR = '~/.cache' def _get_mmcv_home(): mmcv_home = os.path.expanduser( os.getenv( ENV_MMCV_HOME, os.path.join( os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv'))) mkdir_or_exist(mmcv_home) return mmcv_home def load_state_dict(module, state_dict, strict=False, logger=None): """Load state_dict to a module. This method is modified from :meth:`torch.nn.Module.load_state_dict`. Default value for ``strict`` is set to ``False`` and the message for param mismatch will be shown even if strict is False. Args: module (Module): Module that receives the state_dict. state_dict (OrderedDict): Weights. 
strict (bool): whether to strictly enforce that the keys in :attr:`state_dict` match the keys returned by this module's :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. logger (:obj:`logging.Logger`, optional): Logger to log the error message. If not specified, print function will be used. """ unexpected_keys = [] all_missing_keys = [] err_msg = [] if hasattr(module, 'train'): metadata = getattr(state_dict, '_metadata', None) state_dict = state_dict.copy() if metadata is not None: state_dict._metadata = metadata else: for name in state_dict.keys(): metadata = getattr(state_dict[name], '_metadata', None) state_dict[name] = state_dict[name].copy() if metadata is not None: state_dict[name]._metadata = metadata # use _load_from_state_dict to enable checkpoint version control def load(module, prefix=''): # recursively check parallel module in case that the model has a # complicated structure, e.g., nn.Module(nn.Module(DDP)) if not hasattr(module, '_load_from_state_dict'): for name, m in module.model.items(): if is_module_wrapper(m): m = m.module local_metadata = {} if metadata is None else metadata.get( prefix[:-1], {}) m._load_from_state_dict(state_dict[name], prefix, local_metadata, True, all_missing_keys, unexpected_keys, err_msg) for name, child in m._modules.items(): if child is not None: load(child, prefix + name + '.') else: if is_module_wrapper(module): module = module.module local_metadata = {} if metadata is None else metadata.get( prefix[:-1], {}) module._load_from_state_dict(state_dict, prefix, local_metadata, True, all_missing_keys, unexpected_keys, err_msg) for name, child in module._modules.items(): if child is not None: load(child, prefix + name + '.') load(module) load = None # break load->load reference cycle # ignore "num_batches_tracked" of BN layers missing_keys = [ key for key in all_missing_keys if 'num_batches_tracked' not in key ] if unexpected_keys: err_msg.append('unexpected key in source ' f'state_dict: {", 
".join(unexpected_keys)}\n') if missing_keys: err_msg.append( f'missing keys in source state_dict: {", ".join(missing_keys)}\n') rank, _ = get_dist_info() if len(err_msg) > 0 and rank == 0: err_msg.insert( 0, 'The model and loaded state dict do not match exactly\n') err_msg = '\n'.join(err_msg) if strict: raise RuntimeError(err_msg) elif logger is not None: logger.warning(err_msg) else: print(err_msg) def get_torchvision_models(): model_urls = dict() for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__): if ispkg: continue _zoo = import_module(f'torchvision.models.{name}') if hasattr(_zoo, 'model_urls'): _urls = getattr(_zoo, 'model_urls') model_urls.update(_urls) return model_urls def get_external_models(): mmcv_home = _get_mmcv_home() default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json') default_urls = load_file(default_json_path) assert isinstance(default_urls, dict) external_json_path = osp.join(mmcv_home, 'open_mmlab.json') if osp.exists(external_json_path): external_urls = load_file(external_json_path) assert isinstance(external_urls, dict) default_urls.update(external_urls) return default_urls def get_mmcls_models(): mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json') mmcls_urls = load_file(mmcls_json_path) return mmcls_urls def get_deprecated_model_names(): deprecate_json_path = osp.join(mmcv.__path__[0], 'model_zoo/deprecated.json') deprecate_urls = load_file(deprecate_json_path) assert isinstance(deprecate_urls, dict) return deprecate_urls def _process_mmcls_checkpoint(checkpoint): if 'state_dict' in checkpoint: state_dict = checkpoint['state_dict'] else: # Some checkpoints converted from 3rd-party repo don't # have the "state_dict" key. 
state_dict = checkpoint new_state_dict = OrderedDict() for k, v in state_dict.items(): if k.startswith('backbone.'): new_state_dict[k[9:]] = v new_checkpoint = dict(state_dict=new_state_dict) return new_checkpoint class CheckpointLoader: """A general checkpoint loader to manage all schemes.""" _schemes = {} @classmethod def _register_scheme(cls, prefixes, loader, force=False): if isinstance(prefixes, str): prefixes = [prefixes] else: assert isinstance(prefixes, (list, tuple)) for prefix in prefixes: if (prefix not in cls._schemes) or force: cls._schemes[prefix] = loader else: raise KeyError( f'{prefix} is already registered as a loader backend, ' 'add "force=True" if you want to override it') # sort, longer prefixes take priority cls._schemes = OrderedDict( sorted(cls._schemes.items(), key=lambda t: t[0], reverse=True)) @classmethod def register_scheme(cls, prefixes, loader=None, force=False): """Register a loader to CheckpointLoader. This method can be used as a normal class method or a decorator. Args: prefixes (str or list[str] or tuple[str]): The prefix of the registered loader. loader (function, optional): The loader function to be registered. When this method is used as a decorator, loader is None. Defaults to None. force (bool, optional): Whether to override the loader if the prefix has already been registered. Defaults to False. """ if loader is not None: cls._register_scheme(prefixes, loader, force=force) return def _register(loader_cls): cls._register_scheme(prefixes, loader_cls, force=force) return loader_cls return _register @classmethod def _get_checkpoint_loader(cls, path): """Finds a loader that supports the given path. Falls back to the local loader if no other loader is found. Args: path (str): checkpoint path Returns: callable: checkpoint loader """ for p in cls._schemes: # use regular match to handle some cases that where the prefix of # loader has a prefix. 
For example, both 's3://path' and # 'open-mmlab:s3://path' should return `load_from_ceph` if re.match(p, path) is not None: return cls._schemes[p] @classmethod def load_checkpoint(cls, filename, map_location=None, logger=None): """load checkpoint through URL scheme path. Args: filename (str): checkpoint file name with given prefix map_location (str, optional): Same as :func:`torch.load`. Default: None logger (:mod:`logging.Logger`, optional): The logger for message. Default: None Returns: dict or OrderedDict: The loaded checkpoint. """ checkpoint_loader = cls._get_checkpoint_loader(filename) class_name = checkpoint_loader.__name__ print_log( f'load checkpoint from {class_name[10:]} path: {filename}', logger=logger) return checkpoint_loader(filename, map_location) @CheckpointLoader.register_scheme(prefixes='') def load_from_local(filename, map_location): """load checkpoint by local file path. Args: filename (str): local checkpoint file path map_location (str, optional): Same as :func:`torch.load`. Returns: dict or OrderedDict: The loaded checkpoint. """ filename = osp.expanduser(filename) if not osp.isfile(filename): raise FileNotFoundError(f'{filename} can not be found.') checkpoint = torch.load(filename, map_location=map_location) return checkpoint @CheckpointLoader.register_scheme(prefixes=('http://', 'https://')) def load_from_http(filename, map_location=None, model_dir=None): """load checkpoint through HTTP or HTTPS scheme path. In distributed setting, this function only download checkpoint at local rank 0. Args: filename (str): checkpoint file path with modelzoo or torchvision prefix map_location (str, optional): Same as :func:`torch.load`. model_dir (string, optional): directory in which to save the object, Default: None Returns: dict or OrderedDict: The loaded checkpoint. 
""" rank, world_size = get_dist_info() if rank == 0: checkpoint = load_url( filename, model_dir=model_dir, map_location=map_location) if world_size > 1: torch.distributed.barrier() if rank > 0: checkpoint = load_url( filename, model_dir=model_dir, map_location=map_location) return checkpoint @CheckpointLoader.register_scheme(prefixes='pavi://') def load_from_pavi(filename, map_location=None): """load checkpoint through the file path prefixed with pavi. In distributed setting, this function download ckpt at all ranks to different temporary directories. Args: filename (str): checkpoint file path with pavi prefix map_location (str, optional): Same as :func:`torch.load`. Default: None Returns: dict or OrderedDict: The loaded checkpoint. """ assert filename.startswith('pavi://'), \ f'Expected filename startswith `pavi://`, but get {filename}' model_path = filename[7:] try: from pavi import modelcloud except ImportError: raise ImportError( 'Please install pavi to load checkpoint from modelcloud.') model = modelcloud.get(model_path) with TemporaryDirectory() as tmp_dir: downloaded_file = osp.join(tmp_dir, model.name) model.download(downloaded_file) checkpoint = torch.load(downloaded_file, map_location=map_location) return checkpoint @CheckpointLoader.register_scheme(prefixes=r'(\S+\:)?s3://') def load_from_ceph(filename, map_location=None, backend='petrel'): """load checkpoint through the file path prefixed with s3. In distributed setting, this function download ckpt at all ranks to different temporary directories. Note: Since v1.4.1, the registered scheme prefixes have been enhanced to support bucket names in the path prefix, e.g. 's3://xx.xx/xx.path', 'bucket1:s3://xx.xx/xx.path'. Args: filename (str): checkpoint file path with s3 prefix map_location (str, optional): Same as :func:`torch.load`. backend (str, optional): The storage backend type. Options are 'ceph', 'petrel'. Default: 'petrel'. .. 
warning:: :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. Returns: dict or OrderedDict: The loaded checkpoint. """ allowed_backends = ['ceph', 'petrel'] if backend not in allowed_backends: raise ValueError(f'Load from Backend {backend} is not supported.') if backend == 'ceph': warnings.warn( 'CephBackend will be deprecated, please use PetrelBackend instead', DeprecationWarning) # CephClient and PetrelBackend have the same prefix 's3://' and the latter # will be chosen as default. If PetrelBackend can not be instantiated # successfully, the CephClient will be chosen. try: file_client = FileClient(backend=backend) except ImportError: allowed_backends.remove(backend) file_client = FileClient(backend=allowed_backends[0]) with io.BytesIO(file_client.get(filename)) as buffer: checkpoint = torch.load(buffer, map_location=map_location) return checkpoint @CheckpointLoader.register_scheme(prefixes=('modelzoo://', 'torchvision://')) def load_from_torchvision(filename, map_location=None): """load checkpoint through the file path prefixed with modelzoo or torchvision. Args: filename (str): checkpoint file path with modelzoo or torchvision prefix map_location (str, optional): Same as :func:`torch.load`. Returns: dict or OrderedDict: The loaded checkpoint. """ model_urls = get_torchvision_models() if filename.startswith('modelzoo://'): warnings.warn( 'The URL scheme of "modelzoo://" is deprecated, please ' 'use "torchvision://" instead', DeprecationWarning) model_name = filename[11:] else: model_name = filename[14:] return load_from_http(model_urls[model_name], map_location=map_location) @CheckpointLoader.register_scheme(prefixes=('open-mmlab://', 'openmmlab://')) def load_from_openmmlab(filename, map_location=None): """load checkpoint through the file path prefixed with open-mmlab or openmmlab. 
Args: filename (str): checkpoint file path with open-mmlab or openmmlab prefix map_location (str, optional): Same as :func:`torch.load`. Default: None Returns: dict or OrderedDict: The loaded checkpoint. """ model_urls = get_external_models() prefix_str = 'open-mmlab://' if filename.startswith(prefix_str): model_name = filename[13:] else: model_name = filename[12:] prefix_str = 'openmmlab://' deprecated_urls = get_deprecated_model_names() if model_name in deprecated_urls: warnings.warn( f'{prefix_str}{model_name} is deprecated in favor ' f'of {prefix_str}{deprecated_urls[model_name]}', DeprecationWarning) model_name = deprecated_urls[model_name] model_url = model_urls[model_name] # check if is url if model_url.startswith(('http://', 'https://')): checkpoint = load_from_http(model_url, map_location=map_location) else: filename = osp.join(_get_mmcv_home(), model_url) if not osp.isfile(filename): raise FileNotFoundError(f'{filename} can not be found.') checkpoint = torch.load(filename, map_location=map_location) return checkpoint @CheckpointLoader.register_scheme(prefixes='mmcls://') def load_from_mmcls(filename, map_location=None): """load checkpoint through the file path prefixed with mmcls. Args: filename (str): checkpoint file path with mmcls prefix map_location (str, optional): Same as :func:`torch.load`. Returns: dict or OrderedDict: The loaded checkpoint. """ model_urls = get_mmcls_models() model_name = filename[8:] checkpoint = load_from_http( model_urls[model_name], map_location=map_location) checkpoint = _process_mmcls_checkpoint(checkpoint) return checkpoint def _load_checkpoint(filename, map_location=None, logger=None): """Load checkpoint from somewhere (modelzoo, file, url). Args: filename (str): Accept local filepath, URL, ``torchvision://xxx``, ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for details. map_location (str, optional): Same as :func:`torch.load`. Default: None. 
logger (:mod:`logging.Logger`, optional): The logger for error message. Default: None Returns: dict or OrderedDict: The loaded checkpoint. It can be either an OrderedDict storing model weights or a dict containing other information, which depends on the checkpoint. """ return CheckpointLoader.load_checkpoint(filename, map_location, logger) def _load_checkpoint_with_prefix(prefix, filename, map_location=None): """Load partial pretrained model with specific prefix. Args: prefix (str): The prefix of sub-module. filename (str): Accept local filepath, URL, ``torchvision://xxx``, ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for details. map_location (str | None): Same as :func:`torch.load`. Default: None. Returns: dict or OrderedDict: The loaded checkpoint. """ checkpoint = _load_checkpoint(filename, map_location=map_location) if 'state_dict' in checkpoint: state_dict = checkpoint['state_dict'] else: state_dict = checkpoint if not prefix.endswith('.'): prefix += '.' prefix_len = len(prefix) state_dict = { k[prefix_len:]: v for k, v in state_dict.items() if k.startswith(prefix) } assert state_dict, f'{prefix} is not in the pretrained model' return state_dict def get_checkpoint_dir(OUT_DIR): """Retrieves the location for storing checkpoints.""" return os.path.join(OUT_DIR) def get_last_checkpoint(OUT_DIR, NAME_PREFIX="amp_model_best", logger=None): """Retrieves the most recent checkpoint (highest epoch number).""" checkpoint_dir = get_checkpoint_dir(OUT_DIR) checkpoints = glob(checkpoint_dir + f"/{NAME_PREFIX}*") if len(checkpoints) > 0: last_checkpoint_name = sorted(checkpoints)[-1] last_checkpoint = os.path.join(checkpoint_dir, last_checkpoint_name) print_log(f"loading last_checkpoint file: {last_checkpoint}", logger=logger) return last_checkpoint else: return None def get_best_k_model(OUT_DIR, indicator, _NAME_PREFIX="ckpt_ep_"): best_k_models = [] best_fname = [] if os.path.isfile(OUT_DIR): with open(OUT_DIR, 'r') as f: # TODO: 
通常checkpoint不会很大,如果太大open方法不合适,因为我们需要的是最后几行,open是从头遍历的 stats2user = [line.strip('\n') for line in f.readlines()] for line in stats2user: metric = {} line = line.split(',') for v in line[1:]: v = re.sub(r"[{}'' ]", "", v) k, v = v.split(':') metric[str(k)] = v # fname, v = line.split('-') epoch = line[0].replace('.pth.tar', '') epoch = epoch.replace('model_best_', '') # best_k_models[str(epoch)] = float(v) # 第一个line[0]用于save_top_k触发时, 删除多余的模型 # 第二个用于加载best模型, 因为best_k_models是个[[]],索引不方便 best_k_models.append([epoch, metric, line[0]]) best_fname.append(line[0]) # best_k_models['epoch'].append(epoch) # best_k_models[indicator].append(float(v)) if len(best_k_models) == 0: msg = f"checkpoint in directory {OUT_DIR} don't exist or is empty" warnings.warn(msg) return best_k_models, best_fname def load_checkpoint(resume_mode, work_dir, model, filename, map_location=None, strict=False, logger=None, revise_keys=[(r'^module\.', '')]): """Load checkpoint from a file or URI. Args: model (Module): Module to load checkpoint. filename (str): Accept local filepath, URL, ``torchvision://xxx``, ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for details. map_location (str): Same as :func:`torch.load`. strict (bool): Whether to allow different params for the model and checkpoint. logger (:mod:`logging.Logger` or None): The logger for error message. revise_keys (list): A list of customized keywords to modify the state_dict in checkpoint. Each item is a (pattern, replacement) pair of the regular expression operations. Default: strip the prefix 'module.' by [(r'^module\\.', '')]. Returns: dict or OrderedDict: The loaded checkpoint. 
""" ################ # 只从work_dir里读ckpt,用于模型的继续训练 resume_mode = resume_mode.lower() if resume_mode == 'best': _, best_k_fname = get_best_k_model(os.path.join(work_dir, "checkpoint"), None) if len(best_k_fname) > 0: best_k_model = sorted(best_k_fname)[-1] filename = os.path.join(work_dir, best_k_model) else: print_log("loading best model failed, maybe it's from scratch currently.", logger=logger) elif resume_mode == 'auto': ckpt = get_last_checkpoint(work_dir) if ckpt is not None: filename = ckpt ################ if not os.path.isfile(filename): print_log(f"no checkpoint found at {filename}", logger=logger) return {'meta': {'epoch': 1, 'iter': 1, 'best_epoch': 1, 'best_metric': None}} ################ checkpoint = _load_checkpoint(filename, map_location, logger) # OrderedDict is a subclass of dict if not isinstance(checkpoint, dict): raise RuntimeError( f'No state_dict found in checkpoint file {filename}') if 'meta' not in checkpoint.keys(): checkpoint['meta'] = {} if hasattr(model, 'train'): mod = {'model': model} checkpoint = {'model': checkpoint} else: mod = model.model if isinstance(mod, dict): for name, m in mod.items(): if 'state_dict' in checkpoint[name]: state_dict = checkpoint[name]['state_dict'] else: state_dict = checkpoint[name] # strip prefix of state_dict metadata = getattr(state_dict, '_metadata', OrderedDict()) for p, r in revise_keys: state_dict = OrderedDict( {re.sub(p, r, k): v for k, v in state_dict.items()}) # Keep metadata in state_dict state_dict._metadata = metadata load_state_dict(m, state_dict, strict, logger) else: if 'state_dict' in checkpoint: state_dict = checkpoint['state_dict'] else: state_dict = checkpoint # strip prefix of state_dict metadata = getattr(state_dict, '_metadata', OrderedDict()) for p, r in revise_keys: state_dict = OrderedDict( {re.sub(p, r, k): v for k, v in state_dict.items()}) # Keep metadata in state_dict state_dict._metadata = metadata load_state_dict(mod, state_dict, strict, logger) # if optimizer is not None: # 
if checkpoint.get('optimizer') is not None: # optimizer.load_state_dict(checkpoint['optimizer']) # # if lr > 0 and reset_lr: # for param_group in optimizer.param_groups: # param_group['lr'] = lr # print_log("loaded checkpoint.optimizer") # load state_dict checkpoint['meta'].setdefault('epoch', 1) checkpoint['meta'].setdefault('iter', 1) checkpoint['meta'].setdefault('best_epoch', 1) checkpoint['meta'].setdefault('best_metric', None) return checkpoint def weights_to_cpu(state_dict): """Copy a model state_dict to cpu. Args: state_dict (OrderedDict): Model weights on GPU. Returns: OrderedDict: Model weights on GPU. """ state_dict_cpu = OrderedDict() for key, val in state_dict.items(): state_dict_cpu[key] = val.cpu() # Keep metadata in state_dict state_dict_cpu._metadata = getattr(state_dict, '_metadata', OrderedDict()) return state_dict_cpu def _save_to_state_dict(module, destination, prefix, keep_vars): """Saves module state to `destination` dictionary. This method is modified from :meth:`torch.nn.Module._save_to_state_dict`. Args: module (nn.Module): The module to generate state_dict. destination (dict): A dict where state will be stored. prefix (str): The prefix for parameters and buffers used in this module. """ for name, param in module._parameters.items(): if param is not None: destination[prefix + name] = param if keep_vars else param.detach() for name, buf in module._buffers.items(): # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d if buf is not None: destination[prefix + name] = buf if keep_vars else buf.detach() def get_state_dict(module, destination=None, prefix='', keep_vars=False): """Returns a dictionary containing a whole state of the module. Both parameters and persistent buffers (e.g. running averages) are included. Keys are corresponding parameter and buffer names. 
This method is modified from :meth:`torch.nn.Module.state_dict` to recursively check parallel module in case that the model has a complicated structure, e.g., nn.Module(nn.Module(DDP)). Args: module (nn.Module): The module to generate state_dict. destination (OrderedDict): Returned dict for the state of the module. prefix (str): Prefix of the key. keep_vars (bool): Whether to keep the variable property of the parameters. Default: False. Returns: dict: A dictionary containing a whole state of the module. """ # recursively check parallel module in case that the model has a # complicated structure, e.g., nn.Module(nn.Module(DDP)) if is_module_wrapper(module): module = module.module # below is the same as torch.nn.Module.state_dict() if destination is None: destination = OrderedDict() destination._metadata = OrderedDict() destination._metadata[prefix[:-1]] = local_metadata = dict( version=module._version) _save_to_state_dict(module, destination, prefix, keep_vars) for name, child in module._modules.items(): if child is not None: get_state_dict( child, destination, prefix + name + '.', keep_vars=keep_vars) for hook in module._state_dict_hooks.values(): hook_result = hook(module, destination, prefix, local_metadata) if hook_result is not None: destination = hook_result return destination def save_checkpoint(#model, filename, #optimizer=None, meta=None, file_client_args=None): """Save checkpoint to file. The checkpoint will have 3 fields: ``meta``, ``state_dict`` and ``optimizer``. By default ``meta`` will contain version and time info. Args: model (Module): Module whose params are to be saved. filename (str): Checkpoint filename. optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. meta (dict, optional): Metadata to be saved in checkpoint. file_client_args (dict, optional): Arguments to instantiate a FileClient. See :class:`mmcv.fileio.FileClient` for details. Default: None. 
`New in version 1.3.16.` """ if meta is None: meta = {} elif not isinstance(meta, dict): raise TypeError(f'meta must be a dict or None, but got {type(meta)}') # meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) if 'model' not in meta: checkpoint = {} for name, sub_meta in meta.items(): model = sub_meta.pop('model') optimizer = sub_meta.pop('optimizer') if is_module_wrapper(model): model = model.module if hasattr(model, 'CLASSES') and model.CLASSES is not None: # save class name to the meta sub_meta.update(CLASSES=model.CLASSES) checkpoint[name] = { 'meta': sub_meta, 'state_dict': weights_to_cpu(get_state_dict(model)) } # save optimizer state dict in the checkpoint if isinstance(optimizer, Optimizer): checkpoint[name]['optimizer'] = optimizer.state_dict() file_client = FileClient.infer_client(file_client_args, filename) with io.BytesIO() as f: torch.save(checkpoint, f) file_client.put(f.getvalue(), filename) else: model = meta.pop('model') optimizer = meta.pop('optimizer') if is_module_wrapper(model): model = model.module if hasattr(model, 'CLASSES') and model.CLASSES is not None: # save class name to the meta meta.update(CLASSES=model.CLASSES) checkpoint = { 'meta': meta, 'state_dict': weights_to_cpu(get_state_dict(model.model)) } # save optimizer state dict in the checkpoint if isinstance(optimizer, Optimizer): checkpoint['optimizer'] = optimizer.state_dict() elif isinstance(optimizer, dict): checkpoint['optimizer'] = {} for name, optim in optimizer.items(): checkpoint['optimizer'][name] = optim.state_dict() if filename.startswith('pavi://'): if file_client_args is not None: raise ValueError( 'file_client_args should be "None" if filename starts with' f'"pavi://", but got {file_client_args}') try: from pavi import exception, modelcloud except ImportError: raise ImportError( 'Please install pavi to load checkpoint from modelcloud.') model_path = filename[7:] root = modelcloud.Folder() model_dir, model_name = osp.split(model_path) try: model = 
modelcloud.get(model_dir) except exception.NodeNotFoundError: model = root.create_training_model(model_dir) with TemporaryDirectory() as tmp_dir: checkpoint_file = osp.join(tmp_dir, model_name) with open(checkpoint_file, 'wb') as f: torch.save(checkpoint, f) f.flush() model.create_file(checkpoint_file, name=model_name) else: file_client = FileClient.infer_client(file_client_args, filename) with io.BytesIO() as f: torch.save(checkpoint, f) file_client.put(f.getvalue(), filename) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/default_constructor.py ================================================ from .builder import RUNNER_BUILDERS, RUNNERS @RUNNER_BUILDERS.register_module() class DefaultRunnerConstructor: """Default constructor for runners. Custom existing `Runner` like `EpocBasedRunner` though `RunnerConstructor`. For example, We can inject some new properties and functions for `Runner`. Example: >>> from mmcv.runner import RUNNER_BUILDERS, build_runner >>> # Define a new RunnerReconstructor >>> @RUNNER_BUILDERS.register_module() >>> class MyRunnerConstructor: ... def __init__(self, runner_cfg, default_args=None): ... if not isinstance(runner_cfg, dict): ... raise TypeError('runner_cfg should be a dict', ... f'but got {type(runner_cfg)}') ... self.runner_cfg = runner_cfg ... self.default_args = default_args ... ... def __call__(self): ... runner = RUNNERS.build(self.runner_cfg, ... default_args=self.default_args) ... # Add new properties for existing runner ... runner.my_name = 'my_runner' ... runner.my_function = lambda self: print(self.my_name) ... ... >>> # build your runner >>> runner_cfg = dict(type='EpochBasedRunner', max_epochs=40, ... 
constructor='MyRunnerConstructor') >>> runner = build_runner(runner_cfg) """ def __init__(self, runner_cfg, default_args=None): if not isinstance(runner_cfg, dict): raise TypeError('runner_cfg should be a dict', f'but got {type(runner_cfg)}') self.runner_cfg = runner_cfg self.default_args = default_args def __call__(self): return RUNNERS.build(self.runner_cfg, default_args=self.default_args) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/dist_utils.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import functools import os import subprocess from collections import OrderedDict import torch import torch.multiprocessing as mp from torch import distributed as dist from torch._utils import (_flatten_dense_tensors, _take_tensors, _unflatten_dense_tensors) def init_dist(launcher, backend='nccl', **kwargs): if mp.get_start_method(allow_none=True) is None: mp.set_start_method('spawn') if launcher == 'pytorch': _init_dist_pytorch(backend, **kwargs) elif launcher == 'mpi': _init_dist_mpi(backend, **kwargs) elif launcher == 'slurm': _init_dist_slurm(backend, **kwargs) else: raise ValueError(f'Invalid launcher type: {launcher}') def _init_dist_pytorch(backend, **kwargs): # TODO: use local_rank instead of rank % num_gpus rank = int(os.environ['RANK']) num_gpus = torch.cuda.device_count() torch.cuda.set_device(rank % num_gpus) dist.init_process_group(backend=backend, **kwargs) def _init_dist_mpi(backend, **kwargs): # TODO: use local_rank instead of rank % num_gpus rank = int(os.environ['OMPI_COMM_WORLD_RANK']) num_gpus = torch.cuda.device_count() torch.cuda.set_device(rank % num_gpus) dist.init_process_group(backend=backend, **kwargs) def _init_dist_slurm(backend, port=None): """Initialize slurm distributed training environment. If argument ``port`` is not specified, then the master port will be system environment variable ``MASTER_PORT``. 
If ``MASTER_PORT`` is not in system environment variable, then a default port ``29500`` will be used. Args: backend (str): Backend of torch.distributed. port (int, optional): Master port. Defaults to None. """ proc_id = int(os.environ['SLURM_PROCID']) ntasks = int(os.environ['SLURM_NTASKS']) node_list = os.environ['SLURM_NODELIST'] num_gpus = torch.cuda.device_count() torch.cuda.set_device(proc_id % num_gpus) addr = subprocess.getoutput( f'scontrol show hostname {node_list} | head -n1') # specify master port if port is not None: os.environ['MASTER_PORT'] = str(port) elif 'MASTER_PORT' in os.environ: pass # use MASTER_PORT in the environment variable else: # 29500 is torch.distributed default port os.environ['MASTER_PORT'] = '29500' # use MASTER_ADDR in the environment variable if it already exists if 'MASTER_ADDR' not in os.environ: os.environ['MASTER_ADDR'] = addr os.environ['WORLD_SIZE'] = str(ntasks) os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) os.environ['RANK'] = str(proc_id) dist.init_process_group(backend=backend) def get_dist_info(): if dist.is_available() and dist.is_initialized(): rank = dist.get_rank() world_size = dist.get_world_size() else: rank = 0 world_size = 1 return rank, world_size def master_only(func): @functools.wraps(func) def wrapper(*args, **kwargs): rank, _ = get_dist_info() if rank == 0: return func(*args, **kwargs) return wrapper def allreduce_params(params, coalesce=True, bucket_size_mb=-1): """Allreduce parameters. Args: params (list[torch.Parameters]): List of parameters or buffers of a model. coalesce (bool, optional): Whether allreduce parameters as a whole. Defaults to True. bucket_size_mb (int, optional): Size of bucket, the unit is MB. Defaults to -1. 
""" _, world_size = get_dist_info() if world_size == 1: return params = [param.data for param in params] if coalesce: _allreduce_coalesced(params, world_size, bucket_size_mb) else: for tensor in params: dist.all_reduce(tensor.div_(world_size)) def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): """Allreduce gradients. Args: params (list[torch.Parameters]): List of parameters of a model coalesce (bool, optional): Whether allreduce parameters as a whole. Defaults to True. bucket_size_mb (int, optional): Size of bucket, the unit is MB. Defaults to -1. """ grads = [ param.grad.data for param in params if param.requires_grad and param.grad is not None ] _, world_size = get_dist_info() if world_size == 1: return if coalesce: _allreduce_coalesced(grads, world_size, bucket_size_mb) else: for tensor in grads: dist.all_reduce(tensor.div_(world_size)) def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): if bucket_size_mb > 0: bucket_size_bytes = bucket_size_mb * 1024 * 1024 buckets = _take_tensors(tensors, bucket_size_bytes) else: buckets = OrderedDict() for tensor in tensors: tp = tensor.type() if tp not in buckets: buckets[tp] = [] buckets[tp].append(tensor) buckets = buckets.values() for bucket in buckets: flat_tensors = _flatten_dense_tensors(bucket) dist.all_reduce(flat_tensors) flat_tensors.div_(world_size) for tensor, synced in zip( bucket, _unflatten_dense_tensors(flat_tensors, bucket)): tensor.copy_(synced) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/epoch_based_runner.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
@RUNNERS.register_module()
class EpochBasedRunner(BaseRunner):
    """Epoch-based Runner.

    This runner train models epoch by epoch.

    NOTE(review): this block was reconstructed from a whitespace-collapsed
    dump; the nesting of a few statements (flagged inline) should be
    confirmed against the original file.
    """

    def run_iter(self, data_batch, train_mode, **kwargs):
        """Run one iteration and store its result in ``self.outputs``.

        Uses ``self.batch_processor`` when it is set; otherwise calls
        ``self.model.train_step`` (``train_mode`` truthy) or
        ``self.model.val_step``. When the result has a ``'log_vars'`` entry
        it is pushed into ``self.log_buffer`` via ``update_dict``.

        Raises:
            TypeError: If the step result is not a dict.
        """
        if self.batch_processor is not None:
            outputs = self.batch_processor(
                self.model, data_batch, train_mode=train_mode, **kwargs)
        elif train_mode:
            outputs = self.model.train_step(data_batch, self.optimizer,
                                            **kwargs)
            # if not isinstance(self.model, dict):
            #     outputs = self.model.train_step(data_batch, self.optimizer,
            #                                     **kwargs)
            # else:
            #     outputs = {}
            #     for name in self.model.keys():
            #         outputs.update(self.model[name].train_step(data_batch, self.optimizer,
            #                                                    **kwargs))
        else:
            outputs = self.model.val_step(data_batch, self.optimizer, **kwargs)
        if not isinstance(outputs, dict):
            raise TypeError('"batch_processor()" or "model.train_step()"'
                            'and "model.val_step()" must return a dict')
        if 'log_vars' in outputs:
            self.log_buffer.update_dict(outputs['log_vars'])
            # expected layout:
            # {'loss': loss, 'log_vars': {'loss': loss, 'metric_1': ..., 'metric_2': ....} }
        self.outputs = outputs

    def train(self, data_loader, **kwargs):
        """Run one hook-driven training epoch over ``data_loader``.

        Puts the model (or, for wrapper objects without ``train``, the
        wrapped ``model`` / dict of models) into train mode, fires the
        ``before/after_train_epoch`` and ``before/after_train_iter`` hooks,
        and advances ``self._iter`` / ``self._epoch``.
        """
        if hasattr(self.model, 'train'):
            self.model.train()
        elif isinstance(self.model.model, dict):
            for name in self.model.model.keys():
                self.model.model[name].train()
        else:
            self.model.model.train()
        # if not isinstance(self.model, dict):
        #     self.model.train()
        # else:
        #     for name in self.model.keys():
        #         self.model[name].train()
        self.mode = 'train'
        self.data_loader = data_loader
        self._max_iters = self._max_epochs * len(self.data_loader)
        self.call_hook('before_train_epoch')
        time.sleep(2)  # Prevent possible deadlock during epoch transition
        for i, data_batch in enumerate(self.data_loader):
            self._inner_iter = i
            self.call_hook('before_train_iter')
            self.run_iter(data_batch, train_mode=True, **kwargs)
            self.call_hook('after_train_iter')
            self._iter += 1
        # NOTE(review): assumed to run once after the loop, not per iteration
        # — confirm. Snapshot of the running averages for epoch-end hooks.
        self.metrics = {k: meter.avg for k, meter in self.log_buffer.meters.items()}
        self.call_hook('after_train_epoch')
        self._epoch += 1

    def simple_train(self, data_loader, **kwargs):
        """Run one training epoch with an inline optimisation loop.

        Reads ``accumulated_step``, ``clip_max_norm``, ``print_freq`` and
        ``nni`` from ``self.opt_cfg``. Per batch it calls :meth:`run_iter`,
        back-propagates ``outputs['loss'] / accumulated_step``, optionally
        clips gradients, and steps the optimizer when
        ``idx % accumulated_step == 0``. Only the ``after_train_epoch`` hook
        is fired.
        """
        optimizer = self.optimizer
        accumulated_step = self.opt_cfg.get('accumulated_step', 1)
        clip_max_norm = self.opt_cfg.get('clip_max_norm', 0)
        print_freq = self.opt_cfg.get('print_freq', 1)
        nni = self.opt_cfg.get('nni', None)
        self.model.train()
        self.mode = 'train'
        self.data_loader = data_loader
        self._max_iters = self._max_epochs * len(self.data_loader)
        # metric_logger = MetricLogger(delimiter=" ", dist_print=0, logger=self.logger)
        header = 'Epoch: [{}]'.format(self._epoch)
        # A non-positive print frequency means "log once per epoch".
        print_freq = len(data_loader) if print_freq <= 0 else print_freq
        metric_logger = self.log_buffer
        for data_batch, idx in metric_logger.log_every(data_loader, print_freq, header):
            self._inner_iter = idx
            self.run_iter(data_batch, train_mode=True, **kwargs)
            # Scale the loss so accumulated gradients match a full step.
            losses = self.outputs['loss'] / accumulated_step
            losses.backward()
            if clip_max_norm > 0:
                grad_norm = torch.nn.utils.clip_grad_norm_(self.model.parameters(), clip_max_norm)
            else:
                grad_norm = get_grad_norm(self.model.parameters())
            if idx % accumulated_step == 0:
                optimizer.step()
                # NOTE(review): zero_grad assumed to sit inside this `if`
                # (gradient accumulation) — confirm.
                optimizer.zero_grad()
            metric_logger.update(lr=optimizer.param_groups[0]["lr"])
            metric_logger.update(grad_norm=grad_norm)
            metric_logger.update_dict(self.outputs['log_vars'])
            self._iter += 1
        self.metrics = {k: meter.avg for k, meter in metric_logger.meters.items()}
        self.call_hook('after_train_epoch')
        metric_logger.clear()
        self._epoch += 1
        if nni is not None:
            nni.report_intermediate_result(
                {name: value for name, value in self.metrics.items() if self.opt_cfg.metrics in name})

    @torch.no_grad()
    def simple_val(self, data_loader, **kwargs):
        """Run one validation pass by calling ``model.val_step`` directly.

        Metrics returned by ``val_step`` are accumulated in
        ``self.log_buffer``; when ``opt_cfg['mode'] == 'nni'`` the averaged
        metrics whose name contains ``opt_cfg['metrics']`` are reported via
        ``self.nni``.
        """
        # Would using IterBasedRunner be more uniform?
        # For tighter integration this should become an eval_hook, but this is a simple case.
        self.model.eval()
        self.mode = 'val'
        opt_cfg = self.opt_cfg
        save_fmt = opt_cfg['save_fmt']
        # metric_logger = MetricLogger(dist_print=0, delimiter=" ", logger=self.logger)
        metric_logger = self.log_buffer
        header = 'TestEpoch: [{0}]'.format(self.epoch - 1)
        save_dir = os.path.join(self.work_dir, f"{opt_cfg['dataset']}")
        if save_fmt and self._epoch == 1:
            os.makedirs(save_dir, exist_ok=True)
        for batch, idx in metric_logger.log_every(data_loader, 1, header):
            metrics = self.model.val_step(batch, save_dir, idx=idx, save_fmt=save_fmt,
                                          filename=batch.get('filename', None))
            # self.run_iter()
            metric_logger.update_dict(metrics)
        stats = {k: meter.avg for k, meter in metric_logger.meters.items()}
        if opt_cfg['mode'] == 'nni':
            self.nni.report_final_result({name: value for name, value in stats.items() if opt_cfg['metrics'] in name})
        # Triggered only for validation-only runs; lets the outer while loop end.
        metric_logger.clear()
        if not self.flag:
            # ``self.flag`` is set in run(): True when the workflow has a train phase.
            self._epoch += 1

    @torch.no_grad()
    def val(self, data_loader, **kwargs):
        """Run one hook-driven validation epoch over ``data_loader``.

        Mirrors :meth:`train` for eval mode, forwards ``idx``, ``img_range``,
        ``save_fmt``, ``filename`` and ``save_dir`` to :meth:`run_iter`, and
        prints the elapsed wall time. ``self._epoch`` is advanced only when
        ``opt_cfg['eval']`` is truthy.
        """
        if hasattr(self.model, 'eval'):
            self.model.eval()
        elif isinstance(self.model.model, dict):
            for name in self.model.model.keys():
                self.model.model[name].eval()
        else:
            self.model.model.eval()
        self.mode = 'val'
        self.data_loader = data_loader
        self.call_hook('before_val_epoch')
        time.sleep(2)  # Prevent possible deadlock during epoch transition
        tic = time.time()
        for i, data_batch in enumerate(self.data_loader):
            self._inner_iter = i
            self.call_hook('before_val_iter')
            self.run_iter(data_batch, train_mode=False, idx=i,
                          img_range=self.opt_cfg['img_range'],
                          save_fmt=self.opt_cfg['save_fmt'],
                          filename=data_batch.get('filename', [None])[0],
                          save_dir=self.save_dir)
            self.call_hook('after_val_iter')
        print("test time:", time.time() - tic)
        self.call_hook('after_val_epoch')
        if self.opt_cfg['eval']:
            self._epoch += 1

    def run(self, data_loaders, workflow, max_epochs=None, **kwargs):
        """Start running.

        Args:
            data_loaders (dict): Dataloaders for training and validation,
                indexed by the phase name used in ``workflow``; must have
                as many entries as ``workflow``.
            workflow (list[tuple]): A list of (phase, epochs) to specify the
                running order and epochs. E.g, [('train', 2), ('val', 1)] means
                running 2 epochs for training and 1 epoch for validation,
                iteratively.
            max_epochs (int, optional): Deprecated override for
                ``self._max_epochs``; emits a ``DeprecationWarning``.
        """
        assert isinstance(data_loaders, dict)
        assert mmcv.is_list_of(workflow, tuple)
        assert len(data_loaders) == len(workflow), print_log(f"{len(data_loaders)} == {len(workflow)}")
        if max_epochs is not None:
            warnings.warn(
                'setting max_epochs in run is deprecated, '
                'please set max_epochs in runner_config', DeprecationWarning)
            self._max_epochs = max_epochs

        assert self._max_epochs is not None, (
            'max_epochs must be specified during instantiation')
        # True when any phase name contains 'train'; read by simple_val().
        self.flag = any('train' in mode for mode, _ in workflow)
        self.workflow = workflow
        self.data_length = 1
        # Derive iteration bookkeeping from the first 'train' phase, if any.
        for i, flow in enumerate(workflow):
            mode, epochs = flow
            if mode == 'train':
                self._max_iters = self._max_epochs * len(data_loaders[mode])
                self.data_length = len(data_loaders[mode])
                break

        work_dir = self.work_dir if self.work_dir is not None else 'NONE'
        print_log(f'Start running, host: {get_host_info()}, work_dir: {work_dir}', logger=self.logger)
        print_log(f'Hooks will be executed in the following order:\n{self.get_hook_info()}', logger=self.logger)
        print_log(f'workflow: {workflow}, max: {self._max_epochs} epochs', logger=self.logger)
        self.call_hook('before_run')
        tic = time.time()
        print_freq = self.opt_cfg.get('print_freq', 1)
        # from 1 to self._max_epochs, not from 0
        while self.epoch <= self._max_epochs:
            for i, flow in enumerate(workflow):
                mode, epochs = flow
                if isinstance(mode, str):  # self.train()
                    if not hasattr(self, mode):
                        raise ValueError(
                            f'runner has no method named "{mode}" to run an '
                            'epoch')
                    epoch_runner = getattr(self, mode)
                else:
                    raise TypeError(
                        'mode in workflow must be a str, but got {}'.format(
                            type(mode)))

                for epoch in range(epochs):
                    if mode == 'train' and self.epoch >= self._max_epochs:
                        break
                    epoch_runner(data_loaders[mode], **kwargs)
                    # NOTE(review): early-stop check assumed to sit inside the
                    # per-epoch loop, so `break` only leaves this inner loop
                    # — confirm. `earlyStop` is not assigned in this class.
                    if self.earlyStop:
                        print_log("model train has diverged, python will stop training", logger=self.logger)
                        break

        time.sleep(1)  # wait for some hooks like loggers to finish
        self.call_hook('after_run')
        total_time = time.time() - tic
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print_log('Training time {}'.format(total_time_str), logger=self.logger)

    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl='epoch_{}.pth',
                        save_optimizer=True,
                        meta=None,
                        create_symlink=True):
        """Save the checkpoint.

        Args:
            out_dir (str): The directory that checkpoints are saved.
            filename_tmpl (str, optional): The checkpoint filename template,
                which contains a placeholder for the epoch number.
                Defaults to 'epoch_{}.pth'.
            save_optimizer (bool, optional): Whether to save the optimizer to
                the checkpoint. Defaults to True.
            meta (dict, optional): The meta information to be saved in the
                checkpoint. Defaults to None.
            create_symlink (bool, optional): Whether to create a symlink
                "latest.pth" to point to the latest checkpoint.
                Defaults to True.

        Raises:
            TypeError: If ``meta`` is neither a dict nor None.
        """
        if meta is None:
            meta = {}
        elif not isinstance(meta, dict):
            raise TypeError(
                f'meta should be a dict or None, but got {type(meta)}')
        if self.meta is not None:
            meta.update(self.meta)
            # Note: meta.update(self.meta) should be done before
            # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise
            # there will be problems with resumed checkpoints.
            # More details in https://github.com/open-mmlab/mmcv/pull/1108
        meta.update(epoch=self.epoch + 1, iter=self.iter)

        filename = filename_tmpl.format(self.epoch + 1)
        filepath = os.path.join(out_dir, filename)
        optimizer = self.optimizer if save_optimizer else None
        save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
        # in some environments, `os.symlink` is not supported, you may need to
        # set `create_symlink` to False
        if create_symlink:
            dst_file = os.path.join(out_dir, 'latest.pth')
            if platform.system() != 'Windows':
                mmcv.symlink(filename, dst_file)
            else:
                # Windows: copy the checkpoint instead of symlinking.
                shutil.copy(filepath, dst_file)


@RUNNERS.register_module()
class Runner(EpochBasedRunner):
    """Deprecated name of EpochBasedRunner."""

    def __init__(self, *args, **kwargs):
        # Warn on construction, then defer entirely to EpochBasedRunner.
        warnings.warn(
            'Runner was deprecated, please use EpochBasedRunner instead',
            DeprecationWarning)
        super().__init__(*args, **kwargs)
def cast_tensor_type(inputs, src_type, dst_type):
    """Recursively convert Tensor in inputs from src_type to dst_type.

    Note:
        In v1.4.4 and later, ``cast_tensor_type`` only converts a
        ``torch.Tensor`` whose dtype equals ``src_type``. Earlier versions
        ignored ``src_type``, so e.g.
        ``cast_tensor_type(inputs, torch.float, torch.half)`` would also
        convert integer tensors to ``torch.half``, which is not expected.

    Args:
        inputs: Inputs that to be casted.
        src_type (torch.dtype): Source type.
        dst_type (torch.dtype): Destination type.

    Returns:
        The same type with inputs, but all contained Tensors have been cast.
    """
    if isinstance(inputs, torch.Tensor):
        # Only tensors that really are of `src_type` get converted.
        return inputs.to(dst_type) if inputs.dtype == src_type else inputs
    # Modules, strings and numpy arrays are passed through untouched even
    # though some of them are iterable.
    if isinstance(inputs, (nn.Module, str, np.ndarray)):
        return inputs
    if isinstance(inputs, abc.Mapping):
        converted = {
            key: cast_tensor_type(value, src_type, dst_type)
            for key, value in inputs.items()
        }
        return type(inputs)(converted)
    if isinstance(inputs, abc.Iterable):
        return type(inputs)(
            cast_tensor_type(element, src_type, dst_type)
            for element in inputs)
    return inputs


def auto_fp16(apply_to=None, out_fp32=False):
    """Decorator to enable fp16 training automatically.

    This decorator is useful when you write custom modules and want to support
    mixed precision training. If inputs arguments are fp32 tensors, they will
    be converted to fp16 automatically. Arguments other than fp32 tensors are
    ignored. If you are using PyTorch >= 1.6, torch.cuda.amp is used as the
    backend, otherwise, original mmcv implementation will be adopted.

    Args:
        apply_to (Iterable, optional): The argument names to be converted.
            `None` indicates all arguments.
        out_fp32 (bool): Whether to convert the output back to fp32.

    Example:

        >>> import torch.nn as nn
        >>> class MyModule1(nn.Module):
        >>>
        >>>     # Convert x and y to fp16
        >>>     @auto_fp16()
        >>>     def forward(self, x, y):
        >>>         pass

        >>> import torch.nn as nn
        >>> class MyModule2(nn.Module):
        >>>
        >>>     # convert pred to fp16
        >>>     @auto_fp16(apply_to=('pred', ))
        >>>     def do_something(self, pred, others):
        >>>         pass
    """

    def auto_fp16_wrapper(old_func):

        @functools.wraps(old_func)
        def new_func(*args, **kwargs):
            owner = args[0]
            if not isinstance(owner, torch.nn.Module):
                raise TypeError('@auto_fp16 can only be used to decorate the '
                                'method of nn.Module')
            # Without an enabled `fp16_enabled` flag the decorator is a no-op.
            if not getattr(owner, 'fp16_enabled', False):
                return old_func(*args, **kwargs)

            spec = getfullargspec(old_func)
            targets = spec.args if apply_to is None else apply_to

            # NOTE: default args are not taken into consideration; only
            # positionals that map onto a declared parameter name are kept.
            positional_names = spec.args[:len(args)]
            new_args = [
                cast_tensor_type(value, torch.float, torch.half)
                if name in targets else value
                for name, value in zip(positional_names, args)
            ]
            new_kwargs = {
                name: (cast_tensor_type(value, torch.float, torch.half)
                       if name in targets else value)
                for name, value in kwargs.items()
            }

            use_amp = (TORCH_VERSION != 'parrots' and
                       digit_version(TORCH_VERSION) >= digit_version('1.6.0'))
            if use_amp:
                with autocast(enabled=True):
                    output = old_func(*new_args, **new_kwargs)
            else:
                output = old_func(*new_args, **new_kwargs)

            # cast the results back to fp32 if necessary
            if out_fp32:
                output = cast_tensor_type(output, torch.half, torch.float)
            return output

        return new_func

    return auto_fp16_wrapper
def force_fp32(apply_to=None, out_fp16=False):
    """Decorator to convert input arguments to fp32 in force.

    This decorator is useful when you write custom modules and want to support
    mixed precision training. If there are some inputs that must be processed
    in fp32 mode, then this decorator can handle it. If inputs arguments are
    fp16 tensors, they will be converted to fp32 automatically. Arguments other
    than fp16 tensors are ignored. If you are using PyTorch >= 1.6,
    torch.cuda.amp is used as the backend, otherwise, original mmcv
    implementation will be adopted.

    Args:
        apply_to (Iterable, optional): The argument names to be converted.
            `None` indicates all arguments.
        out_fp16 (bool): Whether to convert the output back to fp16.

    Example:

        >>> import torch.nn as nn
        >>> class MyModule1(nn.Module):
        >>>
        >>>     # Convert x and y to fp32
        >>>     @force_fp32()
        >>>     def loss(self, x, y):
        >>>         pass

        >>> import torch.nn as nn
        >>> class MyModule2(nn.Module):
        >>>
        >>>     # convert pred to fp32
        >>>     @force_fp32(apply_to=('pred', ))
        >>>     def post_process(self, pred, others):
        >>>         pass
    """

    def force_fp32_wrapper(old_func):

        @functools.wraps(old_func)
        def new_func(*args, **kwargs):
            owner = args[0]
            if not isinstance(owner, torch.nn.Module):
                raise TypeError('@force_fp32 can only be used to decorate the '
                                'method of nn.Module')
            # Without an enabled `fp16_enabled` flag the decorator is a no-op.
            if not getattr(owner, 'fp16_enabled', False):
                return old_func(*args, **kwargs)

            spec = getfullargspec(old_func)
            targets = spec.args if apply_to is None else apply_to

            # Only positionals that map onto a declared parameter name are
            # forwarded, matching the declared-argument slice.
            positional_names = spec.args[:len(args)]
            new_args = [
                cast_tensor_type(value, torch.half, torch.float)
                if name in targets else value
                for name, value in zip(positional_names, args)
            ]
            new_kwargs = {
                name: (cast_tensor_type(value, torch.half, torch.float)
                       if name in targets else value)
                for name, value in kwargs.items()
            }

            use_amp = (TORCH_VERSION != 'parrots' and
                       digit_version(TORCH_VERSION) >= digit_version('1.6.0'))
            if use_amp:
                # Autocast is explicitly switched off inside the fp32 region.
                with autocast(enabled=False):
                    output = old_func(*new_args, **new_kwargs)
            else:
                output = old_func(*new_args, **new_kwargs)

            # cast the results back to fp16 if requested
            if out_fp16:
                output = cast_tensor_type(output, torch.float, torch.half)
            return output

        return new_func

    return force_fp32_wrapper
def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
    """Deprecated alias that forwards to ``dist_utils.allreduce_grads``.

    Emits a ``DeprecationWarning`` and then delegates with the same
    arguments.

    Args:
        params: Parameters whose gradients are reduced; passed through
            unchanged.
        coalesce (bool, optional): Passed through. Defaults to True.
        bucket_size_mb (int, optional): Passed through. Defaults to -1.
    """
    # `warnings.warning` does not exist, so the previous call raised
    # AttributeError before any gradient was reduced; `warnings.warn` is the
    # correct API.
    warnings.warn(
        '"mmcv.runner.fp16_utils.allreduce_grads" is deprecated, and will be '
        'removed in v2.8. Please switch to "mmcv.runner.allreduce_grads',
        DeprecationWarning)
    _allreduce_grads(params, coalesce=coalesce, bucket_size_mb=bucket_size_mb)
def wrap_fp16_model(model):
    """Wrap the FP32 model to FP16.

    If you are using PyTorch >= 1.6, torch.cuda.amp is used as the
    backend, otherwise, original mmcv implementation will be adopted.

    For PyTorch >= 1.6, this function will
    1. Set fp16 flag inside the model to True.

    Otherwise:
    1. Convert FP32 model to FP16.
    2. Remain some necessary layers to be FP32, e.g., normalization layers.
    3. Set `fp16_enabled` flag inside the model to True.

    Args:
        model (nn.Module): Model in FP32.
    """
    manual_fp16 = (TORCH_VERSION == 'parrots'
                   or digit_version(TORCH_VERSION) < digit_version('1.6.0'))
    if manual_fp16:
        # Cast the whole model, then bring the normalisation layers back to
        # fp32.
        model.half()
        patch_norm_fp32(model)
    # Flip the flag on every sub-module that declares it.
    for sub_module in model.modules():
        if hasattr(sub_module, 'fp16_enabled'):
            sub_module.fp16_enabled = True


def patch_norm_fp32(module):
    """Recursively convert normalization layers from FP16 to FP32.

    Args:
        module (nn.Module): The modules to be converted in FP16.

    Returns:
        nn.Module: The converted module, the normalization layers have been
            converted to FP32.
    """
    norm_types = (nn.modules.batchnorm._BatchNorm, nn.GroupNorm)
    if isinstance(module, norm_types):
        module.float()
        needs_forward_patch = (isinstance(module, nn.GroupNorm)
                               or torch.__version__ < '1.3')
        if needs_forward_patch:
            # Wrap forward so half inputs are cast to float on the way in
            # and the result is cast back to half.
            module.forward = patch_forward_method(module.forward, torch.half,
                                                  torch.float)
    for sub_module in module.children():
        patch_norm_fp32(sub_module)
    return module


def patch_forward_method(func, src_type, dst_type, convert_output=True):
    """Patch the forward method of a module.

    Args:
        func (callable): The original forward method.
        src_type (torch.dtype): Type of input arguments to be converted from.
        dst_type (torch.dtype): Type of input arguments to be converted to.
        convert_output (bool): Whether to convert the output back to src_type.

    Returns:
        callable: The patched forward method.
    """

    def new_forward(*args, **kwargs):
        cast_args = cast_tensor_type(args, src_type, dst_type)
        cast_kwargs = cast_tensor_type(kwargs, src_type, dst_type)
        result = func(*cast_args, **cast_kwargs)
        if not convert_output:
            return result
        return cast_tensor_type(result, dst_type, src_type)

    return new_forward
class LossScaler:
    """Class that manages loss scaling in mixed precision training which
    supports both dynamic or static mode.

    The implementation refers to
    https://github.com/NVIDIA/apex/blob/master/apex/fp16_utils/loss_scaler.py.
    Dynamic loss scaling is selected by supplying ``mode='dynamic'``.
    It's important to understand how :class:`LossScaler` operates.
    Loss scaling is designed to combat the problem of underflowing
    gradients encountered at long times when training fp16 networks.
    Dynamic loss scaling begins by attempting a very high loss
    scale.  Ironically, this may result in OVERflowing gradients.
    If overflowing gradients are encountered, :class:`FP16_Optimizer` then
    skips the update step for this particular iteration/minibatch,
    and :class:`LossScaler` adjusts the loss scale to a lower value.
    If a certain number of iterations occur without overflowing gradients
    detected, :class:`LossScaler` increases the loss scale once more.
    In this way :class:`LossScaler` attempts to "ride the edge" of always
    using the highest loss scale possible without incurring overflow.

    Args:
        init_scale (float): Initial loss scale value, default: 2**32.
        scale_factor (float): Factor used when adjusting the loss scale.
            Default: 2.
        mode (str): Loss scaling mode. 'dynamic' or 'static'
        scale_window (int): Number of consecutive iterations without an
            overflow to wait before increasing the loss scale. Default: 1000.
    """

    def __init__(self,
                 init_scale=2**32,
                 mode='dynamic',
                 scale_factor=2.,
                 scale_window=1000):
        self.cur_scale = init_scale
        self.cur_iter = 0
        assert mode in ('dynamic',
                        'static'), 'mode can only be dynamic or static'
        self.mode = mode
        # -1 means "no overflow seen yet".
        self.last_overflow_iter = -1
        self.scale_factor = scale_factor
        self.scale_window = scale_window

    def has_overflow(self, params):
        """Check if params contain overflow.

        Always ``False`` in static mode; in dynamic mode returns ``True`` as
        soon as one parameter's gradient contains inf or NaN.
        """
        if self.mode != 'dynamic':
            return False
        for p in params:
            if p.grad is not None and LossScaler._has_inf_or_nan(p.grad.data):
                return True
        return False

    @staticmethod
    def _has_inf_or_nan(x):
        """Check whether the sum of tensor ``x`` is inf or NaN.

        Declared as a static method so it can be called through the class
        (as ``has_overflow`` does) and through an instance alike; without the
        decorator an instance call would pass ``self`` as an extra argument
        and raise ``TypeError``.
        """
        try:
            cpu_sum = float(x.float().sum())
        except RuntimeError as instance:
            # A failed float conversion is treated as overflow; any other
            # RuntimeError is re-raised.
            if 'value cannot be converted' not in instance.args[0]:
                raise
            return True
        else:
            # `cpu_sum != cpu_sum` is the NaN test.
            if cpu_sum == float('inf') or cpu_sum == -float('inf') \
                    or cpu_sum != cpu_sum:
                return True
            return False

    def update_scale(self, overflow):
        """update the current loss scale value when overflow happens.

        In dynamic mode an overflow divides the scale by ``scale_factor``
        (never below 1) and records the iteration; otherwise the scale is
        multiplied by ``scale_factor`` whenever the number of iterations
        since the last overflow is a multiple of ``scale_window``. The
        iteration counter advances on every dynamic-mode call. Static mode
        is a no-op.
        """
        if self.mode != 'dynamic':
            return
        if overflow:
            self.cur_scale = max(self.cur_scale / self.scale_factor, 1)
            self.last_overflow_iter = self.cur_iter
        else:
            if (self.cur_iter - self.last_overflow_iter) % \
                    self.scale_window == 0:
                self.cur_scale *= self.scale_factor
        self.cur_iter += 1

    def state_dict(self):
        """Returns the state of the scaler as a :class:`dict`."""
        return dict(
            cur_scale=self.cur_scale,
            cur_iter=self.cur_iter,
            mode=self.mode,
            last_overflow_iter=self.last_overflow_iter,
            scale_factor=self.scale_factor,
            scale_window=self.scale_window)

    def load_state_dict(self, state_dict):
        """Loads the loss_scaler state dict.

        Args:
           state_dict (dict): scaler state.
        """
        self.cur_scale = state_dict['cur_scale']
        self.cur_iter = state_dict['cur_iter']
        self.mode = state_dict['mode']
        self.last_overflow_iter = state_dict['last_overflow_iter']
        self.scale_factor = state_dict['scale_factor']
        self.scale_window = state_dict['scale_window']

    @property
    def loss_scale(self):
        """Current loss scale value."""
        return self.cur_scale
@HOOKS.register_module()
class CheckpointHook(Hook):
    """Save checkpoints periodically.

    Args:
        interval (int): The saving period. If ``by_epoch=True``, interval
            indicates epochs, otherwise it indicates iterations.
            Default: -1, which means "never".
        by_epoch (bool): Saving checkpoints by epoch or by iteration.
            Default: True.
        save_optimizer (bool): Whether to save optimizer state_dict in the
            checkpoint. It is usually used for resuming experiments.
            Default: True.
        out_dir (str, optional): The root directory to save checkpoints. If not
            specified, ``runner.work_dir`` will be used by default. If
            specified, the ``out_dir`` will be the concatenation of ``out_dir``
            and the last level directory of ``runner.work_dir``.
            `Changed in version 1.3.16.`
        max_keep_ckpts (int, optional): The maximum checkpoints to keep.
            In some cases we want only the latest few checkpoints and would
            like to delete old ones to save the disk space.
            Default: -1, which means unlimited.
        save_last (bool, optional): Whether to force the last checkpoint to be
            saved regardless of interval. Default: True.
        sync_buffer (bool, optional): Whether to synchronize buffers in
            different gpus. Default: False.
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.
            `New in version 1.3.16.`

    .. warning::
        Before v1.3.16, the ``out_dir`` argument indicates the path where the
        checkpoint is stored. However, since v1.3.16, ``out_dir`` indicates the
        root directory and the final path to save checkpoint is the
        concatenation of ``out_dir`` and the last level directory of
        ``runner.work_dir``. Suppose the value of ``out_dir`` is "/path/of/A"
        and the value of ``runner.work_dir`` is "/path/of/B", then the final
        path will be "/path/of/A/B".
    """

    def __init__(self,
                 interval=-1,
                 by_epoch=True,
                 save_optimizer=True,
                 out_dir=None,
                 max_keep_ckpts=-1,
                 save_last=True,
                 sync_buffer=False,
                 file_client_args=None,
                 **kwargs):
        # Saving schedule.
        self.interval = interval
        self.by_epoch = by_epoch
        self.save_last = save_last
        self.max_keep_ckpts = max_keep_ckpts
        # What and where to save.
        self.save_optimizer = save_optimizer
        self.out_dir = out_dir
        self.file_client_args = file_client_args
        self.sync_buffer = sync_buffer
        # Extra keyword arguments are forwarded to runner.save_checkpoint().
        self.args = kwargs

    def before_run(self, runner):
        """Resolve the output directory, file client and symlink policy."""
        if not self.out_dir:
            self.out_dir = runner.work_dir

        self.file_client = FileClient.infer_client(self.file_client_args,
                                                   self.out_dir)

        # A user-supplied `out_dir` is a root directory: the checkpoints go
        # into `<out_dir>/<last level directory of runner.work_dir>`.
        if self.out_dir != runner.work_dir:
            work_dir_name = osp.basename(runner.work_dir.rstrip(osp.sep))
            self.out_dir = self.file_client.join_path(self.out_dir,
                                                      work_dir_name)

        print_log((f'Checkpoints will be saved to {self.out_dir} by '
                   f'{self.file_client.name}.'), logger=runner.logger)

        # Some file backends cannot create symlinks, so the option is
        # defaulted from, or overridden by, the backend capability.
        if 'create_symlink' not in self.args:
            self.args['create_symlink'] = self.file_client.allow_symlink
        elif self.args[
                'create_symlink'] and not self.file_client.allow_symlink:
            self.args['create_symlink'] = False
            warnings.warn(
                ('create_symlink is set as True by the user but is changed'
                 'to be False because creating symbolic link is not '
                 f'allowed in {self.file_client.name}'))

    def after_train_epoch(self, runner):
        """Save an epoch checkpoint when the schedule calls for one."""
        if not self.by_epoch:
            return

        # save checkpoint for following cases:
        # 1. every ``self.interval`` epochs
        # 2. reach the last epoch of training
        due = self.every_n_epochs(runner, self.interval) or (
            self.save_last and self.is_last_epoch(runner))
        if not due:
            return
        print_log(
            f'Saving checkpoint at {runner.epoch + 1} epochs',
            logger=runner.logger)
        if self.sync_buffer:
            allreduce_params(runner.model.buffers())
        self._save_checkpoint(runner)

    @master_only
    def _save_checkpoint(self, runner):
        """Save the current checkpoint and delete unwanted checkpoint."""
        runner.save_checkpoint(
            self.out_dir, save_optimizer=self.save_optimizer, **self.args)

        if runner.meta is not None:
            if self.by_epoch:
                latest_name = self.args.get(
                    'filename_tmpl', 'epoch_{}.pth').format(runner.epoch + 1)
            else:
                latest_name = self.args.get(
                    'filename_tmpl', 'iter_{}.pth').format(runner.iter + 1)
            runner.meta.setdefault('hook_msgs', dict())
            runner.meta['hook_msgs']['last_ckpt'] = self.file_client.join_path(
                self.out_dir, latest_name)

        # remove other checkpoints
        if self.max_keep_ckpts <= 0:
            return
        if self.by_epoch:
            default_tmpl = 'epoch_{}.pth'
            newest = runner.epoch + 1
        else:
            default_tmpl = 'iter_{}.pth'
            newest = runner.iter + 1
        # Walk backwards from the first checkpoint outside the keep window.
        stale_steps = range(newest - self.max_keep_ckpts * self.interval, 0,
                            -self.interval)
        tmpl = self.args.get('filename_tmpl', default_tmpl)
        for step in stale_steps:
            stale_path = self.file_client.join_path(self.out_dir,
                                                    tmpl.format(step))
            if not self.file_client.isfile(stale_path):
                # Stop at the first missing file.
                break
            self.file_client.remove(stale_path)

    def after_train_iter(self, runner):
        """Save an iteration checkpoint when the schedule calls for one."""
        if self.by_epoch:
            return

        # save checkpoint for following cases:
        # 1. every ``self.interval`` iterations
        # 2. reach the last iteration of training
        due = self.every_n_iters(runner, self.interval) or (
            self.save_last and self.is_last_iter(runner))
        if not due:
            return
        print_log(
            f'Saving checkpoint at {runner.iter + 1} iterations',
            logger=runner.logger)
        if self.sync_buffer:
            allreduce_params(runner.model.buffers())
        self._save_checkpoint(runner)
Returns: ''' self.best_epoch = best_epoch self.print_freq = print_freq self.save_top_k = save_top_k self.sync_buffer = sync_buffer self.indicator = 'top-1' if indicator == 'top' else indicator self.formatter_filename = formatter_filename # indicator_lc = indicator.lower() if greater_keys is None: greater_keys = ModelCheckpoint._default_greater_keys else: if not isinstance(greater_keys, (list, tuple)): greater_keys = (greater_keys,) # assert is_seq_of(greater_keys, str) greater_keys = [key.lower() for key in greater_keys] if less_keys is None: less_keys = self._default_less_keys else: if not isinstance(less_keys, (list, tuple)): less_keys = (less_keys,) # assert is_seq_of(less_keys, str) less_keys = [key.lower() for key in less_keys] if indicator in greater_keys: rule = 'greater' elif indicator in less_keys: rule = 'less' elif any(key in indicator for key in greater_keys): rule = 'greater' elif any(key in indicator for key in less_keys): rule = 'less' else: raise ValueError(f'Cannot infer the rule for key ' f'{indicator}, thus a specific rule ' f'must be specified.') self.best_prec1 = self._default_best_prec1[rule] if best_prec1 is None else best_prec1 self.compare_func = self.rule_map[rule] self.indicator_func = self.indicator_rule_map[rule] self.rule = rule def before_run(self, runner): self.save_model_path = runner.work_dir self.ckpt = os.path.join(self.save_model_path, 'checkpoint') os.makedirs(self.save_model_path, exist_ok=True) print_log(f'Checkpoints will be saved to {self.save_model_path}', logger=runner.logger) def earlyStopping(self, avg_grad_norm): if avg_grad_norm > 100: return True def after_train_epoch(self, runner): if self.sync_buffer: allreduce_params(runner.model.buffers()) metrics = runner.metrics# metrics = {k: meter.avg for k, meter in runner.log_buffer.meters.items()} runner.earlyStop = self.earlyStopping(metrics.get('grad_norm', 0)) self.save_checkpoint(runner, metrics) # print_log(' * Best training metrics so far@ {best_metric} in epoch 
{best_epoch}'.format( # best_metric=metrics['best_metric'], best_epoch=metrics['best_epoch']), logger=runner.logger) def _save_checkpoint(self, meta, out_dir, filename, is_best, create_symlink=True): if meta is None: meta = {} elif not isinstance(meta, dict): raise TypeError( f'meta should be a dict or None, but got {type(meta)}') # meta.update(epoch=meta.pop('epoch') + 1, iter=meta.pop('iter')) filepath = os.path.join(out_dir, filename) # save_checkpoint(meta.pop('model'), filepath, optimizer=meta.pop('optimizer'), meta=meta) save_checkpoint(filepath, meta=meta) if create_symlink or is_best: dst_file = os.path.join(out_dir, 'model_best_.pth') if platform.system() != 'Windows': mmcv.symlink(filename, dst_file) else: shutil.copy(filepath, dst_file) @master_only def save_checkpoint(self, runner, metrics): flag = False if not hasattr(runner.model, 'train') and isinstance(runner.model.model, dict): flag = True stats = {} for k, m in runner.model.model.items(): stats[k] = { 'epoch': runner.epoch, 'iter': runner.iter, 'model': m, 'best_metric': {name: value for name, value in metrics.items() if name not in ['grad_norm', 'lr', 'time', 'data_time']}, # 保存多个metric的数值, 实际比较的时候还是只有一个 'loss': metrics['loss'], 'best_epoch': runner._epoch, 'optimizer': runner.optimizer[k] } runner.metrics.update( {'best_metric': {k: stats[k]['best_metric']}, 'best_epoch': {k: stats[k]['best_epoch']}}) else: stats = { 'epoch': runner.epoch, 'iter': runner.iter, 'model': runner.model, 'best_metric': {name: value for name, value in metrics.items() if name not in ['grad_norm', 'lr', 'time', 'data_time']}, # 保存多个metric的数值, 实际比较的时候还是只有一个 'loss': metrics['loss'], 'best_epoch': runner._epoch, 'optimizer': runner.optimizer } runner.metrics.update(best_metric=stats['best_metric'], best_epoch=stats['best_epoch']) new_best_k_model_flag = [] indicator = self.indicator save_top_k = self.save_top_k # stats 应当是{epoch: X, score: Y} -> [epoch, score] assert isinstance(stats, dict), print(f"stats in 
model_checkpoint should be dict but be {type(stats)}") # stats = list(stats.values()) best_k_model, _ = get_best_k_model(self.save_model_path + "/checkpoint", indicator) # print(best_k_model) if save_top_k < 0: raise ValueError(f"Invalid value for save_top_k={save_top_k}. Must be >= 0") if save_top_k == 0: stats['best_metric'] = self._default_best_prec1[self.rule] stats['best_epoch'] = 0 self._save_checkpoint(stats, self.save_model_path, is_best=False, filename=f"{stats['epoch']}.pth.tar") if save_top_k >= 1: # self.best_prec1 = self.indicator_func(self.best_prec1, stats[self.indicator]) if len(best_k_model) >= save_top_k: # reverse=True, 降序, default: False # 使用索引去对best_k_model进行排序,best_k_model应是列表,才能返回索引 best_k_model.append([stats['epoch'], stats['best_metric'], None]) sortedIndex_best_k_model = sorted(range(len(best_k_model)), key=lambda k: float(best_k_model[k][1][indicator]), reverse=self.rule == "less") # print(sortedIndex_best_k_model) new_best_k_model_flag = [ not self.compare_func(float(query_score[indicator]), stats['best_metric'][indicator]) for _, query_score, _ in best_k_model] # print(new_best_k_model_flag) # ckpt_stats = [] # {} # key会冲突导致popitem出错 # ckpt_stats[str(stats['epoch'])] = stats[indicator] ckpt_stats = [stats['epoch'], stats['best_metric']] for index in sortedIndex_best_k_model: if new_best_k_model_flag[index]: # top_k_count += 1 # best_k_model[indicator][index] = stats[indicator] # best_k_model['epoch'][index] = stats['epoch'] # best_k_model.pop(str(index)) # best_k_model.update(ckpt_stats) # best_k_model[index] = list(ckpt_stats.popitem()) fname = self.save_model_path + "/" + best_k_model[index][2] ckpt_stats.append(None) best_k_model[index] = ckpt_stats if os.path.isfile(fname): os.remove(fname) break stats['best_epoch'], stats['best_metric'] = best_k_model[sortedIndex_best_k_model[-1]][:2] best_k_model = best_k_model[:-1] # best_k_model = [{'epoch': k, 'score': v} for k, v in best_k_model.items()] best_k_model = [{'epoch': epoch, 
'best_metric': score} for (epoch, score, _) in best_k_model] with open(self.ckpt, 'w') as f: outs = [self.formatter_filename.format(**line) + "\n" for line in best_k_model] f.writelines(outs) else: if not flag: with open(self.ckpt, 'a') as f: outs = self.formatter_filename.format(**stats) + "\n" f.writelines(outs) # 训练初期,不满topk时候, 模型是否保存下来 # if save_top_k == 1: # if len(best_k_model) < save_top_k: # new_best_k_model_flag = [True] is_best = any(new_best_k_model_flag) if runner.epoch % self.print_freq == 0 or is_best: self._save_checkpoint( stats, out_dir=self.save_model_path, is_best=is_best, filename=f"{runner.epoch}.pth.tar") if not flag: print_log(' * Best training metrics so far@ {best_metric} in epoch {best_epoch}'.format( best_metric=stats['best_metric'], best_epoch=stats['best_epoch']), logger=runner.logger ) return stats def after_train_iter(self, runner): if hasattr(runner.model, 'train'): if type(runner.model.module.model).__name__ == 'INN': runner.model.module.model.free() else: if isinstance(runner.model.model, dict): runner.model.model['PAN2MS'].module.free() # raise NotImplementedError("after_train_iter is not implemented by ModelCheckpoint (customed)") ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/closure.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from .hook import HOOKS, Hook @HOOKS.register_module() class ClosureHook(Hook): def __init__(self, fn_name, fn): assert hasattr(self, fn_name) assert callable(fn) setattr(self, fn_name, fn) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/ema.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from ...parallel import is_module_wrapper from ..hooks.hook import HOOKS, Hook @HOOKS.register_module() class EMAHook(Hook): r"""Exponential Moving Average Hook. 
Use Exponential Moving Average on all parameters of model in training process. All parameters have a ema backup, which update by the formula as below. EMAHook takes priority over EvalHook and CheckpointSaverHook. .. math:: Xema\_{t+1} = (1 - \text{momentum}) \times Xema\_{t} + \text{momentum} \times X_t Args: momentum (float): The momentum used for updating ema parameter. Defaults to 0.0002. interval (int): Update ema parameter every interval iteration. Defaults to 1. warm_up (int): During first warm_up steps, we may use smaller momentum to update ema parameters more slowly. Defaults to 100. resume_from (str): The checkpoint path. Defaults to None. """ def __init__(self, momentum=0.0002, interval=1, warm_up=100, resume_from=None): assert isinstance(interval, int) and interval > 0 self.warm_up = warm_up self.interval = interval assert momentum > 0 and momentum < 1 self.momentum = momentum**interval self.checkpoint = resume_from def before_run(self, runner): """To resume model with it's ema parameters more friendly. Register ema parameter as ``named_buffer`` to model """ model = runner.model if is_module_wrapper(model): model = model.module self.param_ema_buffer = {} self.model_parameters = dict(model.named_parameters(recurse=True)) for name, value in self.model_parameters.items(): # "." 
is not allowed in module's buffer name buffer_name = f"ema_{name.replace('.', '_')}" self.param_ema_buffer[name] = buffer_name model.register_buffer(buffer_name, value.data.clone()) self.model_buffers = dict(model.named_buffers(recurse=True)) if self.checkpoint is not None: runner.resume(self.checkpoint) def after_train_iter(self, runner): """Update ema parameter every self.interval iterations.""" curr_step = runner.iter # We warm up the momentum considering the instability at beginning momentum = min(self.momentum, (1 + curr_step) / (self.warm_up + curr_step)) if curr_step % self.interval != 0: return for name, parameter in self.model_parameters.items(): buffer_name = self.param_ema_buffer[name] buffer_parameter = self.model_buffers[buffer_name] buffer_parameter.mul_(1 - momentum).add_(momentum, parameter.data) def after_train_epoch(self, runner): """We load parameter values from ema backup to model before the EvalHook.""" self._swap_ema_parameters() def before_train_epoch(self, runner): """We recover model's parameter from ema backup after last epoch's EvalHook.""" self._swap_ema_parameters() def _swap_ema_parameters(self): """Swap the parameter of model with parameter in ema_buffer.""" for name, value in self.model_parameters.items(): temp = value.data.clone() ema_buffer = self.model_buffers[self.param_ema_buffer[name]] value.data.copy_(ema_buffer.data) ema_buffer.data.copy_(temp) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/evaluation.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import os.path as osp import warnings from math import inf import torch.distributed as dist from torch.nn.modules.batchnorm import _BatchNorm from torch.utils.data import DataLoader from mmcv.fileio import FileClient from mmcv.utils import is_seq_of from .hook import Hook from .logger import LoggerHook class EvalHook(Hook): """Non-Distributed evaluation hook. 
This hook will regularly perform evaluation in a given interval when performing in non-distributed environment. Args: dataloader (DataLoader): A PyTorch dataloader, whose dataset has implemented ``evaluate`` function. start (int | None, optional): Evaluation starting epoch. It enables evaluation before the training starts if ``start`` <= the resuming epoch. If None, whether to evaluate is merely decided by ``interval``. Default: None. interval (int): Evaluation interval. Default: 1. by_epoch (bool): Determine perform evaluation by epoch or by iteration. If set to True, it will perform by epoch. Otherwise, by iteration. Default: True. save_best (str, optional): If a metric is specified, it would measure the best checkpoint during evaluation. The information about best checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep best score value and best checkpoint path, which will be also loaded when resume checkpoint. Options are the evaluation metrics on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox detection and instance segmentation. ``AR@100`` for proposal recall. If ``save_best`` is ``auto``, the first key of the returned ``OrderedDict`` result will be used. Default: None. rule (str | None, optional): Comparison rule for best score. If set to None, it will infer a reasonable rule. Keys such as 'acc', 'top' .etc will be inferred by 'greater' rule. Keys contain 'loss' will be inferred by 'less' rule. Options are 'greater', 'less', None. Default: None. test_fn (callable, optional): test a model with samples from a dataloader, and return the test results. If ``None``, the default test function ``mmcv.engine.single_gpu_test`` will be used. (default: ``None``) greater_keys (List[str] | None, optional): Metric keys that will be inferred by 'greater' comparison rule. If ``None``, _default_greater_keys will be used. (default: ``None``) less_keys (List[str] | None, optional): Metric keys that will be inferred by 'less' comparison rule. 
If ``None``, _default_less_keys will be used. (default: ``None``) out_dir (str, optional): The root directory to save checkpoints. If not specified, `runner.work_dir` will be used by default. If specified, the `out_dir` will be the concatenation of `out_dir` and the last level directory of `runner.work_dir`. `New in version 1.3.16.` file_client_args (dict): Arguments to instantiate a FileClient. See :class:`mmcv.fileio.FileClient` for details. Default: None. `New in version 1.3.16.` **eval_kwargs: Evaluation arguments fed into the evaluate function of the dataset. Note: If new arguments are added for EvalHook, tools/test.py, tools/eval_metric.py may be affected. """ # Since the key for determine greater or less is related to the downstream # tasks, downstream repos may need to overwrite the following inner # variable accordingly. rule_map = {'greater': lambda x, y: x > y, 'less': lambda x, y: x < y} init_value_map = {'greater': -inf, 'less': inf} _default_greater_keys = [ 'acc', 'top', 'AR@', 'auc', 'precision', 'mAP', 'mDice', 'mIoU', 'mAcc', 'aAcc' ] _default_less_keys = ['loss'] def __init__(self, dataloader, start=None, interval=1, by_epoch=True, save_best=None, rule=None, test_fn=None, greater_keys=None, less_keys=None, out_dir=None, file_client_args=None, **eval_kwargs): if not isinstance(dataloader, DataLoader): raise TypeError(f'dataloader must be a pytorch DataLoader, ' f'but got {type(dataloader)}') if interval <= 0: raise ValueError(f'interval must be a positive number, ' f'but got {interval}') assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean' if start is not None and start < 0: raise ValueError(f'The evaluation start epoch {start} is smaller ' f'than 0') self.dataloader = dataloader self.interval = interval self.start = start self.by_epoch = by_epoch assert isinstance(save_best, str) or save_best is None, \ '""save_best"" should be a str or None ' \ f'rather than {type(save_best)}' self.save_best = save_best self.eval_kwargs = 
eval_kwargs self.initial_flag = True if test_fn is None: from mmcv.engine import single_gpu_test self.test_fn = single_gpu_test else: self.test_fn = test_fn if greater_keys is None: self.greater_keys = self._default_greater_keys else: if not isinstance(greater_keys, (list, tuple)): greater_keys = (greater_keys, ) assert is_seq_of(greater_keys, str) self.greater_keys = greater_keys if less_keys is None: self.less_keys = self._default_less_keys else: if not isinstance(less_keys, (list, tuple)): less_keys = (less_keys, ) assert is_seq_of(less_keys, str) self.less_keys = less_keys if self.save_best is not None: self.best_ckpt_path = None self._init_rule(rule, self.save_best) self.out_dir = out_dir self.file_client_args = file_client_args def _init_rule(self, rule, key_indicator): """Initialize rule, key_indicator, comparison_func, and best score. Here is the rule to determine which rule is used for key indicator when the rule is not specific (note that the key indicator matching is case-insensitive): 1. If the key indicator is in ``self.greater_keys``, the rule will be specified as 'greater'. 2. Or if the key indicator is in ``self.less_keys``, the rule will be specified as 'less'. 3. Or if the key indicator is equal to the substring in any one item in ``self.greater_keys``, the rule will be specified as 'greater'. 4. Or if the key indicator is equal to the substring in any one item in ``self.less_keys``, the rule will be specified as 'less'. Args: rule (str | None): Comparison rule for best score. key_indicator (str | None): Key indicator to determine the comparison rule. 
""" if rule not in self.rule_map and rule is not None: raise KeyError(f'rule must be greater, less or None, ' f'but got {rule}.') if rule is None: if key_indicator != 'auto': # `_lc` here means we use the lower case of keys for # case-insensitive matching key_indicator_lc = key_indicator.lower() greater_keys = [key.lower() for key in self.greater_keys] less_keys = [key.lower() for key in self.less_keys] if key_indicator_lc in greater_keys: rule = 'greater' elif key_indicator_lc in less_keys: rule = 'less' elif any(key in key_indicator_lc for key in greater_keys): rule = 'greater' elif any(key in key_indicator_lc for key in less_keys): rule = 'less' else: raise ValueError(f'Cannot infer the rule for key ' f'{key_indicator}, thus a specific rule ' f'must be specified.') self.rule = rule self.key_indicator = key_indicator if self.rule is not None: self.compare_func = self.rule_map[self.rule] def before_run(self, runner): if not self.out_dir: self.out_dir = runner.work_dir self.file_client = FileClient.infer_client(self.file_client_args, self.out_dir) # if `self.out_dir` is not equal to `runner.work_dir`, it means that # `self.out_dir` is set so the final `self.out_dir` is the # concatenation of `self.out_dir` and the last level directory of # `runner.work_dir` if self.out_dir != runner.work_dir: basename = osp.basename(runner.work_dir.rstrip(osp.sep)) self.out_dir = self.file_client.join_path(self.out_dir, basename) runner.logger.info( (f'The best checkpoint will be saved to {self.out_dir} by ' f'{self.file_client.name}')) if self.save_best is not None: if runner.meta is None: warnings.warn('runner.meta is None. 
Creating an empty one.') runner.meta = dict() runner.meta.setdefault('hook_msgs', dict()) self.best_ckpt_path = runner.meta['hook_msgs'].get( 'best_ckpt', None) def before_train_iter(self, runner): """Evaluate the model only at the start of training by iteration.""" if self.by_epoch or not self.initial_flag: return if self.start is not None and runner.iter >= self.start: self.after_train_iter(runner) self.initial_flag = False def before_train_epoch(self, runner): """Evaluate the model only at the start of training by epoch.""" if not (self.by_epoch and self.initial_flag): return if self.start is not None and runner.epoch >= self.start: self.after_train_epoch(runner) self.initial_flag = False def after_train_iter(self, runner): """Called after every training iter to evaluate the results.""" if not self.by_epoch and self._should_evaluate(runner): # Because the priority of EvalHook is higher than LoggerHook, the # training log and the evaluating log are mixed. Therefore, # we need to dump the training log and clear it before evaluating # log is generated. In addition, this problem will only appear in # `IterBasedRunner` whose `self.by_epoch` is False, because # `EpochBasedRunner` whose `self.by_epoch` is True calls # `_do_evaluate` in `after_train_epoch` stage, and at this stage # the training log has been printed, so it will not cause any # problem. more details at # https://github.com/open-mmlab/mmsegmentation/issues/694 for hook in runner._hooks: if isinstance(hook, LoggerHook): hook.after_train_iter(runner) runner.log_buffer.clear() self._do_evaluate(runner) def after_train_epoch(self, runner): """Called after every training epoch to evaluate the results.""" # if self.by_epoch and self._should_evaluate(runner): # self._do_evaluate(runner) ... 
def _do_evaluate(self, runner): """perform evaluation and save ckpt.""" results = self.test_fn(runner.model, self.dataloader) runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) key_score = self.evaluate(runner, results) # the key_score may be `None` so it needs to skip the action to save # the best checkpoint if self.save_best and key_score: self._save_ckpt(runner, key_score) def _should_evaluate(self, runner): """Judge whether to perform evaluation. Here is the rule to judge whether to perform evaluation: 1. It will not perform evaluation during the epoch/iteration interval, which is determined by ``self.interval``. 2. It will not perform evaluation if the start time is larger than current time. 3. It will not perform evaluation when current time is larger than the start time but during epoch/iteration interval. Returns: bool: The flag indicating whether to perform evaluation. """ if self.by_epoch: current = runner.epoch check_time = self.every_n_epochs else: current = runner.iter check_time = self.every_n_iters if self.start is None: if not check_time(runner, self.interval): # No evaluation during the interval. return False elif (current + 1) < self.start: # No evaluation if start is larger than the current time. return False else: # Evaluation only at epochs/iters 3, 5, 7... # if start==3 and interval==2 if (current + 1 - self.start) % self.interval: return False return True def _save_ckpt(self, runner, key_score): """Save the best checkpoint. It will compare the score according to the compare function, write related information (best score, best checkpoint path) and save the best checkpoint into ``work_dir``. 
""" if self.by_epoch: current = f'epoch_{runner.epoch + 1}' cur_type, cur_time = 'epoch', runner.epoch + 1 else: current = f'iter_{runner.iter + 1}' cur_type, cur_time = 'iter', runner.iter + 1 best_score = runner.meta['hook_msgs'].get( 'best_score', self.init_value_map[self.rule]) if self.compare_func(key_score, best_score): best_score = key_score runner.meta['hook_msgs']['best_score'] = best_score if self.best_ckpt_path and self.file_client.isfile( self.best_ckpt_path): self.file_client.remove(self.best_ckpt_path) runner.logger.info( (f'The previous best checkpoint {self.best_ckpt_path} was ' 'removed')) best_ckpt_name = f'best_{self.key_indicator}_{current}.pth' self.best_ckpt_path = self.file_client.join_path( self.out_dir, best_ckpt_name) runner.meta['hook_msgs']['best_ckpt'] = self.best_ckpt_path runner.save_checkpoint( self.out_dir, best_ckpt_name, create_symlink=False) runner.logger.info( f'Now best checkpoint is saved as {best_ckpt_name}.') runner.logger.info( f'Best {self.key_indicator} is {best_score:0.4f} ' f'at {cur_time} {cur_type}.') def evaluate(self, runner, results): """Evaluate the results. Args: runner (:obj:`mmcv.Runner`): The underlined training runner. results (list): Output results. """ eval_res = self.dataloader.dataset.evaluate( results, logger=runner.logger, **self.eval_kwargs) for name, val in eval_res.items(): runner.log_buffer.output[name] = val runner.log_buffer.ready = True if self.save_best is not None: # If the performance of model is pool, the `eval_res` may be an # empty dict and it will raise exception when `self.save_best` is # not None. More details at # https://github.com/open-mmlab/mmdetection/issues/6265. 
if not eval_res: warnings.warn( 'Since `eval_res` is an empty dict, the behavior to save ' 'the best checkpoint will be skipped in this evaluation.') return None if self.key_indicator == 'auto': # infer from eval_results self._init_rule(self.rule, list(eval_res.keys())[0]) return eval_res[self.key_indicator] return None class DistEvalHook(EvalHook): """Distributed evaluation hook. This hook will regularly perform evaluation in a given interval when performing in distributed environment. Args: dataloader (DataLoader): A PyTorch dataloader, whose dataset has implemented ``evaluate`` function. start (int | None, optional): Evaluation starting epoch. It enables evaluation before the training starts if ``start`` <= the resuming epoch. If None, whether to evaluate is merely decided by ``interval``. Default: None. interval (int): Evaluation interval. Default: 1. by_epoch (bool): Determine perform evaluation by epoch or by iteration. If set to True, it will perform by epoch. Otherwise, by iteration. default: True. save_best (str, optional): If a metric is specified, it would measure the best checkpoint during evaluation. The information about best checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep best score value and best checkpoint path, which will be also loaded when resume checkpoint. Options are the evaluation metrics on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox detection and instance segmentation. ``AR@100`` for proposal recall. If ``save_best`` is ``auto``, the first key of the returned ``OrderedDict`` result will be used. Default: None. rule (str | None, optional): Comparison rule for best score. If set to None, it will infer a reasonable rule. Keys such as 'acc', 'top' .etc will be inferred by 'greater' rule. Keys contain 'loss' will be inferred by 'less' rule. Options are 'greater', 'less', None. Default: None. 
test_fn (callable, optional): test a model with samples from a dataloader in a multi-gpu manner, and return the test results. If ``None``, the default test function ``mmcv.engine.multi_gpu_test`` will be used. (default: ``None``) tmpdir (str | None): Temporary directory to save the results of all processes. Default: None. gpu_collect (bool): Whether to use gpu or cpu to collect results. Default: False. broadcast_bn_buffer (bool): Whether to broadcast the buffer(running_mean and running_var) of rank 0 to other rank before evaluation. Default: True. out_dir (str, optional): The root directory to save checkpoints. If not specified, `runner.work_dir` will be used by default. If specified, the `out_dir` will be the concatenation of `out_dir` and the last level directory of `runner.work_dir`. file_client_args (dict): Arguments to instantiate a FileClient. See :class:`mmcv.fileio.FileClient` for details. Default: None. **eval_kwargs: Evaluation arguments fed into the evaluate function of the dataset. 
""" def __init__(self, dataloader, start=None, interval=1, by_epoch=True, save_best=None, rule=None, test_fn=None, greater_keys=None, less_keys=None, broadcast_bn_buffer=True, tmpdir=None, gpu_collect=False, out_dir=None, file_client_args=None, **eval_kwargs): if test_fn is None: from mmcv.engine import multi_gpu_test test_fn = multi_gpu_test super().__init__( dataloader, start=start, interval=interval, by_epoch=by_epoch, save_best=save_best, rule=rule, test_fn=test_fn, greater_keys=greater_keys, less_keys=less_keys, out_dir=out_dir, file_client_args=file_client_args, **eval_kwargs) self.broadcast_bn_buffer = broadcast_bn_buffer self.tmpdir = tmpdir self.gpu_collect = gpu_collect def _do_evaluate(self, runner): """perform evaluation and save ckpt.""" # Synchronization of BatchNorm's buffer (running_mean # and running_var) is not supported in the DDP of pytorch, # which may cause the inconsistent performance of models in # different ranks, so we broadcast BatchNorm's buffers # of rank 0 to other ranks to avoid this. if self.broadcast_bn_buffer: model = runner.model for name, module in model.named_modules(): if isinstance(module, _BatchNorm) and module.track_running_stats: dist.broadcast(module.running_var, 0) dist.broadcast(module.running_mean, 0) tmpdir = self.tmpdir if tmpdir is None: tmpdir = osp.join(runner.work_dir, '.eval_hook') results = self.test_fn( runner.model, self.dataloader, tmpdir=tmpdir, gpu_collect=self.gpu_collect) if runner.rank == 0: print('\n') runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) key_score = self.evaluate(runner, results) # the key_score may be `None` so it needs to skip the action to # save the best checkpoint if self.save_best and key_score: self._save_ckpt(runner, key_score) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/hook.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
from mmcv.utils import Registry, is_method_overridden HOOKS = Registry('hook') class Hook: stages = ('before_run', 'before_train_epoch', 'before_train_iter', 'after_train_iter', 'after_train_epoch', 'before_val_epoch', 'before_val_iter', 'after_val_iter', 'after_val_epoch', 'after_run') def before_run(self, runner): pass def after_run(self, runner): pass def before_epoch(self, runner): pass def after_epoch(self, runner): pass def before_iter(self, runner): pass def after_iter(self, runner): pass def before_train_epoch(self, runner): self.before_epoch(runner) def before_val_epoch(self, runner): self.before_epoch(runner) def after_train_epoch(self, runner): self.after_epoch(runner) def after_val_epoch(self, runner): self.after_epoch(runner) def before_train_iter(self, runner): self.before_iter(runner) def before_val_iter(self, runner): self.before_iter(runner) def after_train_iter(self, runner): self.after_iter(runner) def after_val_iter(self, runner): self.after_iter(runner) def every_n_epochs(self, runner, n): return (runner.epoch + 1) % n == 0 if n > 0 else False def every_n_inner_iters(self, runner, n): return (runner.inner_iter + 1) % n == 0 if n > 0 else False def every_n_iters(self, runner, n): return (runner.iter + 1) % n == 0 if n > 0 else False def end_of_epoch(self, runner): return runner.inner_iter + 1 == len(runner.data_loader) def is_last_epoch(self, runner): return runner.epoch + 1 == runner._max_epochs def is_last_iter(self, runner): return runner.iter + 1 == runner._max_iters def get_triggered_stages(self): trigger_stages = set() for stage in Hook.stages: if is_method_overridden(stage, Hook, self): trigger_stages.add(stage) # some methods will be triggered in multi stages # use this dict to map method to stages. 
method_stages_map = { 'before_epoch': ['before_train_epoch', 'before_val_epoch'], 'after_epoch': ['after_train_epoch', 'after_val_epoch'], 'before_iter': ['before_train_iter', 'before_val_iter'], 'after_iter': ['after_train_iter', 'after_val_iter'], } for method, map_stages in method_stages_map.items(): if is_method_overridden(method, Hook, self): trigger_stages.update(map_stages) return [stage for stage in Hook.stages if stage in trigger_stages] ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/iter_timer.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import time from .hook import HOOKS, Hook @HOOKS.register_module() class IterTimerHook(Hook): def before_epoch(self, runner): self.t = time.time() def before_iter(self, runner): runner.log_buffer.update(data_time=time.time() - self.t)#{'data_time': time.time() - self.t} def after_iter(self, runner): runner.log_buffer.update(time=time.time() - self.t)#{'time': time.time() - self.t} self.t = time.time() ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/__init__.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from .base import LoggerHook from .dvclive import DvcliveLoggerHook from .mlflow import MlflowLoggerHook from .neptune import NeptuneLoggerHook from .pavi import PaviLoggerHook from .tensorboard import TensorboardLoggerHook from .text import TextLoggerHook from .wandb import WandbLoggerHook __all__ = [ 'LoggerHook', 'MlflowLoggerHook', 'PaviLoggerHook', 'TensorboardLoggerHook', 'TextLoggerHook', 'WandbLoggerHook', 'NeptuneLoggerHook', 'DvcliveLoggerHook' ] ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/base.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
import numbers from abc import ABCMeta, abstractmethod import numpy as np import torch from ..hook import Hook class LoggerHook(Hook): """Base class for logger hooks. Args: interval (int): Logging interval (every k iterations). Default 10. ignore_last (bool): Ignore the log of last iterations in each epoch if less than `interval`. Default True. reset_flag (bool): Whether to clear the output buffer after logging. Default False. by_epoch (bool): Whether EpochBasedRunner is used. Default True. """ __metaclass__ = ABCMeta def __init__(self, interval=10, ignore_last=True, reset_flag=False, by_epoch=True): self.interval = interval self.ignore_last = ignore_last self.reset_flag = reset_flag self.by_epoch = by_epoch @abstractmethod def log(self, runner): pass @staticmethod def is_scalar(val, include_np=True, include_torch=True): """Tell the input variable is a scalar or not. Args: val: Input variable. include_np (bool): Whether include 0-d np.ndarray as a scalar. include_torch (bool): Whether include 0-d torch.Tensor as a scalar. Returns: bool: True or False. 
""" if isinstance(val, numbers.Number): return True elif include_np and isinstance(val, np.ndarray) and val.ndim == 0: return True elif include_torch and isinstance(val, torch.Tensor) and len(val) == 1: return True else: return False def get_mode(self, runner): if runner.mode == 'train': if 'time' in runner.log_buffer.meters: #output mode = 'train' else: mode = 'val' elif runner.mode == 'val': mode = 'val' else: raise ValueError(f"runner mode should be 'train' or 'val', " f'but got {runner.mode}') return mode def get_epoch(self, runner): if runner.mode == 'train': epoch = runner.epoch# + 1 elif runner.mode == 'val': # normal val mode # runner.epoch += 1 has been done before val workflow epoch = runner.epoch else: raise ValueError(f"runner mode should be 'train' or 'val', " f'but got {runner.mode}') return epoch def get_iter(self, runner, inner_iter=False): """Get the current training iteration step.""" if self.by_epoch and inner_iter: current_iter = runner.inner_iter + 1 else: current_iter = runner.iter + 1 return current_iter def get_lr_tags(self, runner): tags = {} lrs = runner.current_lr() if isinstance(lrs, dict): for name, value in lrs.items(): tags[f'learning_rate/{name}'] = value[0] else: tags['learning_rate'] = lrs[0] return tags def get_momentum_tags(self, runner): tags = {} momentums = runner.current_momentum() if isinstance(momentums, dict): for name, value in momentums.items(): tags[f'momentum/{name}'] = value[0] else: tags['momentum'] = momentums[0] return tags def get_loggable_tags(self, runner, allow_scalar=True, allow_text=False, add_mode=True, tags_to_skip=('time', 'data_time', 'learning_rate', 'pan2ms', 'grad_norm', 'lr', 'memory')): tags = {} for var, val in runner.metrics.items():#log_buffer.output if var in tags_to_skip: continue if self.is_scalar(val) and not allow_scalar: continue if isinstance(val, str) and not allow_text: continue if add_mode: var = f'{self.get_mode(runner)}/{var}' tags[var] = val tags.update(self.get_lr_tags(runner)) 
tags.update(self.get_momentum_tags(runner)) return tags def before_run(self, runner): for hook in runner.hooks[::-1]: if isinstance(hook, LoggerHook): hook.reset_flag = True break def before_epoch(self, runner): runner.log_buffer.clear() # clear logs of last epoch def after_train_iter(self, runner): # if self.by_epoch and self.every_n_inner_iters(runner, self.interval): # runner.log_buffer.average(self.interval) # elif not self.by_epoch and self.every_n_iters(runner, self.interval): # runner.log_buffer.average(self.interval) # elif self.end_of_epoch(runner) and not self.ignore_last: # # not precise but more stable # runner.log_buffer.average(self.interval) # if runner.log_buffer.ready: # self.log(runner) # if self.reset_flag: # runner.log_buffer.clear_output() if self.by_epoch and self.every_n_inner_iters(runner, self.interval): # runner.log_buffer.ready = True self.log(runner) # if self.reset_flag: # runner.log_buffer.clear_output() def after_train_epoch(self, runner): # if runner.log_buffer.ready: if self.every_n_epochs(runner, self.interval): self.log(runner) if self.reset_flag: runner.log_buffer.clear_output() def after_val_epoch(self, runner): # runner.log_buffer.average() self.log(runner) if self.reset_flag: runner.log_buffer.clear_output() def after_val_iter(self, runner): self.log(runner) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/dvclive.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from pathlib import Path from ...dist_utils import master_only from ..hook import HOOKS from .base import LoggerHook @HOOKS.register_module() class DvcliveLoggerHook(LoggerHook): """Class to log metrics with dvclive. It requires `dvclive`_ to be installed. Args: model_file (str): Default None. If not None, after each epoch the model will be saved to {model_file}. interval (int): Logging interval (every k iterations). Default 10. 
ignore_last (bool): Ignore the log of last iterations in each epoch if less than `interval`. Default: True. reset_flag (bool): Whether to clear the output buffer after logging. Default: False. by_epoch (bool): Whether EpochBasedRunner is used. Default: True. kwargs: Arguments for instantiating `Live`_. .. _dvclive: https://dvc.org/doc/dvclive .. _Live: https://dvc.org/doc/dvclive/api-reference/live#parameters """ def __init__(self, model_file=None, interval=10, ignore_last=True, reset_flag=False, by_epoch=True, **kwargs): super().__init__(interval, ignore_last, reset_flag, by_epoch) self.model_file = model_file self.import_dvclive(**kwargs) def import_dvclive(self, **kwargs): try: from dvclive import Live except ImportError: raise ImportError( 'Please run "pip install dvclive" to install dvclive') self.dvclive = Live(**kwargs) @master_only def log(self, runner): tags = self.get_loggable_tags(runner) if tags: self.dvclive.set_step(self.get_iter(runner)) for k, v in tags.items(): self.dvclive.log(k, v) @master_only def after_train_epoch(self, runner): super().after_train_epoch(runner) if self.model_file is not None: runner.save_checkpoint( Path(self.model_file).parent, filename_tmpl=Path(self.model_file).name, create_symlink=False, ) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/mlflow.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from mmcv.utils import TORCH_VERSION from ...dist_utils import master_only from ..hook import HOOKS from .base import LoggerHook @HOOKS.register_module() class MlflowLoggerHook(LoggerHook): """Class to log metrics and (optionally) a trained model to MLflow. It requires `MLflow`_ to be installed. Args: exp_name (str, optional): Name of the experiment to be used. Default None. If not None, set the active experiment. If experiment does not exist, an experiment with provided name will be created. 
tags (Dict[str], optional): Tags for the current run. Default None. If not None, set tags for the current run. log_model (bool, optional): Whether to log an MLflow artifact. Default True. If True, log runner.model as an MLflow artifact for the current run. interval (int): Logging interval (every k iterations). Default: 10. ignore_last (bool): Ignore the log of last iterations in each epoch if less than `interval`. Default: True. reset_flag (bool): Whether to clear the output buffer after logging. Default: False. by_epoch (bool): Whether EpochBasedRunner is used. Default: True. .. _MLflow: https://www.mlflow.org/docs/latest/index.html """ def __init__(self, exp_name=None, tags=None, log_model=True, interval=10, ignore_last=True, reset_flag=False, by_epoch=True): super(MlflowLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) self.import_mlflow() self.exp_name = exp_name self.tags = tags self.log_model = log_model def import_mlflow(self): try: import mlflow import mlflow.pytorch as mlflow_pytorch except ImportError: raise ImportError( 'Please run "pip install mlflow" to install mlflow') self.mlflow = mlflow self.mlflow_pytorch = mlflow_pytorch @master_only def before_run(self, runner): super(MlflowLoggerHook, self).before_run(runner) if self.exp_name is not None: self.mlflow.set_experiment(self.exp_name) if self.tags is not None: self.mlflow.set_tags(self.tags) @master_only def log(self, runner): tags = self.get_loggable_tags(runner) if tags: self.mlflow.log_metrics(tags, step=self.get_iter(runner)) @master_only def after_run(self, runner): if self.log_model: self.mlflow_pytorch.log_model( runner.model, 'models', pip_requirements=[f'torch=={TORCH_VERSION}']) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/neptune.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
from ...dist_utils import master_only from ..hook import HOOKS from .base import LoggerHook @HOOKS.register_module() class NeptuneLoggerHook(LoggerHook): """Class to log metrics to NeptuneAI. It requires `Neptune`_ to be installed. Args: init_kwargs (dict): a dict contains the initialization keys as below: - project (str): Name of a project in a form of namespace/project_name. If None, the value of NEPTUNE_PROJECT environment variable will be taken. - api_token (str): User’s API token. If None, the value of NEPTUNE_API_TOKEN environment variable will be taken. Note: It is strongly recommended to use NEPTUNE_API_TOKEN environment variable rather than placing your API token in plain text in your source code. - name (str, optional, default is 'Untitled'): Editable name of the run. Name is displayed in the run's Details and in Runs table as a column. Check https://docs.neptune.ai/api-reference/neptune#init for more init arguments. interval (int): Logging interval (every k iterations). Default: 10. ignore_last (bool): Ignore the log of last iterations in each epoch if less than ``interval``. Default: True. reset_flag (bool): Whether to clear the output buffer after logging. Default: True. with_step (bool): If True, the step will be logged from ``self.get_iters``. Otherwise, step will not be logged. Default: True. by_epoch (bool): Whether EpochBasedRunner is used. Default: True. .. 
_Neptune: https://docs.neptune.ai """ def __init__(self, init_kwargs=None, interval=10, ignore_last=True, reset_flag=True, with_step=True, by_epoch=True): super(NeptuneLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) self.import_neptune() self.init_kwargs = init_kwargs self.with_step = with_step def import_neptune(self): try: import neptune.new as neptune except ImportError: raise ImportError( 'Please run "pip install neptune-client" to install neptune') self.neptune = neptune self.run = None @master_only def before_run(self, runner): if self.init_kwargs: self.run = self.neptune.init(**self.init_kwargs) else: self.run = self.neptune.init() @master_only def log(self, runner): tags = self.get_loggable_tags(runner) if tags: for tag_name, tag_value in tags.items(): if self.with_step: self.run[tag_name].log( tag_value, step=self.get_iter(runner)) else: tags['global_step'] = self.get_iter(runner) self.run[tag_name].log(tags) @master_only def after_run(self, runner): self.run.stop() ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/pavi.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import json import os import os.path as osp import torch import yaml import mmcv from ....parallel.utils import is_module_wrapper from ...dist_utils import master_only from ..hook import HOOKS from .base import LoggerHook @HOOKS.register_module() class PaviLoggerHook(LoggerHook): """Class to visual model, log metrics (for internal use). Args: init_kwargs (dict): A dict contains the initialization keys. add_graph (bool): Whether to visual model. Default: False. add_last_ckpt (bool): Whether to save checkpoint after run. Default: False. interval (int): Logging interval (every k iterations). Default: True. ignore_last (bool): Ignore the log of last iterations in each epoch if less than `interval`. Default: True. 
reset_flag (bool): Whether to clear the output buffer after logging. Default: False. by_epoch (bool): Whether EpochBasedRunner is used. Default: True. img_key (string): Get image data from Dataset. Default: 'img_info'. """ def __init__(self, init_kwargs=None, add_graph=False, add_last_ckpt=False, interval=10, ignore_last=True, reset_flag=False, by_epoch=True, img_key='img_info'): super(PaviLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) self.init_kwargs = init_kwargs self.add_graph = add_graph self.add_last_ckpt = add_last_ckpt self.img_key = img_key @master_only def before_run(self, runner): super(PaviLoggerHook, self).before_run(runner) try: from pavi import SummaryWriter except ImportError: raise ImportError('Please run "pip install pavi" to install pavi.') self.run_name = runner.work_dir.split('/')[-1] if not self.init_kwargs: self.init_kwargs = dict() self.init_kwargs['name'] = self.run_name self.init_kwargs['model'] = runner._model_name if runner.meta is not None: if 'config_dict' in runner.meta: config_dict = runner.meta['config_dict'] assert isinstance( config_dict, dict), ('meta["config_dict"] has to be of a dict, ' f'but got {type(config_dict)}') elif 'config_file' in runner.meta: config_file = runner.meta['config_file'] config_dict = dict(mmcv.Config.fromfile(config_file)) else: config_dict = None if config_dict is not None: # 'max_.*iter' is parsed in pavi sdk as the maximum iterations # to properly set up the progress bar. 
config_dict = config_dict.copy() config_dict.setdefault('max_iter', runner.max_iters) # non-serializable values are first converted in # mmcv.dump to json config_dict = json.loads( mmcv.dump(config_dict, file_format='json')) session_text = yaml.dump(config_dict) self.init_kwargs['session_text'] = session_text self.writer = SummaryWriter(**self.init_kwargs) def get_step(self, runner): """Get the total training step/epoch.""" if self.get_mode(runner) == 'val' and self.by_epoch: return self.get_epoch(runner) else: return self.get_iter(runner) @master_only def log(self, runner): tags = self.get_loggable_tags(runner, add_mode=False) if tags: self.writer.add_scalars( self.get_mode(runner), tags, self.get_step(runner)) @master_only def after_run(self, runner): if self.add_last_ckpt: ckpt_path = osp.join(runner.work_dir, 'latest.pth') if osp.islink(ckpt_path): ckpt_path = osp.join(runner.work_dir, os.readlink(ckpt_path)) if osp.isfile(ckpt_path): # runner.epoch += 1 has been done before `after_run`. iteration = runner.epoch if self.by_epoch else runner.iter return self.writer.add_snapshot_file( tag=self.run_name, snapshot_file_path=ckpt_path, iteration=iteration) # flush the buffer and send a task ending signal to Pavi self.writer.close() @master_only def before_epoch(self, runner): if runner.epoch == 0 and self.add_graph: if is_module_wrapper(runner.model): _model = runner.model.module else: _model = runner.model device = next(_model.parameters()).device data = next(iter(runner.data_loader)) image = data[self.img_key][0:1].to(device) with torch.no_grad(): self.writer.add_graph(_model, image) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/tensorboard.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
import os.path as osp

from mmcv.utils import TORCH_VERSION, digit_version
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class TensorboardLoggerHook(LoggerHook):
    """Logger hook that writes metrics to Tensorboard.

    Args:
        log_dir (string): Directory the event files are written to.
            Default: None, in which case ``runner.work_dir``/tf_logs is
            used.
        interval (int): Logging interval (every k iterations). Default: 10.
        ignore_last (bool): Ignore the log of last iterations in each epoch
            if less than `interval`. Default: True.
        reset_flag (bool): Whether to clear the output buffer after logging.
            Default: False.
        by_epoch (bool): Whether EpochBasedRunner is used. Default: True.
    """

    def __init__(self,
                 log_dir=None,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 by_epoch=True):
        super().__init__(interval, ignore_last, reset_flag, by_epoch)
        self.log_dir = log_dir

    @master_only
    def before_run(self, runner):
        """Pick a ``SummaryWriter`` implementation and open the writer.

        tensorboardX is used for 'parrots' and for torch versions below
        1.1; ``torch.utils.tensorboard`` is used otherwise.

        Raises:
            ImportError: If the selected writer package is not installed.
        """
        super().before_run(runner)

        use_tensorboardx = (
            TORCH_VERSION == 'parrots'
            or digit_version(TORCH_VERSION) < digit_version('1.1'))

        if use_tensorboardx:
            try:
                from tensorboardX import SummaryWriter
            except ImportError:
                raise ImportError('Please install tensorboardX to use '
                                  'TensorboardLoggerHook.')
        else:
            try:
                from torch.utils.tensorboard import SummaryWriter
            except ImportError:
                raise ImportError(
                    'Please run "pip install future tensorboard" to install '
                    'the dependencies to use torch.utils.tensorboard '
                    '(applicable to PyTorch 1.1 or higher)')

        # Fall back to a sub-directory of the work dir.
        if self.log_dir is None:
            self.log_dir = osp.join(runner.work_dir, 'tf_logs')
        self.writer = SummaryWriter(self.log_dir)

    @master_only
    def log(self, runner):
        """Write every loggable tag: strings as text, the rest as scalars."""
        loggable = self.get_loggable_tags(runner, allow_text=True)
        for name in loggable:
            value = loggable[name]
            if isinstance(value, str):
                self.writer.add_text(name, value, self.get_iter(runner))
                continue
            self.writer.add_scalar(name, value, self.get_iter(runner))

    @master_only
    def after_run(self, runner):
        self.writer.close()
================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/text.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import datetime import os import os.path as osp from collections import OrderedDict import torch import torch.distributed as dist import mmcv from mmcv.fileio.file_client import FileClient from mmcv.utils import is_tuple_of, scandir from ..hook import HOOKS from .base import LoggerHook from mmcv.utils.logging import print_log @HOOKS.register_module() class TextLoggerHook(LoggerHook): """Logger hook in text. In this logger hook, the information will be printed on terminal and saved in json file. Args: by_epoch (bool, optional): Whether EpochBasedRunner is used. Default: True. interval (int, optional): Logging interval (every k iterations). Default: 10. ignore_last (bool, optional): Ignore the log of last iterations in each epoch if less than :attr:`interval`. Default: True. reset_flag (bool, optional): Whether to clear the output buffer after logging. Default: False. interval_exp_name (int, optional): Logging interval for experiment name. This feature is to help users conveniently get the experiment information from screen or log file. Default: 1000. out_dir (str, optional): Logs are saved in ``runner.work_dir`` default. If ``out_dir`` is specified, logs will be copied to a new directory which is the concatenation of ``out_dir`` and the last level directory of ``runner.work_dir``. Default: None. `New in version 1.3.16.` out_suffix (str or tuple[str], optional): Those filenames ending with ``out_suffix`` will be copied to ``out_dir``. Default: ('.log.json', '.log', '.py'). `New in version 1.3.16.` keep_local (bool, optional): Whether to keep local log when :attr:`out_dir` is specified. If False, the local log will be removed. Default: True. `New in version 1.3.16.` file_client_args (dict, optional): Arguments to instantiate a FileClient. 
See :class:`mmcv.fileio.FileClient` for details. Default: None. `New in version 1.3.16.` """ def __init__(self, by_epoch=True, interval=10, ignore_last=True, reset_flag=False, interval_exp_name=1000, out_dir=None, out_suffix=('.log.json', '.log', '.py'), keep_local=True, file_client_args=None): super(TextLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) self.by_epoch = by_epoch self.time_sec_tot = 0 self.interval_exp_name = interval_exp_name if out_dir is None and file_client_args is not None: raise ValueError( 'file_client_args should be "None" when `out_dir` is not' 'specified.') self.out_dir = out_dir if not (out_dir is None or isinstance(out_dir, str) or is_tuple_of(out_dir, str)): raise TypeError('out_dir should be "None" or string or tuple of ' 'string, but got {out_dir}') self.out_suffix = out_suffix self.keep_local = keep_local self.file_client_args = file_client_args if self.out_dir is not None: self.file_client = FileClient.infer_client(file_client_args, self.out_dir) def before_run(self, runner): super(TextLoggerHook, self).before_run(runner) if self.out_dir is not None: self.file_client = FileClient.infer_client(self.file_client_args, self.out_dir) # The final `self.out_dir` is the concatenation of `self.out_dir` # and the last level directory of `runner.work_dir` basename = osp.basename(runner.work_dir.rstrip(osp.sep)) self.out_dir = self.file_client.join_path(self.out_dir, basename) print_log( (f'Text logs will be saved to {self.out_dir} by ' f'{self.file_client.name} after the training process.'), logger=runner.logger) self.start_iter = runner.iter self.data_length = runner.data_length self.max_epochs = runner.max_epochs self.json_log_path = osp.join(runner.work_dir, f'{runner.timestamp}.log.json') if runner.meta is not None: self._dump_log(runner.meta, runner) def _get_max_memory(self, runner): device = getattr(runner.model, 'output_device', None) mem = torch.cuda.max_memory_allocated(device=device) mem_mb = torch.tensor([mem / 
(1024 * 1024)], dtype=torch.int, device=device) if runner.world_size > 1: dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX) return mem_mb.item() def _log_info(self, log_dict, runner): # print exp name for users to distinguish experiments # at every ``interval_exp_name`` iterations and the end of each epoch if runner.meta is not None and 'exp_name' in runner.meta: if (self.every_n_iters(runner, self.interval_exp_name)) or ( self.by_epoch and self.end_of_epoch(runner)): exp_info = f'Exp name: {runner.meta["exp_name"]}' print_log(exp_info, logger=runner.logger) if log_dict['mode'] == 'train': if isinstance(log_dict['lr'], dict): lr_str = [] for k, val in log_dict['lr'].items(): lr_str.append(f'lr_{k}: {val:.3e}') lr_str = ' '.join(lr_str) else: lr_str = f'lr: {log_dict["lr"]:.3e}' # by epoch: Epoch [4][100/1000] # by iter: Iter [100/100000] if self.by_epoch: log_str = f'Epoch [{log_dict["epoch"]}]/[{self.max_epochs}]' \ f'[{log_dict["iter"]}/{self.data_length}]\t' else: log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t' log_str += f'{lr_str}, ' if 'time' in log_dict.keys(): self.time_sec_tot += (log_dict['time'] * self.interval) time_sec_avg = self.time_sec_tot / ( runner.iter - self.start_iter + 1) # eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1) eta_str = str(datetime.timedelta(seconds=int(eta_sec))) log_str += f'eta: {eta_str}, ' log_str += f'time: {log_dict["time"]:.3f}, ' \ f'data_time: {log_dict["data_time"]:.3f}, ' # statistic memory if torch.cuda.is_available(): log_str += f'memory: {log_dict["memory"]}MB, ' else: # val/test time # here 1000 is the length of the val dataloader # by epoch: Epoch[val] [4][1000] # by iter: Iter[val] [1000] if self.by_epoch: log_str = f'Epoch({log_dict["mode"]}) ' \ f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t' else: log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t' log_items = [] for name, val in log_dict.items(): # TODO: resolve this hack # these items have been in log_str if name in [ 'mode', 
'Epoch', 'iter', 'lr', 'time', 'data_time', 'memory', 'epoch' ]: continue if isinstance(val, float): val = f'{val:.5f}' log_items.append(f'{name}: {val}') log_str += ', '.join(log_items) print_log(log_str, logger=runner.logger) def _dump_log(self, log_dict, runner): # dump log in json format json_log = OrderedDict() for k, v in log_dict.items(): json_log[k] = self._round_float(v) # only append log at last line if runner.rank == 0: with open(self.json_log_path, 'a+') as f: mmcv.dump(json_log, f, file_format='json') f.write('\n') def _round_float(self, items): if isinstance(items, list): return [self._round_float(item) for item in items] elif isinstance(items, float): return round(items, 5) else: return items def log(self, runner): if 'eval_iter_num' in runner.log_buffer.meters: #output # this doesn't modify runner.iter and is regardless of by_epoch cur_iter = runner.log_buffer.meters.pop('eval_iter_num') #output else: cur_iter = self.get_iter(runner, inner_iter=True) log_dict = OrderedDict( mode=self.get_mode(runner), epoch=self.get_epoch(runner), iter=cur_iter) # only record lr of the first param group cur_lr = runner.current_lr() if isinstance(cur_lr, list): log_dict['lr'] = cur_lr[0] else: assert isinstance(cur_lr, dict) log_dict['lr'] = {} for k, lr_ in cur_lr.items(): assert isinstance(lr_, list) log_dict['lr'].update({k: lr_[0]}) if 'time' in runner.log_buffer.meters:#output # statistic memory if torch.cuda.is_available(): log_dict['memory'] = self._get_max_memory(runner) runner.metrics = {k: meter.avg for k, meter in runner.log_buffer.meters.items()} log_dict = dict(log_dict, **runner.metrics) #output self._log_info(log_dict, runner) self._dump_log(log_dict, runner) return log_dict def after_run(self, runner): # copy or upload logs to self.out_dir if self.out_dir is not None: for filename in scandir(runner.work_dir, self.out_suffix, True): local_filepath = osp.join(runner.work_dir, filename) out_filepath = self.file_client.join_path( self.out_dir, filename) 
with open(local_filepath, 'r') as f: self.file_client.put_text(f.read(), out_filepath) print_log( (f'The file {local_filepath} has been uploaded to ' f'{out_filepath}.'), logger=runner.logger) if not self.keep_local: os.remove(local_filepath) print_log( (f'{local_filepath} was removed due to the ' '`self.keep_local=False`'), logger=runner.logger) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/wandb.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import os.path as osp from mmcv.utils import scandir from ...dist_utils import master_only from ..hook import HOOKS from .base import LoggerHook @HOOKS.register_module() class WandbLoggerHook(LoggerHook): """Class to log metrics with wandb. It requires `wandb`_ to be installed. Args: init_kwargs (dict): A dict contains the initialization keys. Check https://docs.wandb.ai/ref/python/init for more init arguments. interval (int): Logging interval (every k iterations). Default 10. ignore_last (bool): Ignore the log of last iterations in each epoch if less than `interval`. Default: True. reset_flag (bool): Whether to clear the output buffer after logging. Default: False. commit (bool): Save the metrics dict to the wandb server and increment the step. If false ``wandb.log`` just updates the current metrics dict with the row argument and metrics won't be saved until ``wandb.log`` is called with ``commit=True``. Default: True. by_epoch (bool): Whether EpochBasedRunner is used. Default: True. with_step (bool): If True, the step will be logged from ``self.get_iters``. Otherwise, step will not be logged. Default: True. log_artifact (bool): If True, artifacts in {work_dir} will be uploaded to wandb after training ends. Default: True `New in version 1.4.3.` out_suffix (str or tuple[str], optional): Those filenames ending with ``out_suffix`` will be uploaded to wandb. Default: ('.log.json', '.log', '.py'). 
`New in version 1.4.3.` .. _wandb: https://docs.wandb.ai """ def __init__(self, init_kwargs=None, interval=10, ignore_last=True, reset_flag=False, commit=True, by_epoch=True, with_step=True, log_artifact=True, out_suffix=('.log.json', '.log', '.py')): super(WandbLoggerHook, self).__init__(interval, ignore_last, reset_flag, by_epoch) self.import_wandb() self.init_kwargs = init_kwargs self.commit = commit self.with_step = with_step self.log_artifact = log_artifact self.out_suffix = out_suffix def import_wandb(self): try: import wandb except ImportError: raise ImportError( 'Please run "pip install wandb" to install wandb') self.wandb = wandb @master_only def before_run(self, runner): super(WandbLoggerHook, self).before_run(runner) if self.wandb is None: self.import_wandb() if self.init_kwargs: self.wandb.init(**self.init_kwargs) else: self.wandb.init() @master_only def log(self, runner): tags = self.get_loggable_tags(runner) if tags: if self.with_step: self.wandb.log( tags, step=self.get_iter(runner), commit=self.commit) else: tags['global_step'] = self.get_iter(runner) self.wandb.log(tags, commit=self.commit) @master_only def after_run(self, runner): if self.log_artifact: wandb_artifact = self.wandb.Artifact( name='artifacts', type='model') for filename in scandir(runner.work_dir, self.out_suffix, True): local_filepath = osp.join(runner.work_dir, filename) wandb_artifact.add_file(local_filepath) self.wandb.log_artifact(wandb_artifact) self.wandb.join() ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/lr_updater.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import numbers from math import cos, pi import mmcv from .hook import HOOKS, Hook class LrUpdaterHook(Hook): """LR Scheduler in MMCV. Args: by_epoch (bool): LR changes epoch by epoch warmup (string): Type of warmup used. 
It can be None(use no warmup), 'constant', 'linear' or 'exp' warmup_iters (int): The number of iterations or epochs that warmup lasts warmup_ratio (float): LR used at the beginning of warmup equals to warmup_ratio * initial_lr warmup_by_epoch (bool): When warmup_by_epoch == True, warmup_iters means the number of epochs that warmup lasts, otherwise means the number of iteration that warmup lasts """ def __init__(self, by_epoch=True, warmup=None, warmup_iters=0, warmup_ratio=0.1, warmup_by_epoch=False): # validate the "warmup" argument if warmup is not None: if warmup not in ['constant', 'linear', 'exp']: raise ValueError( f'"{warmup}" is not a supported type for warming up, valid' ' types are "constant" and "linear"') if warmup is not None: assert warmup_iters > 0, \ '"warmup_iters" must be a positive integer' assert 0 < warmup_ratio <= 1.0, \ '"warmup_ratio" must be in range (0,1]' self.by_epoch = by_epoch self.warmup = warmup self.warmup_iters = warmup_iters self.warmup_ratio = warmup_ratio self.warmup_by_epoch = warmup_by_epoch if self.warmup_by_epoch: self.warmup_epochs = self.warmup_iters self.warmup_iters = None else: self.warmup_epochs = None self.base_lr = [] # initial lr for all param groups self.regular_lr = [] # expected lr if no warming up is performed def _set_lr(self, runner, lr_groups): if isinstance(runner.optimizer, dict): for k, optim in runner.optimizer.items(): for param_group, lr in zip(optim.param_groups, lr_groups[k]): param_group['lr'] = lr else: for param_group, lr in zip(runner.optimizer.param_groups, lr_groups): param_group['lr'] = lr def get_lr(self, runner, base_lr): raise NotImplementedError def get_regular_lr(self, runner): if isinstance(runner.optimizer, dict): lr_groups = {} for k in runner.optimizer.keys(): _lr_group = [ self.get_lr(runner, _base_lr) for _base_lr in self.base_lr[k] ] lr_groups.update({k: _lr_group}) return lr_groups else: return [self.get_lr(runner, _base_lr) for _base_lr in self.base_lr] def get_warmup_lr(self, 
cur_iters): def _get_warmup_lr(cur_iters, regular_lr): if self.warmup == 'constant': warmup_lr = [_lr * self.warmup_ratio for _lr in regular_lr] elif self.warmup == 'linear': k = (1 - cur_iters / self.warmup_iters) * (1 - self.warmup_ratio) warmup_lr = [_lr * (1 - k) for _lr in regular_lr] elif self.warmup == 'exp': k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters) warmup_lr = [_lr * k for _lr in regular_lr] return warmup_lr if isinstance(self.regular_lr, dict): lr_groups = {} for key, regular_lr in self.regular_lr.items(): lr_groups[key] = _get_warmup_lr(cur_iters, regular_lr) return lr_groups else: return _get_warmup_lr(cur_iters, self.regular_lr) def before_run(self, runner): # NOTE: when resuming from a checkpoint, if 'initial_lr' is not saved, # it will be set according to the optimizer params if isinstance(runner.optimizer, dict): self.base_lr = {} for k, optim in runner.optimizer.items(): for group in optim.param_groups: group.setdefault('initial_lr', group['lr']) _base_lr = [ group['initial_lr'] for group in optim.param_groups ] self.base_lr.update({k: _base_lr}) else: for group in runner.optimizer.param_groups: group.setdefault('initial_lr', group['lr']) self.base_lr = [ group['initial_lr'] for group in runner.optimizer.param_groups ] def before_train_epoch(self, runner): if self.warmup_iters is None: epoch_len = len(runner.data_loader) self.warmup_iters = self.warmup_epochs * epoch_len if not self.by_epoch: return self.regular_lr = self.get_regular_lr(runner) self._set_lr(runner, self.regular_lr) def before_train_iter(self, runner): cur_iter = runner.iter if not self.by_epoch: self.regular_lr = self.get_regular_lr(runner) if self.warmup is None or cur_iter >= self.warmup_iters: self._set_lr(runner, self.regular_lr) else: warmup_lr = self.get_warmup_lr(cur_iter) self._set_lr(runner, warmup_lr) elif self.by_epoch: if self.warmup is None or cur_iter > self.warmup_iters: return elif cur_iter == self.warmup_iters: self._set_lr(runner, self.regular_lr) 
@HOOKS.register_module()
class StepLrUpdaterHook(LrUpdaterHook):
    """Multiply the LR by ``gamma`` at fixed steps, with an optional floor.

    Args:
        step (int | list[int]): An int is used as the decay interval; a
            list gives the explicit steps at which the LR is decayed.
        gamma (float, optional): Multiplicative decay factor. Default: 0.1.
        min_lr (float, optional): Lower bound applied to the decayed LR.
            No clipping is done when it is None. Default: None.
    """

    def __init__(self, step, gamma=0.1, min_lr=None, **kwargs):
        if not isinstance(step, (list, int)):
            raise TypeError('"step" must be a list or integer')
        if isinstance(step, list):
            assert mmcv.is_list_of(step, int)
            assert all(s > 0 for s in step)
        else:
            assert step > 0
        self.step = step
        self.gamma = gamma
        self.min_lr = min_lr
        super(StepLrUpdaterHook, self).__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        """Return ``base_lr * gamma**k`` where ``k`` is the number of decays
        already reached, clipped from below by ``min_lr`` when it is set."""
        if self.by_epoch:
            progress = runner.epoch
        else:
            progress = runner.iter

        if isinstance(self.step, int):
            num_decays = progress // self.step
        else:
            # Index of the first milestone not reached yet; all milestones
            # passed means every decay has been applied.
            num_decays = next(
                (idx for idx, milestone in enumerate(self.step)
                 if progress < milestone), len(self.step))

        decayed = base_lr * (self.gamma**num_decays)
        if self.min_lr is None:
            return decayed
        return max(decayed, self.min_lr)
@HOOKS.register_module()
class FlatCosineAnnealingLrUpdaterHook(LrUpdaterHook):
    """Flat + Cosine lr schedule.

    The LR stays at ``base_lr`` for the first ``start_percent`` of training
    and is then cosine-annealed towards the target LR.

    Modified from https://github.com/fastai/fastai/blob/master/fastai/callback/schedule.py#L128 # noqa: E501

    Args:
        start_percent (float): Fraction of the total training steps after
            which annealing starts. Must be a ``float`` within [0, 1].
            Default: 0.75
        min_lr (float, optional): The minimum lr. Default: None.
        min_lr_ratio (float, optional): The ratio of minimum lr to the base
            lr. Exactly one of `min_lr` and `min_lr_ratio` must be
            specified. Default: None.

    Raises:
        ValueError: If ``start_percent`` is not a float or lies outside
            [0, 1].
    """

    def __init__(self,
                 start_percent=0.75,
                 min_lr=None,
                 min_lr_ratio=None,
                 **kwargs):
        assert (min_lr is None) ^ (min_lr_ratio is None)
        # Check the type first: comparing a non-numeric value with 0 would
        # raise TypeError before the intended ValueError could be reported.
        if not isinstance(start_percent, float) or (start_percent < 0
                                                    or start_percent > 1):
            raise ValueError(
                'expected float between 0 and 1 start_percent, but '
                f'got {start_percent}')
        self.start_percent = start_percent
        self.min_lr = min_lr
        self.min_lr_ratio = min_lr_ratio
        super(FlatCosineAnnealingLrUpdaterHook, self).__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        """Return ``base_lr`` during the flat phase, otherwise the cosine
        annealed value between ``base_lr`` and the target LR."""
        if self.by_epoch:
            start = round(runner.max_epochs * self.start_percent)
            progress = runner.epoch - start
            max_progress = runner.max_epochs - start
        else:
            start = round(runner.max_iters * self.start_percent)
            progress = runner.iter - start
            max_progress = runner.max_iters - start

        if self.min_lr_ratio is not None:
            target_lr = base_lr * self.min_lr_ratio
        else:
            target_lr = self.min_lr

        # Negative progress means annealing has not started yet.
        if progress < 0:
            return base_lr
        return annealing_cos(base_lr, target_lr, progress / max_progress)
def get_position_from_periods(iteration, cumulative_periods):
    """Locate the period that contains ``iteration``.

    Returns the index of the first entry of ``cumulative_periods`` that is
    strictly greater than ``iteration``. For example, with
    cumulative_periods = [100, 200, 300, 400]:
    iteration == 50 gives 0, iteration == 210 gives 2 and
    iteration == 300 gives 3.

    Args:
        iteration (int): Current iteration.
        cumulative_periods (list[int]): Cumulative period list.

    Returns:
        int: Index of the period the iteration falls into.

    Raises:
        ValueError: If ``iteration`` is not smaller than any entry.
    """
    position = next(
        (idx for idx, boundary in enumerate(cumulative_periods)
         if iteration < boundary), None)
    if position is None:
        raise ValueError(f'Current iteration {iteration} exceeds '
                         f'cumulative_periods {cumulative_periods}')
    return position
def __init__(self,
             by_epoch=False,
             target_ratio=(10, 1e-4),
             cyclic_times=1,
             step_ratio_up=0.4,
             anneal_strategy='cos',
             gamma=1,
             **kwargs):
    """Validate and store the cyclic LR settings.

    Args:
        by_epoch (bool): Must be False; only iteration-based updates are
            supported.
        target_ratio (number | list | tuple): Ratios of the highest and the
            lowest LR to the initial LR. A single number ``r`` (or a
            one-element sequence) is expanded to ``(r, r / 1e5)``.
        cyclic_times (int): Number of cycles during training.
        step_ratio_up (float): Fraction of a cycle spent increasing the LR,
            in [0, 1).
        anneal_strategy (str): ``'cos'`` or ``'linear'``.
        gamma (float): Cycle decay ratio in (0, 1].

    Raises:
        ValueError: If ``target_ratio`` has an unsupported type or
            ``anneal_strategy`` is unknown.
    """
    # Accept plain ints and lists as well: the default itself uses an int,
    # and the length assertion below already speaks of "list or tuple".
    if isinstance(target_ratio,
                  (int, float)) and not isinstance(target_ratio, bool):
        target_ratio = (target_ratio, target_ratio / 1e5)
    elif isinstance(target_ratio, (list, tuple)):
        if len(target_ratio) == 1:
            target_ratio = (target_ratio[0], target_ratio[0] / 1e5)
        else:
            target_ratio = tuple(target_ratio)
    else:
        raise ValueError('target_ratio should be a number, list '
                         f'or tuple, got {type(target_ratio)}')

    assert len(target_ratio) == 2, \
        '"target_ratio" must be list or tuple of two floats'
    assert 0 <= step_ratio_up < 1.0, \
        '"step_ratio_up" must be in range [0,1)'
    assert 0 < gamma <= 1, \
        '"gamma" must be in range (0, 1]'

    self.target_ratio = target_ratio
    self.cyclic_times = cyclic_times
    self.step_ratio_up = step_ratio_up
    self.gamma = gamma
    self.max_iter_per_phase = None  # filled in by before_run
    self.lr_phases = []  # init lr_phases
    # validate anneal_strategy
    if anneal_strategy not in ['cos', 'linear']:
        raise ValueError('anneal_strategy must be one of "cos" or '
                         f'"linear", instead got {anneal_strategy}')
    elif anneal_strategy == 'cos':
        self.anneal_func = annealing_cos
    elif anneal_strategy == 'linear':
        self.anneal_func = annealing_linear

    assert not by_epoch, \
        'currently only support "by_epoch" = False'
    super(CyclicLrUpdaterHook, self).__init__(by_epoch, **kwargs)
def get_lr(self, runner, base_lr):
    """Compute the LR for the current iteration of the cyclic schedule.

    ``self.lr_phases`` holds ``[start_iter, end_iter, start_ratio,
    end_ratio]`` entries, the up phase first and the down phase second.
    The peak ratio is pulled towards 1 by ``gamma**cycle`` so that the gap
    between the maximum LR and ``base_lr`` shrinks every cycle.

    Args:
        runner: Object exposing the global iteration as ``iter``.
        base_lr (float): Initial LR of the parameter group.

    Returns:
        The value of ``self.anneal_func`` for the matching phase, or None
        when no phase contains the current iteration.
    """
    curr_iter = runner.iter % self.max_iter_per_phase
    curr_cycle = runner.iter // self.max_iter_per_phase
    # Decay applied to the peak of the current cycle.
    scale = self.gamma**curr_cycle

    for phase_idx, (start_iter, end_iter, start_ratio,
                    end_ratio) in enumerate(self.lr_phases):
        if start_iter <= curr_iter < end_iter:
            # The scaled peak ratio can be expressed as:
            # (base_lr + scale * (max_lr - base_lr)) / base_lr
            # Identify the phase by its position, not by start_iter == 0:
            # with a zero-length up phase the down phase also starts at 0
            # and would otherwise have its end ratio scaled instead of
            # its peak.
            if phase_idx % 2 == 0:
                # up phase: the peak is the end of the phase
                end_ratio = 1 - scale + end_ratio * scale
            else:
                # down phase: the peak is the start of the phase
                start_ratio = 1 - scale + start_ratio * scale
            progress = curr_iter - start_iter
            return self.anneal_func(base_lr * start_ratio,
                                    base_lr * end_ratio,
                                    progress / (end_iter - start_iter))
def __init__(self,
             max_lr,
             total_steps=None,
             pct_start=0.3,
             anneal_strategy='cos',
             div_factor=25,
             final_div_factor=1e4,
             three_phase=False,
             **kwargs):
    """Validate and store the one-cycle LR settings.

    Args:
        max_lr (number | list | dict): Upper LR boundary, given as one
            number, one value per parameter group, or a dict.
        total_steps (int, optional): Total number of steps in the cycle.
            The attribute is only set when a value is given.
        pct_start (float): Fraction of the cycle spent increasing the LR;
            must be a ``float`` within [0, 1].
        anneal_strategy (str): ``'cos'`` or ``'linear'``.
        div_factor (float): ``initial_lr = max_lr / div_factor``.
        final_div_factor (float): ``min_lr = initial_lr / final_div_factor``.
        three_phase (bool): Whether to use a third annihilation phase.

    Raises:
        ValueError: On an unsupported ``max_lr`` / ``total_steps`` type, an
            invalid ``pct_start`` or an unknown ``anneal_strategy``.
    """
    # validate by_epoch, currently only support by_epoch = False
    if 'by_epoch' not in kwargs:
        kwargs['by_epoch'] = False
    else:
        assert not kwargs['by_epoch'], \
            'currently only support "by_epoch" = False'
    if not isinstance(max_lr, (numbers.Number, list, dict)):
        raise ValueError('the type of max_lr must be the one of number, '
                         f'list or dict, but got {type(max_lr)}')
    self._max_lr = max_lr
    if total_steps is not None:
        if not isinstance(total_steps, int):
            raise ValueError('the type of total_steps must be int, but '
                             f'got {type(total_steps)}')
        self.total_steps = total_steps
    # validate pct_start; the type is checked first so that a non-numeric
    # value yields the ValueError below rather than a TypeError from the
    # range comparison
    if not isinstance(pct_start, float) or pct_start < 0 or pct_start > 1:
        raise ValueError('expected float between 0 and 1 pct_start, but '
                         f'got {pct_start}')
    self.pct_start = pct_start
    # validate anneal_strategy
    if anneal_strategy not in ['cos', 'linear']:
        raise ValueError('anneal_strategy must be one of "cos" or '
                         f'"linear", instead got {anneal_strategy}')
    elif anneal_strategy == 'cos':
        self.anneal_func = annealing_cos
    elif anneal_strategy == 'linear':
        self.anneal_func = annealing_linear
    self.div_factor = div_factor
    self.final_div_factor = final_div_factor
    self.three_phase = three_phase
    self.lr_phases = []  # init lr_phases
    super(OneCycleLrUpdaterHook, self).__init__(**kwargs)
def get_lr(self, runner, base_lr):
    """Compute the one-cycle LR for the current iteration.

    ``self.lr_phases`` holds ``(end_iter, start_lr, end_lr)`` entries whose
    last two values are multipliers of ``base_lr``; each phase starts where
    the previous one ended.

    Args:
        runner: Object exposing the current iteration as ``iter``.
        base_lr (float): Initial LR of the parameter group.

    Returns:
        The value of ``self.anneal_func`` for the active phase.

    Raises:
        RuntimeError: If no phase has been configured yet.
    """
    curr_iter = runner.iter
    start_iter = 0
    last_idx = len(self.lr_phases) - 1
    for i, (end_iter, start_lr, end_lr) in enumerate(self.lr_phases):
        # Iterations beyond the schedule stay in the last phase instead of
        # leaving the result undefined.
        if curr_iter <= end_iter or i == last_idx:
            span = end_iter - start_iter
            if span > 0:
                # Clamp so that late iterations keep the final value.
                pct = min((curr_iter - start_iter) / span, 1.0)
            else:
                # Zero-length phase: it is already complete, and dividing
                # by the span would raise ZeroDivisionError.
                pct = 1.0
            return self.anneal_func(base_lr * start_lr, base_lr * end_lr,
                                    pct)
        start_iter = end_iter
    raise RuntimeError('lr_phases is empty, "before_run" must be called '
                       'before "get_lr"')
def format_param(name, optim, param):
    """Expand ``param`` into one value per parameter group of ``optim``.

    Args:
        name: Key used to look up ``param`` when it is a mapping, and shown
            in error messages.
        optim: Optimizer-like object exposing ``param_groups``.
        param: A single number (repeated for every group), a list/tuple
            with one entry per group (returned unchanged), or a mapping
            from ``name`` to the values (multi-optimizer case).

    Returns:
        The per-group values.

    Raises:
        ValueError: If a list/tuple does not match the number of groups.
        KeyError: If ``name`` is missing from the mapping.
    """
    num_groups = len(optim.param_groups)

    if isinstance(param, numbers.Number):
        return [param] * num_groups

    if isinstance(param, (list, tuple)):
        # multi param groups
        if len(param) == num_groups:
            return param
        raise ValueError(f'expected {len(optim.param_groups)} '
                         f'values for {name}, got {len(param)}')

    # multi optimizers
    if name in param:
        return param[name]
    raise KeyError(f'{name} is not found in {param.keys()}')
import torch from .hook import HOOKS, Hook @HOOKS.register_module() class EmptyCacheHook(Hook): def __init__(self, before_epoch=False, after_epoch=True, after_iter=False): self._before_epoch = before_epoch self._after_epoch = after_epoch self._after_iter = after_iter def after_iter(self, runner): if self._after_iter: torch.cuda.empty_cache() def before_epoch(self, runner): if self._before_epoch: torch.cuda.empty_cache() def after_epoch(self, runner): if self._after_epoch: torch.cuda.empty_cache() ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/momentum_updater.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import mmcv from .hook import HOOKS, Hook from .lr_updater import annealing_cos, annealing_linear, format_param class MomentumUpdaterHook(Hook): def __init__(self, by_epoch=True, warmup=None, warmup_iters=0, warmup_ratio=0.9): # validate the "warmup" argument if warmup is not None: if warmup not in ['constant', 'linear', 'exp']: raise ValueError( f'"{warmup}" is not a supported type for warming up, valid' ' types are "constant" and "linear"') if warmup is not None: assert warmup_iters > 0, \ '"warmup_iters" must be a positive integer' assert 0 < warmup_ratio <= 1.0, \ '"warmup_momentum" must be in range (0,1]' self.by_epoch = by_epoch self.warmup = warmup self.warmup_iters = warmup_iters self.warmup_ratio = warmup_ratio self.base_momentum = [] # initial momentum for all param groups self.regular_momentum = [ ] # expected momentum if no warming up is performed def _set_momentum(self, runner, momentum_groups): if isinstance(runner.optimizer, dict): for k, optim in runner.optimizer.items(): for param_group, mom in zip(optim.param_groups, momentum_groups[k]): if 'momentum' in param_group.keys(): param_group['momentum'] = mom elif 'betas' in param_group.keys(): param_group['betas'] = (mom, param_group['betas'][1]) else: for param_group, mom in 
def get_warmup_momentum(self, cur_iters):
    """Derive the warmup momentum from ``self.regular_momentum``.

    Every regular momentum value is divided by a factor that depends on
    ``self.warmup``:

    - ``'constant'``: ``warmup_ratio``
    - ``'linear'``: ``1 - (1 - cur_iters / warmup_iters) * (1 - warmup_ratio)``
    - ``'exp'``: ``warmup_ratio ** (1 - cur_iters / warmup_iters)``

    Args:
        cur_iters (int): Current iteration.

    Returns:
        list | dict: Warmup momentum values, a dict of lists when
        ``self.regular_momentum`` is a dict.
    """

    def _scale(values):
        mode = self.warmup
        if mode == 'constant':
            divisor = self.warmup_ratio
        elif mode == 'linear':
            k = (1 - cur_iters / self.warmup_iters) * (1 -
                                                       self.warmup_ratio)
            divisor = 1 - k
        elif mode == 'exp':
            divisor = self.warmup_ratio**(1 - cur_iters / self.warmup_iters)
        return [value / divisor for value in values]

    regular = self.regular_momentum
    if not isinstance(regular, dict):
        return _scale(regular)
    return {key: _scale(values) for key, values in regular.items()}
@HOOKS.register_module()
class StepMomentumUpdaterHook(MomentumUpdaterHook):
    """Multiply the momentum by ``gamma`` at fixed steps, with an optional
    floor.

    Args:
        step (int | list[int]): An int is used as the decay interval; a
            list gives the explicit steps at which momentum is decayed.
        gamma (float, optional): Multiplicative decay factor. Default: 0.5.
        min_momentum (float, optional): Lower bound applied to the decayed
            momentum. No momentum clipping is done when it is None.
            Default: None.
    """

    def __init__(self, step, gamma=0.5, min_momentum=None, **kwargs):
        if not isinstance(step, (list, int)):
            raise TypeError('"step" must be a list or integer')
        if isinstance(step, list):
            assert mmcv.is_list_of(step, int)
            assert all(s > 0 for s in step)
        else:
            assert step > 0
        self.step = step
        self.gamma = gamma
        self.min_momentum = min_momentum
        super(StepMomentumUpdaterHook, self).__init__(**kwargs)

    def get_momentum(self, runner, base_momentum):
        """Return ``base_momentum * gamma**k`` where ``k`` is the number of
        decays already reached, clipped from below by ``min_momentum`` when
        it is set."""
        if self.by_epoch:
            progress = runner.epoch
        else:
            progress = runner.iter

        if isinstance(self.step, int):
            num_decays = progress // self.step
        else:
            # Index of the first milestone not reached yet; all milestones
            # passed means every decay has been applied.
            num_decays = next(
                (idx for idx, milestone in enumerate(self.step)
                 if progress < milestone), len(self.step))

        decayed = base_momentum * (self.gamma**num_decays)
        if self.min_momentum is None:
            return decayed
        return max(decayed, self.min_momentum)
def __init__(self,
             by_epoch=False,
             target_ratio=(0.85 / 0.95, 1),
             cyclic_times=1,
             step_ratio_up=0.4,
             anneal_strategy='cos',
             gamma=1,
             **kwargs):
    """Validate and store the cyclic momentum settings.

    Args:
        by_epoch (bool): Must be False; only iteration-based updates are
            supported.
        target_ratio (number | list | tuple): Ratios of the lowest and the
            highest momentum to the initial momentum. A single number
            ``r`` (or a one-element sequence) is expanded to
            ``(r, r / 1e5)``.
        cyclic_times (int): Number of cycles during training.
        step_ratio_up (float): Fraction of a cycle covered by the first
            phase, in [0, 1).
        anneal_strategy (str): ``'cos'`` or ``'linear'``.
        gamma (float): Cycle decay ratio in (0, 1].

    Raises:
        ValueError: If ``target_ratio`` has an unsupported type or
            ``anneal_strategy`` is unknown.
    """
    # Accept plain ints and lists as well: the default itself contains an
    # int, and the length assertion below already speaks of
    # "list or tuple".
    if isinstance(target_ratio,
                  (int, float)) and not isinstance(target_ratio, bool):
        target_ratio = (target_ratio, target_ratio / 1e5)
    elif isinstance(target_ratio, (list, tuple)):
        if len(target_ratio) == 1:
            target_ratio = (target_ratio[0], target_ratio[0] / 1e5)
        else:
            target_ratio = tuple(target_ratio)
    else:
        raise ValueError('target_ratio should be a number, list '
                         f'or tuple, got {type(target_ratio)}')

    assert len(target_ratio) == 2, \
        '"target_ratio" must be list or tuple of two floats'
    assert 0 <= step_ratio_up < 1.0, \
        '"step_ratio_up" must be in range [0,1)'
    # Same check as the cyclic LR hook; the documented range is (0, 1].
    assert 0 < gamma <= 1, \
        '"gamma" must be in range (0, 1]'

    self.target_ratio = target_ratio
    self.cyclic_times = cyclic_times
    self.step_ratio_up = step_ratio_up
    self.gamma = gamma
    self.momentum_phases = []  # init momentum_phases

    if anneal_strategy not in ['cos', 'linear']:
        raise ValueError('anneal_strategy must be one of "cos" or '
                         f'"linear", instead got {anneal_strategy}')
    elif anneal_strategy == 'cos':
        self.anneal_func = annealing_cos
    elif anneal_strategy == 'linear':
        self.anneal_func = annealing_linear

    # currently only support by_epoch=False
    assert not by_epoch, \
        'currently only support "by_epoch" = False'
    super(CyclicMomentumUpdaterHook, self).__init__(by_epoch, **kwargs)
def get_momentum(self, runner, base_momentum):
    """Compute the momentum for the current iteration of the cyclic
    schedule.

    ``self.momentum_phases`` holds ``[start_iter, end_iter, start_ratio,
    end_ratio]`` entries, the first phase of a cycle followed by the
    second. The turning-point ratio shared by both phases is pulled
    towards 1 by ``gamma**cycle`` so that the swing shrinks every cycle.

    Args:
        runner: Object exposing the global iteration as ``iter``.
        base_momentum (float): Initial momentum of the parameter group.

    Returns:
        The value of ``self.anneal_func`` for the matching phase, or None
        when no phase contains the current iteration.
    """
    curr_iter = runner.iter % self.max_iter_per_phase
    curr_cycle = runner.iter // self.max_iter_per_phase
    scale = self.gamma**curr_cycle

    for phase_idx, (start_iter, end_iter, start_ratio,
                    end_ratio) in enumerate(self.momentum_phases):
        if start_iter <= curr_iter < end_iter:
            # The scaled turning-point ratio can be expressed as:
            # (base_momentum + scale * (target - base_momentum))
            #     / base_momentum
            # Identify the phase by its position, not by start_iter == 0:
            # with a zero-length first phase the second phase also starts
            # at 0 and would otherwise have its end ratio scaled instead
            # of its start ratio.
            if phase_idx % 2 == 0:
                # first phase: the turning point is its end
                end_ratio = 1 - scale + end_ratio * scale
            else:
                # second phase: the turning point is its start
                start_ratio = 1 - scale + start_ratio * scale
            progress = curr_iter - start_iter
            return self.anneal_func(base_momentum * start_ratio,
                                    base_momentum * end_ratio,
                                    progress / (end_iter - start_iter))
def __init__(self,
             base_momentum=0.85,
             max_momentum=0.95,
             pct_start=0.3,
             anneal_strategy='cos',
             three_phase=False,
             **kwargs):
    """Validate and store the one-cycle momentum settings.

    Args:
        base_momentum (float | list | dict): Lower momentum boundary.
        max_momentum (float | list | dict): Upper momentum boundary.
        pct_start (float): Fraction of the cycle spent in the first phase;
            must be a ``float`` within [0, 1].
        anneal_strategy (str): ``'cos'`` or ``'linear'``.
        three_phase (bool): Whether to use a third phase.

    Raises:
        ValueError: On an unsupported momentum type, an invalid
            ``pct_start`` or an unknown ``anneal_strategy``.
    """
    # validate by_epoch, currently only support by_epoch=False
    if 'by_epoch' not in kwargs:
        kwargs['by_epoch'] = False
    else:
        assert not kwargs['by_epoch'], \
            'currently only support "by_epoch" = False'
    if not isinstance(base_momentum, (float, list, dict)):
        raise ValueError('base_momentum must be the type among of float, '
                         'list or dict.')
    self._base_momentum = base_momentum
    if not isinstance(max_momentum, (float, list, dict)):
        raise ValueError('max_momentum must be the type among of float, '
                         'list or dict.')
    self._max_momentum = max_momentum
    # validate pct_start; the type is checked first so that a non-numeric
    # value yields the ValueError below rather than a TypeError from the
    # range comparison
    if not isinstance(pct_start, float) or pct_start < 0 or pct_start > 1:
        raise ValueError('Expected float between 0 and 1 pct_start, but '
                         f'got {pct_start}')
    self.pct_start = pct_start
    # validate anneal_strategy
    if anneal_strategy not in ['cos', 'linear']:
        raise ValueError('anneal_strategy must be one of "cos" or '
                         f'"linear", instead got {anneal_strategy}')
    elif anneal_strategy == 'cos':
        self.anneal_func = annealing_cos
    elif anneal_strategy == 'linear':
        self.anneal_func = annealing_linear
    self.three_phase = three_phase
    self.momentum_phases = []  # init momentum_phases
    super(OneCycleMomentumUpdaterHook, self).__init__(**kwargs)
float(self.pct_start * runner.max_iters) - 1, 'start_momentum': 'max_momentum', 'end_momentum': 'base_momentum' }) self.momentum_phases.append({ 'end_iter': runner.max_iters - 1, 'start_momentum': 'base_momentum', 'end_momentum': 'max_momentum' }) def _set_momentum(self, runner, momentum_groups): if isinstance(runner.optimizer, dict): for k, optim in runner.optimizer.items(): for param_group, mom in zip(optim.param_groups, momentum_groups[k]): if 'momentum' in param_group.keys(): param_group['momentum'] = mom elif 'betas' in param_group.keys(): param_group['betas'] = (mom, param_group['betas'][1]) else: for param_group, mom in zip(runner.optimizer.param_groups, momentum_groups): if 'momentum' in param_group.keys(): param_group['momentum'] = mom elif 'betas' in param_group.keys(): param_group['betas'] = (mom, param_group['betas'][1]) def get_momentum(self, runner, param_group): curr_iter = runner.iter start_iter = 0 for i, phase in enumerate(self.momentum_phases): end_iter = phase['end_iter'] if curr_iter <= end_iter or i == len(self.momentum_phases) - 1: pct = (curr_iter - start_iter) / (end_iter - start_iter) momentum = self.anneal_func( param_group[phase['start_momentum']], param_group[phase['end_momentum']], pct) break start_iter = end_iter return momentum def get_regular_momentum(self, runner): if isinstance(runner.optimizer, dict): momentum_groups = {} for k, optim in runner.optimizer.items(): _momentum_group = [ self.get_momentum(runner, param_group) for param_group in optim.param_groups ] momentum_groups.update({k: _momentum_group}) return momentum_groups else: momentum_groups = [] for param_group in runner.optimizer.param_groups: momentum_groups.append(self.get_momentum(runner, param_group)) return momentum_groups ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/nni_hook.py ================================================ from .hook import HOOKS, Hook @HOOKS.register_module() class NNIHook(Hook): def 
@HOOKS.register_module()
class NNIHook(Hook):
    """Forward runner metrics to NNI when ``runner.opt_cfg['mode']`` is 'nni'.

    NOTE(review): the train path reports a dict as the final result and the
    scalar ``['loss']`` entry as the intermediate result, while the val path
    does the opposite. This looks unintentional -- confirm which shape the
    NNI tuner expects before changing it.
    """

    @staticmethod
    def _matching(stats, opt_cfg):
        """Keep the entries of ``stats`` whose name contains the metric key."""
        return {
            name: value
            for name, value in stats.items() if opt_cfg['metrics'] in name
        }

    def before_run(self, runner):
        """Import nni lazily and drop the runner logger in NNI mode."""
        if runner.opt_cfg['mode'] != "nni":
            return
        import nni
        runner.logger = None
        self.nni = nni

    def after_train_epoch(self, runner):
        """Report ``runner.metrics`` after a training epoch."""
        opt_cfg = runner.opt_cfg
        if opt_cfg['mode'] != 'nni':
            return
        # stats = runner.outputs['log_vars']
        stats = runner.metrics
        train_only = len(runner.workflow) == 1
        if train_only and runner.epoch == runner.max_epochs:
            self.nni.report_final_result(self._matching(stats, opt_cfg))
            return
        print("report_intermediate_result")
        metrics = self._matching(stats, opt_cfg)
        self.nni.report_intermediate_result(metrics['loss'])

    def after_train_iter(self, runner):
        ...

    def before_val_iter(self, runner):
        ...

    def after_val_iter(self, runner):
        ...

    def after_val_epoch(self, runner):
        """Report ``runner.outputs`` after a validation epoch."""
        opt_cfg = runner.opt_cfg
        if opt_cfg['mode'] != 'nni':
            return
        stats = runner.outputs
        has_val = len(runner.workflow) != 1
        if has_val and runner.epoch == runner.max_epochs:
            self.nni.report_final_result(
                self._matching(stats, opt_cfg)['loss'])
        else:
            self.nni.report_intermediate_result(
                self._matching(stats, opt_cfg))
@HOOKS.register_module()
class OptimizerHook(Hook):
    """A hook contains custom operations for the optimizer.

    Args:
        grad_clip (float, optional): Maximum gradient norm passed to
            ``clip_grad_norm_`` (a plain number, not a config dict).
            Default: None.
        detect_anomalous_params (bool): This option is only used for
            debugging which will slow down the training speed.
            Detect anomalous parameters that are not included in
            the computational graph with `loss` as the root.
            There are two cases

                - Parameters were not used during
                  forward pass.
                - Parameters were not used to produce
                  loss.
            Default: False.
    """

    def __init__(self, grad_clip=None, detect_anomalous_params=False):
        self.grad_clip = grad_clip
        self.detect_anomalous_params = detect_anomalous_params

    def clip_grads(self, params):
        """Return the gradient norm of ``params``, clipping when configured.

        ``get_grad_norm`` supplies both the parameter list used for clipping
        and the fallback norm returned when no clipping takes place.
        """
        params, grad_norm = get_grad_norm(params)
        if len(params) > 0 and self.grad_clip:
            return clip_grad.clip_grad_norm_(params, self.grad_clip)
        else:
            return grad_norm

    def after_train_iter(self, runner):
        """Run zero_grad -> backward -> clip/log -> step for each optimizer."""
        if isinstance(runner.optimizer, dict):
            for name, optim in runner.optimizer.items():
                optim.zero_grad()
                loss = runner.outputs[f'{name}_loss']
                if self.detect_anomalous_params:
                    self.detect_anomalous_parameters(loss, runner)
                loss.backward()
                # Clip and log AFTER backward and BEFORE step. The previous
                # code clipped before zero_grad(), i.e. on the stale
                # gradients of the last iteration, so clipping never
                # influenced the update. The parameters are taken from the
                # optimizer itself, so exactly what gets stepped is clipped.
                grad_norm = self.clip_grads(
                    chain.from_iterable(
                        group['params'] for group in optim.param_groups))
                runner.log_buffer.update_dict(
                    {f'{name}_grad_norm': float(grad_norm)})
                optim.step()
        else:
            runner.optimizer.zero_grad()
            if self.detect_anomalous_params:
                self.detect_anomalous_parameters(runner.outputs['loss'],
                                                 runner)
            runner.outputs['loss'].backward()
            if not hasattr(runner.model, 'train'):
                # Wrapper objects without ``train`` keep the real module in
                # ``.model``.
                grad_norm = self.clip_grads(runner.model.model.parameters())
            else:
                grad_norm = self.clip_grads(runner.model.parameters())
            runner.log_buffer.update_dict({'grad_norm': float(grad_norm)})
            runner.optimizer.step()

    def detect_anomalous_parameters(self, loss, runner):
        """Log every trainable parameter that is unreachable from ``loss``."""
        logger = runner.logger
        parameters_in_graph = set()
        visited = set()

        # Iterative walk over the autograd graph: a recursive walk can
        # exceed Python's recursion limit on deep graphs.
        pending = [loss.grad_fn]
        while pending:
            grad_fn = pending.pop()
            if grad_fn is None or grad_fn in visited:
                continue
            visited.add(grad_fn)
            if hasattr(grad_fn, 'variable'):
                parameters_in_graph.add(grad_fn.variable)
            parents = grad_fn.next_functions
            if parents is not None:
                pending.extend(parent[0] for parent in parents)

        if isinstance(runner.model, dict):
            # A dict of models has no ``named_parameters``; prefix each
            # parameter name with the key of its model.
            named_params = [(f'{key}.{n}', p)
                            for key, module in runner.model.items()
                            for n, p in module.named_parameters()]
        else:
            named_params = runner.model.named_parameters()

        for n, p in named_params:
            if p not in parameters_in_graph and p.requires_grad:
                logger.log(
                    level=logging.ERROR,
                    msg=f'{n} with shape {p.size()} is not '
                    f'in the computational graph \n')
@HOOKS.register_module()
class GradientCumulativeOptimizerHook(OptimizerHook):
    """Optimizer Hook implements multi-iters gradient cumulating.

    Args:
        cumulative_iters (int, optional): Num of gradient cumulative iters.
            The optimizer will step every `cumulative_iters` iters.
            Defaults to 1.

    Examples:
        >>> # Use cumulative_iters to simulate a large batch size
        >>> # It is helpful when the hardware cannot handle a large batch size.
        >>> loader = DataLoader(data, batch_size=64)
        >>> optim_hook = GradientCumulativeOptimizerHook(cumulative_iters=4)
        >>> # almost equals to
        >>> loader = DataLoader(data, batch_size=256)
        >>> optim_hook = OptimizerHook()
    """

    def __init__(self, cumulative_iters=1, **kwargs):
        super(GradientCumulativeOptimizerHook, self).__init__(**kwargs)

        assert isinstance(cumulative_iters, int) and cumulative_iters > 0, \
            f'cumulative_iters only accepts positive int, but got ' \
            f'{type(cumulative_iters)} instead.'

        self.cumulative_iters = cumulative_iters
        self.divisible_iters = 0
        self.remainder_iters = 0
        self.initialized = False

    def has_batch_norm(self, module):
        """Return True if ``module`` or any descendant is a ``_BatchNorm``."""
        if isinstance(module, _BatchNorm):
            return True
        for m in module.children():
            if self.has_batch_norm(m):
                return True
        return False

    def _init(self, runner):
        """Compute the iteration bounds used to pick the loss factor."""
        if runner.iter % self.cumulative_iters != 0:
            runner.logger.warning(
                'Resume iter number is not divisible by cumulative_iters in '
                'GradientCumulativeOptimizerHook, which means the gradient of '
                'some iters is lost and the result may be influenced slightly.'
            )

        if self.has_batch_norm(runner.model) and self.cumulative_iters > 1:
            runner.logger.warning(
                'GradientCumulativeOptimizerHook may slightly decrease '
                'performance if the model has BatchNorm layers.')

        # ``divisible_iters`` is compared with the ABSOLUTE ``runner.iter``
        # in after_train_iter, so it must be derived from ``max_iters``.
        # Deriving it from the residual iterations (max_iters - iter) made a
        # resumed run fall into the remainder branch too early, dividing the
        # loss by the wrong factor (possibly 0).
        self.divisible_iters = (
            runner.max_iters // self.cumulative_iters * self.cumulative_iters)
        self.remainder_iters = runner.max_iters - self.divisible_iters

        self.initialized = True

    def after_train_iter(self, runner):
        """Accumulate the scaled loss; step every ``cumulative_iters``."""
        if not self.initialized:
            self._init(runner)

        if runner.iter < self.divisible_iters:
            loss_factor = self.cumulative_iters
        else:
            # Only the trailing iterations that cannot fill a whole
            # accumulation window get here, so remainder_iters > 0.
            loss_factor = self.remainder_iters
        loss = runner.outputs['loss']
        loss = loss / loss_factor
        loss.backward()

        if (self.every_n_iters(runner, self.cumulative_iters)
                or self.is_last_iter(runner)):

            if self.grad_clip is not None:
                grad_norm = self.clip_grads(runner.model.parameters())
                if grad_norm is not None:
                    # Add grad norm to the logger
                    runner.log_buffer.update({'grad_norm': float(grad_norm)},
                                             runner.outputs['num_samples'])
            runner.optimizer.step()
            runner.optimizer.zero_grad()
if (TORCH_VERSION != 'parrots'
        and digit_version(TORCH_VERSION) >= digit_version('1.6.0')):

    @HOOKS.register_module()
    class Fp16OptimizerHook(OptimizerHook):
        """FP16 optimizer hook (using PyTorch's implementation).

        If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend,
        to take care of the optimization procedure.

        Args:
            loss_scale (float | str | dict): Scale factor configuration.
                If loss_scale is a float, static loss scaling will be used with
                the specified scale. If loss_scale is a string, it must be
                'dynamic', then dynamic loss scaling will be used.
                It can also be a dict containing arguments of GradScalar.
                Defaults to 512. For Pytorch >= 1.6, mmcv uses official
                implementation of GradScaler. If you use a dict version of
                loss_scale to create GradScaler, please refer to:
                https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler
                for the parameters.

        Examples:
            >>> loss_scale = dict(
            ...     init_scale=65536.0,
            ...     growth_factor=2.0,
            ...     backoff_factor=0.5,
            ...     growth_interval=2000
            ... )
            >>> optimizer_hook = Fp16OptimizerHook(loss_scale=loss_scale)
        """

        def __init__(self,
                     grad_clip=None,
                     coalesce=True,
                     bucket_size_mb=-1,
                     loss_scale=512.,
                     distributed=True):
            self.grad_clip = grad_clip
            self.coalesce = coalesce
            self.bucket_size_mb = bucket_size_mb
            self.distributed = distributed
            # Passed to GradScaler.update(); stays None unless a static
            # float scale is configured.
            self._scale_update_param = None
            if loss_scale == 'dynamic':
                self.loss_scaler = GradScaler()
            elif isinstance(loss_scale, float):
                self._scale_update_param = loss_scale
                self.loss_scaler = GradScaler(init_scale=loss_scale)
            elif isinstance(loss_scale, dict):
                self.loss_scaler = GradScaler(**loss_scale)
            else:
                raise ValueError('loss_scale must be of type float, dict, or '
                                 f'"dynamic", got {loss_scale}')

        def before_run(self, runner):
            """Preparing steps before Mixed Precision Training."""
            # wrap model mode to fp16
            wrap_fp16_model(runner.model)
            # resume from state dict
            if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']:
                scaler_state_dict = runner.meta['fp16']['loss_scaler']
                self.loss_scaler.load_state_dict(scaler_state_dict)

        def copy_grads_to_fp32(self, fp16_net, fp32_weights):
            """Copy gradients from fp16 model to fp32 weight copy."""
            for fp32_param, fp16_param in zip(fp32_weights,
                                              fp16_net.parameters()):
                if fp16_param.grad is not None:
                    if fp32_param.grad is None:
                        fp32_param.grad = fp32_param.data.new(
                            fp32_param.size())
                    fp32_param.grad.copy_(fp16_param.grad)

        def copy_params_to_fp16(self, fp16_net, fp32_weights):
            """Copy updated params from fp32 weight copy to fp16 model."""
            for fp16_param, fp32_param in zip(fp16_net.parameters(),
                                              fp32_weights):
                fp16_param.data.copy_(fp32_param.data)

        def after_train_iter(self, runner):
            """Backward optimization steps for Mixed Precision Training. For
            dynamic loss scaling, please refer to
            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler.

            1. Scale the loss by a scale factor.
            2. Backward the loss to obtain the gradients.
            3. Unscale the optimizer's gradient tensors.
            4. Call optimizer.step() and update scale factor.
            5. Save loss_scaler state_dict for resume purpose.
            """
            # clear grads of last iteration
            runner.model.zero_grad()
            runner.optimizer.zero_grad()

            self.loss_scaler.scale(runner.outputs['loss']).backward()
            self.loss_scaler.unscale_(runner.optimizer)
            # grad clip
            if self.grad_clip is not None:
                grad_norm = self.clip_grads(runner.model.parameters())
                if grad_norm is not None:
                    # Add grad norm to the logger
                    runner.log_buffer.update({'grad_norm': float(grad_norm)},
                                             runner.outputs['num_samples'])
            # backward and update scaler
            self.loss_scaler.step(runner.optimizer)
            self.loss_scaler.update(self._scale_update_param)

            # save state_dict of loss_scaler
            runner.meta.setdefault(
                'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()

    @HOOKS.register_module()
    class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook,
                                              Fp16OptimizerHook):
        """Fp16 optimizer Hook (using PyTorch's implementation) implements
        multi-iters gradient cumulating.

        If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend,
        to take care of the optimization procedure.
        """

        def __init__(self, *args, **kwargs):
            super(GradientCumulativeFp16OptimizerHook,
                  self).__init__(*args, **kwargs)

        def after_train_iter(self, runner):
            """Accumulate scaled gradients; step every ``cumulative_iters``."""
            if not self.initialized:
                self._init(runner)

            if runner.iter < self.divisible_iters:
                loss_factor = self.cumulative_iters
            else:
                loss_factor = self.remainder_iters
            loss = runner.outputs['loss']
            loss = loss / loss_factor

            self.loss_scaler.scale(loss).backward()

            if (self.every_n_iters(runner, self.cumulative_iters)
                    or self.is_last_iter(runner)):

                # copy fp16 grads in the model to fp32 params in the optimizer
                self.loss_scaler.unscale_(runner.optimizer)

                if self.grad_clip is not None:
                    grad_norm = self.clip_grads(runner.model.parameters())
                    if grad_norm is not None:
                        # Add grad norm to the logger
                        runner.log_buffer.update(
                            {'grad_norm': float(grad_norm)},
                            runner.outputs['num_samples'])

                # backward and update scaler
                self.loss_scaler.step(runner.optimizer)
                self.loss_scaler.update(self._scale_update_param)

                # save state_dict of loss_scaler
                runner.meta.setdefault(
                    'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()

                # clear grads
                runner.model.zero_grad()
                runner.optimizer.zero_grad()

else:

    @HOOKS.register_module()
    class Fp16OptimizerHook(OptimizerHook):
        """FP16 optimizer hook (mmcv's implementation).

        The steps of fp16 optimizer is as follows.
        1. Scale the loss value.
        2. BP in the fp16 model.
        2. Copy gradients from fp16 model to fp32 weights.
        3. Update fp32 weights.
        4. Copy updated parameters from fp32 weights to fp16 model.

        Refer to https://arxiv.org/abs/1710.03740 for more details.

        Args:
            loss_scale (float | str | dict): Scale factor configuration.
                If loss_scale is a float, static loss scaling will be used with
                the specified scale. If loss_scale is a string, it must be
                'dynamic', then dynamic loss scaling will be used.
                It can also be a dict containing arguments of LossScaler.
                Defaults to 512.
        """

        def __init__(self,
                     grad_clip=None,
                     coalesce=True,
                     bucket_size_mb=-1,
                     loss_scale=512.,
                     distributed=True):
            self.grad_clip = grad_clip
            self.coalesce = coalesce
            self.bucket_size_mb = bucket_size_mb
            self.distributed = distributed
            if loss_scale == 'dynamic':
                self.loss_scaler = LossScaler(mode='dynamic')
            elif isinstance(loss_scale, float):
                self.loss_scaler = LossScaler(
                    init_scale=loss_scale, mode='static')
            elif isinstance(loss_scale, dict):
                self.loss_scaler = LossScaler(**loss_scale)
            else:
                raise ValueError('loss_scale must be of type float, dict, or '
                                 f'"dynamic", got {loss_scale}')

        def before_run(self, runner):
            """Preparing steps before Mixed Precision Training.

            1. Make a master copy of fp32 weights for optimization.
            2. Convert the main model from fp32 to fp16.
            """
            # keep a copy of fp32 weights
            old_groups = runner.optimizer.param_groups
            runner.optimizer.param_groups = copy.deepcopy(
                runner.optimizer.param_groups)
            state = defaultdict(dict)
            # Map each original parameter to its deep-copied counterpart so
            # the existing optimizer state can be re-keyed.
            p_map = {
                old_p: p
                for old_p, p in zip(
                    chain(*(g['params'] for g in old_groups)),
                    chain(*(g['params']
                            for g in runner.optimizer.param_groups)))
            }
            for k, v in runner.optimizer.state.items():
                state[p_map[k]] = v
            runner.optimizer.state = state
            # convert model to fp16
            wrap_fp16_model(runner.model)
            # resume from state dict
            if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']:
                scaler_state_dict = runner.meta['fp16']['loss_scaler']
                self.loss_scaler.load_state_dict(scaler_state_dict)

        def copy_grads_to_fp32(self, fp16_net, fp32_weights):
            """Copy gradients from fp16 model to fp32 weight copy."""
            for fp32_param, fp16_param in zip(fp32_weights,
                                              fp16_net.parameters()):
                if fp16_param.grad is not None:
                    if fp32_param.grad is None:
                        fp32_param.grad = fp32_param.data.new(
                            fp32_param.size())
                    fp32_param.grad.copy_(fp16_param.grad)

        def copy_params_to_fp16(self, fp16_net, fp32_weights):
            """Copy updated params from fp32 weight copy to fp16 model."""
            for fp16_param, fp32_param in zip(fp16_net.parameters(),
                                              fp32_weights):
                fp16_param.data.copy_(fp32_param.data)

        def after_train_iter(self, runner):
            """Backward optimization steps for Mixed Precision Training. For
            dynamic loss scaling, please refer `loss_scalar.py`

            1. Scale the loss by a scale factor.
            2. Backward the loss to obtain the gradients (fp16).
            3. Copy gradients from the model to the fp32 weight copy.
            4. Scale the gradients back and update the fp32 weight copy.
            5. Copy back the params from fp32 weight copy to the fp16 model.
            6. Save loss_scaler state_dict for resume purpose.
            """
            # clear grads of last iteration
            runner.model.zero_grad()
            runner.optimizer.zero_grad()
            # scale the loss value
            scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale
            scaled_loss.backward()
            # copy fp16 grads in the model to fp32 params in the optimizer

            fp32_weights = []
            for param_group in runner.optimizer.param_groups:
                fp32_weights += param_group['params']
            self.copy_grads_to_fp32(runner.model, fp32_weights)
            # allreduce grads
            if self.distributed:
                allreduce_grads(fp32_weights, self.coalesce,
                                self.bucket_size_mb)

            has_overflow = self.loss_scaler.has_overflow(fp32_weights)
            # if has overflow, skip this iteration
            if not has_overflow:
                # scale the gradients back
                for param in fp32_weights:
                    if param.grad is not None:
                        param.grad.div_(self.loss_scaler.loss_scale)
                if self.grad_clip is not None:
                    grad_norm = self.clip_grads(fp32_weights)
                    if grad_norm is not None:
                        # Add grad norm to the logger
                        runner.log_buffer.update(
                            {'grad_norm': float(grad_norm)},
                            runner.outputs['num_samples'])
                # update fp32 params
                runner.optimizer.step()
                # copy fp32 params to the fp16 model
                self.copy_params_to_fp16(runner.model, fp32_weights)
            self.loss_scaler.update_scale(has_overflow)
            if has_overflow:
                runner.logger.warning('Check overflow, downscale loss scale '
                                      f'to {self.loss_scaler.cur_scale}')

            # save state_dict of loss_scaler
            runner.meta.setdefault(
                'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()

    @HOOKS.register_module()
    class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook,
                                              Fp16OptimizerHook):
        """Fp16 optimizer Hook (using mmcv implementation) implements multi-
        iters gradient cumulating."""

        def __init__(self, *args, **kwargs):
            super(GradientCumulativeFp16OptimizerHook,
                  self).__init__(*args, **kwargs)

        def after_train_iter(self, runner):
            """Accumulate scaled gradients; step every ``cumulative_iters``."""
            if not self.initialized:
                self._init(runner)

            if runner.iter < self.divisible_iters:
                loss_factor = self.cumulative_iters
            else:
                loss_factor = self.remainder_iters

            loss = runner.outputs['loss']
            loss = loss / loss_factor

            # scale the loss value
            scaled_loss = loss * self.loss_scaler.loss_scale
            scaled_loss.backward()

            if (self.every_n_iters(runner, self.cumulative_iters)
                    or self.is_last_iter(runner)):

                # copy fp16 grads in the model to fp32 params in the optimizer
                fp32_weights = []
                for param_group in runner.optimizer.param_groups:
                    fp32_weights += param_group['params']
                self.copy_grads_to_fp32(runner.model, fp32_weights)
                # allreduce grads
                if self.distributed:
                    allreduce_grads(fp32_weights, self.coalesce,
                                    self.bucket_size_mb)

                has_overflow = self.loss_scaler.has_overflow(fp32_weights)
                # if has overflow, skip this iteration
                if not has_overflow:
                    # scale the gradients back
                    for param in fp32_weights:
                        if param.grad is not None:
                            param.grad.div_(self.loss_scaler.loss_scale)
                    if self.grad_clip is not None:
                        grad_norm = self.clip_grads(fp32_weights)
                        if grad_norm is not None:
                            # Add grad norm to the logger
                            runner.log_buffer.update(
                                {'grad_norm': float(grad_norm)},
                                runner.outputs['num_samples'])
                    # update fp32 params
                    runner.optimizer.step()
                    # copy fp32 params to the fp16 model
                    self.copy_params_to_fp16(runner.model, fp32_weights)

                # Update the scale BEFORE logging so the warning reports the
                # scale after the update, as the non-cumulative hook does;
                # logging first printed the old value.
                self.loss_scaler.update_scale(has_overflow)
                if has_overflow:
                    runner.logger.warning(
                        'Check overflow, downscale loss scale '
                        f'to {self.loss_scaler.cur_scale}')

                # save state_dict of loss_scaler
                runner.meta.setdefault(
                    'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()

                # clear grads
                runner.model.zero_grad()
                runner.optimizer.zero_grad()
@HOOKS.register_module()
class ProfilerHook(Hook):
    """Profiler to analyze performance during training.

    PyTorch Profiler is a tool that allows the collection of the performance
    metrics during the training. More details on Profiler can be found at
    https://pytorch.org/docs/1.8.1/profiler.html#torch.profiler.profile

    Args:
        by_epoch (bool): Profile performance by epoch or by iteration.
            Default: True.
        profile_iters (int): Number of iterations for profiling.
            If ``by_epoch=True``, profile_iters indicates that they are the
            first profile_iters epochs at the beginning of the
            training, otherwise it indicates the first profile_iters
            iterations. Default: 1.
        activities (list[str]): List of activity groups (CPU, CUDA) to use in
            profiling. Default: ['cpu', 'cuda'].
        schedule (dict, optional): Config of generating the callable schedule.
            if schedule is None, profiler will not add step markers into the
            trace and table view. Default: None.
        on_trace_ready (callable, dict): Either a handler or a dict of generate
            handler. Default: None.
        record_shapes (bool): Save information about operator's input shapes.
            Default: False.
        profile_memory (bool): Track tensor memory allocation/deallocation.
            Default: False.
        with_stack (bool): Record source information (file and line number)
            for the ops. Default: False.
        with_flops (bool): Use formula to estimate the FLOPS of specific
            operators (matrix multiplication and 2D convolution).
            Default: False.
        json_trace_path (str, optional): Exports the collected trace in Chrome
            JSON format. Default: None.

    Example:
        >>> runner = ... # instantiate a Runner
        >>> # tensorboard trace
        >>> trace_config = dict(type='tb_trace', dir_name='work_dir')
        >>> profiler_config = dict(on_trace_ready=trace_config)
        >>> runner.register_profiler_hook(profiler_config)
        >>> runner.run(data_loaders=[trainloader], workflow=[('train', 1)])
    """

    def __init__(self,
                 by_epoch: bool = True,
                 profile_iters: int = 1,
                 activities: List[str] = ['cpu', 'cuda'],
                 schedule: Optional[dict] = None,
                 on_trace_ready: Optional[Union[Callable, dict]] = None,
                 record_shapes: bool = False,
                 profile_memory: bool = False,
                 with_stack: bool = False,
                 with_flops: bool = False,
                 json_trace_path: Optional[str] = None) -> None:
        try:
            from torch import profiler  # torch version >= 1.8.1
        except ImportError:
            raise ImportError('profiler is the new feature of torch1.8.1, '
                              f'but your version is {torch.__version__}')

        assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean.'
        self.by_epoch = by_epoch

        if profile_iters < 1:
            raise ValueError('profile_iters should be greater than 0, but got '
                             f'{profile_iters}')
        self.profile_iters = profile_iters

        if not isinstance(activities, list):
            raise ValueError(
                f'activities should be list, but got {type(activities)}')
        # The default list is only read, never mutated; a fresh list of
        # ProfilerActivity values is built here.
        self.activities = []
        for activity in activities:
            activity = activity.lower()
            if activity == 'cpu':
                self.activities.append(profiler.ProfilerActivity.CPU)
            elif activity == 'cuda':
                self.activities.append(profiler.ProfilerActivity.CUDA)
            else:
                raise ValueError(
                    f'activity should be "cpu" or "cuda", but got {activity}')

        if schedule is not None:
            self.schedule = profiler.schedule(**schedule)
        else:
            self.schedule = None

        self.on_trace_ready = on_trace_ready
        self.record_shapes = record_shapes
        self.profile_memory = profile_memory
        self.with_stack = with_stack
        self.with_flops = with_flops
        self.json_trace_path = json_trace_path
        # True only between profiler.__enter__() and profiler.__exit__().
        self._profiling = False

    @master_only
    def before_run(self, runner):
        """Validate the settings, build the trace handler, start profiling."""
        if self.by_epoch and runner.max_epochs < self.profile_iters:
            raise ValueError('self.profile_iters should not be greater than '
                             f'{runner.max_epochs}')

        if not self.by_epoch and runner.max_iters < self.profile_iters:
            raise ValueError('self.profile_iters should not be greater than '
                             f'{runner.max_iters}')

        if callable(self.on_trace_ready):  # handler
            _on_trace_ready = self.on_trace_ready
        elif isinstance(self.on_trace_ready, dict):  # config of handler
            trace_cfg = self.on_trace_ready.copy()
            trace_type = trace_cfg.pop('type')  # log_trace handler
            if trace_type == 'log_trace':

                def _log_handler(prof):
                    print(prof.key_averages().table(**trace_cfg))

                _on_trace_ready = _log_handler
            elif trace_type == 'tb_trace':  # tensorboard_trace handler
                try:
                    import torch_tb_profiler  # noqa: F401
                except ImportError:
                    raise ImportError('please run "pip install '
                                      'torch-tb-profiler" to install '
                                      'torch_tb_profiler')
                _on_trace_ready = torch.profiler.tensorboard_trace_handler(
                    **trace_cfg)
            else:
                raise ValueError('trace_type should be "log_trace" or '
                                 f'"tb_trace", but got {trace_type}')
        elif self.on_trace_ready is None:
            _on_trace_ready = None  # type: ignore
        else:
            raise ValueError('on_trace_ready should be handler, dict or None, '
                             f'but got {type(self.on_trace_ready)}')

        # NOTE(review): max_epochs is presumably None for runners configured
        # by iterations only -- confirm against the runner. The guard keeps
        # ``None > 1`` from raising TypeError in that case.
        if runner.max_epochs is not None and runner.max_epochs > 1:
            warnings.warn(f'profiler will profile {runner.max_epochs} epochs '
                          'instead of 1 epoch. Since profiler will slow down '
                          'the training, it is recommended to train 1 epoch '
                          'with ProfilerHook and adjust your setting according'
                          ' to the profiler summary. During normal training '
                          '(epoch > 1), you may disable the ProfilerHook.')

        self.profiler = torch.profiler.profile(
            activities=self.activities,
            schedule=self.schedule,
            on_trace_ready=_on_trace_ready,
            record_shapes=self.record_shapes,
            profile_memory=self.profile_memory,
            with_stack=self.with_stack,
            with_flops=self.with_flops)

        self.profiler.__enter__()
        self._profiling = True
        runner.logger.info('profiler is profiling...')

    def _stop_profiler(self, runner):
        """Leave the profiler context and optionally export a Chrome trace."""
        runner.logger.info('profiler may take a few minutes...')
        self.profiler.__exit__(None, None, None)
        self._profiling = False
        if self.json_trace_path is not None:
            self.profiler.export_chrome_trace(self.json_trace_path)

    @master_only
    def after_train_epoch(self, runner):
        """Stop profiling after the last profiled epoch (epoch mode)."""
        if self.by_epoch and runner.epoch == self.profile_iters - 1:
            self._stop_profiler(runner)

    @master_only
    def after_train_iter(self, runner):
        """Advance the profiler; stop after the last profiled iteration."""
        # Once the profiler context has been exited, do not keep stepping it
        # for the remainder of training.
        if not getattr(self, '_profiling', False):
            return
        self.profiler.step()
        if not self.by_epoch and runner.iter == self.profile_iters - 1:
            self._stop_profiler(runner)
@HOOKS.register_module()
class DistSamplerSeedHook(Hook):
    """Data-loading sampler for distributed training.

    When distributed training, it is only useful in conjunction with
    :obj:`EpochBasedRunner`, while :obj:`IterBasedRunner` achieves the same
    purpose with :obj:`IterLoader`.
    """

    def before_epoch(self, runner):
        """Pass ``runner.epoch`` to the first sampler exposing ``set_epoch``."""
        current_epoch_of = runner  # epoch is read only where it is used
        if hasattr(runner.data_loader.sampler, 'set_epoch'):
            # The loader's own sampler accepts the epoch directly.
            target = runner.data_loader.sampler
        elif hasattr(runner.data_loader.batch_sampler.sampler, 'set_epoch'):
            # Otherwise use the sampler wrapped by the batch sampler.
            target = runner.data_loader.batch_sampler.sampler
        else:
            return
        target.set_epoch(current_epoch_of.epoch)
@HOOKS.register_module()
class SyncBuffersHook(Hook):
    """Synchronize model buffers such as running_mean and running_var in BN at
    the end of each epoch.

    Args:
        distributed (bool): Whether distributed training is used. It is
          effective only for distributed training. Defaults to True.
    """

    def __init__(self, distributed=True):
        self.distributed = distributed

    def after_epoch(self, runner):
        """All-reduce model buffers at the end of each epoch."""
        if not self.distributed:
            # Nothing to synchronise when the flag is off.
            return
        model_buffers = runner.model.buffers()
        allreduce_params(model_buffers)
import os.path as osp import platform import shutil import time import warnings import torch from torch.optim import Optimizer import mmcv from .base_runner import BaseRunner from .builder import RUNNERS from .checkpoint import save_checkpoint from .hooks import IterTimerHook from .utils import get_host_info class IterLoader: def __init__(self, dataloader): self._dataloader = dataloader self.iter_loader = iter(self._dataloader) self._epoch = 0 @property def epoch(self): return self._epoch def __next__(self): try: data = next(self.iter_loader) except StopIteration: self._epoch += 1 if hasattr(self._dataloader.sampler, 'set_epoch'): self._dataloader.sampler.set_epoch(self._epoch) time.sleep(2) # Prevent possible deadlock during epoch transition self.iter_loader = iter(self._dataloader) data = next(self.iter_loader) return data def __len__(self): return len(self._dataloader) @RUNNERS.register_module() class IterBasedRunner(BaseRunner): """Iteration-based Runner. This runner train models iteration by iteration. 
""" def train(self, data_loader, **kwargs): self.model.train() self.mode = 'train' self.data_loader = data_loader self._epoch = data_loader.epoch data_batch = next(data_loader) self.call_hook('before_train_iter') outputs = self.model.train_step(data_batch, self.optimizer, **kwargs) if not isinstance(outputs, dict): raise TypeError('model.train_step() must return a dict') if 'log_vars' in outputs: self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) self.outputs = outputs self.call_hook('after_train_iter') self._inner_iter += 1 self._iter += 1 @torch.no_grad() def val(self, data_loader, **kwargs): self.model.eval() self.mode = 'val' self.data_loader = data_loader data_batch = next(data_loader) self.call_hook('before_val_iter') outputs = self.model.val_step(data_batch, **kwargs) if not isinstance(outputs, dict): raise TypeError('model.val_step() must return a dict') if 'log_vars' in outputs: self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) self.outputs = outputs self.call_hook('after_val_iter') self._inner_iter += 1 def run(self, data_loaders, workflow, max_iters=None, **kwargs): """Start running. Args: data_loaders (list[:obj:`DataLoader`]): Dataloaders for training and validation. workflow (list[tuple]): A list of (phase, iters) to specify the running order and iterations. E.g, [('train', 10000), ('val', 1000)] means running 10000 iterations for training and 1000 iterations for validation, iteratively. 
""" assert isinstance(data_loaders, list) assert mmcv.is_list_of(workflow, tuple) assert len(data_loaders) == len(workflow) if max_iters is not None: warnings.warn( 'setting max_iters in run is deprecated, ' 'please set max_iters in runner_config', DeprecationWarning) self._max_iters = max_iters assert self._max_iters is not None, ( 'max_iters must be specified during instantiation') work_dir = self.work_dir if self.work_dir is not None else 'NONE' self.logger.info('Start running, host: %s, work_dir: %s', get_host_info(), work_dir) self.logger.info('Hooks will be executed in the following order:\n%s', self.get_hook_info()) self.logger.info('workflow: %s, max: %d iters', workflow, self._max_iters) self.call_hook('before_run') iter_loaders = [IterLoader(x) for x in data_loaders] self.call_hook('before_epoch') while self.iter < self._max_iters: for i, flow in enumerate(workflow): self._inner_iter = 0 mode, iters = flow if not isinstance(mode, str) or not hasattr(self, mode): raise ValueError( 'runner has no method named "{}" to run a workflow'. format(mode)) iter_runner = getattr(self, mode) for _ in range(iters): if mode == 'train' and self.iter >= self._max_iters: break iter_runner(iter_loaders[i], **kwargs) #> time.sleep(1) # wait for some hooks like loggers to finish self.call_hook('after_epoch') self.call_hook('after_run') def resume(self, checkpoint, resume_optimizer=True, map_location='default'): """Resume model from checkpoint. Args: checkpoint (str): Checkpoint to resume from. resume_optimizer (bool, optional): Whether resume the optimizer(s) if the checkpoint file includes optimizer(s). Default to True. map_location (str, optional): Same as :func:`torch.load`. Default to 'default'. 
""" if map_location == 'default': device_id = torch.cuda.current_device() checkpoint = self.load_checkpoint( checkpoint, map_location=lambda storage, loc: storage.cuda(device_id)) else: checkpoint = self.load_checkpoint( checkpoint, map_location=map_location) self._epoch = checkpoint['meta']['epoch'] self._iter = checkpoint['meta']['iter'] self._inner_iter = checkpoint['meta']['iter'] if 'optimizer' in checkpoint and resume_optimizer: if isinstance(self.optimizer, Optimizer): self.optimizer.load_state_dict(checkpoint['optimizer']) elif isinstance(self.optimizer, dict): for k in self.optimizer.keys(): self.optimizer[k].load_state_dict( checkpoint['optimizer'][k]) else: raise TypeError( 'Optimizer should be dict or torch.optim.Optimizer ' f'but got {type(self.optimizer)}') self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}') def save_checkpoint(self, out_dir, filename_tmpl='iter_{}.pth', meta=None, save_optimizer=True, create_symlink=True): """Save checkpoint to file. Args: out_dir (str): Directory to save checkpoint files. filename_tmpl (str, optional): Checkpoint file template. Defaults to 'iter_{}.pth'. meta (dict, optional): Metadata to be saved in checkpoint. Defaults to None. save_optimizer (bool, optional): Whether save optimizer. Defaults to True. create_symlink (bool, optional): Whether create symlink to the latest checkpoint file. Defaults to True. """ if meta is None: meta = {} elif not isinstance(meta, dict): raise TypeError( f'meta should be a dict or None, but got {type(meta)}') if self.meta is not None: meta.update(self.meta) # Note: meta.update(self.meta) should be done before # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise # there will be problems with resumed checkpoints. 
# More details in https://github.com/open-mmlab/mmcv/pull/1108 meta.update(epoch=self.epoch + 1, iter=self.iter) filename = filename_tmpl.format(self.iter + 1) filepath = osp.join(out_dir, filename) optimizer = self.optimizer if save_optimizer else None save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) # in some environments, `os.symlink` is not supported, you may need to # set `create_symlink` to False if create_symlink: dst_file = osp.join(out_dir, 'latest.pth') if platform.system() != 'Windows': mmcv.symlink(filename, dst_file) else: shutil.copy(filepath, dst_file) def register_training_hooks(self, lr_config, optimizer_config=None, checkpoint_config=None, log_config=None, momentum_config=None, custom_hooks_config=None): """Register default hooks for iter-based training. Checkpoint hook, optimizer stepper hook and logger hooks will be set to `by_epoch=False` by default. Default hooks include: +----------------------+-------------------------+ | Hooks | Priority | +======================+=========================+ | LrUpdaterHook | VERY_HIGH (10) | +----------------------+-------------------------+ | MomentumUpdaterHook | HIGH (30) | +----------------------+-------------------------+ | OptimizerStepperHook | ABOVE_NORMAL (40) | +----------------------+-------------------------+ | CheckpointSaverHook | NORMAL (50) | +----------------------+-------------------------+ | IterTimerHook | LOW (70) | +----------------------+-------------------------+ | LoggerHook(s) | VERY_LOW (90) | +----------------------+-------------------------+ | CustomHook(s) | defaults to NORMAL (50) | +----------------------+-------------------------+ If custom hooks have same priority with default hooks, custom hooks will be triggered after default hooks. 
""" if checkpoint_config is not None: checkpoint_config.setdefault('by_epoch', False) if lr_config is not None: lr_config.setdefault('by_epoch', False) if log_config is not None: for info in log_config['hooks']: info.setdefault('by_epoch', False) super(IterBasedRunner, self).register_training_hooks( lr_config=lr_config, momentum_config=momentum_config, optimizer_config=optimizer_config, checkpoint_config=checkpoint_config, log_config=log_config, timer_config=IterTimerHook(), custom_hooks_config=custom_hooks_config) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/log_buffer.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from collections import OrderedDict import torch import numpy as np class LogBuffer: def __init__(self): self.val_history = OrderedDict() self.n_history = OrderedDict() self.output = OrderedDict() self.ready = False def clear(self): self.val_history.clear() self.n_history.clear() self.clear_output() def clear_output(self): self.output.clear() self.ready = False # def update(self, vars, count=1): # assert isinstance(vars, dict) # for key, var in vars.items(): # if key not in self.val_history: # self.val_history[key] = [] # self.n_history[key] = [] # self.val_history[key].append(var) # self.n_history[key].append(count) # {k:v}打印,对每个k都有val、avg、max、deque属性 def update(self, vars, count=1): # dist.barrier() for k, v in vars.items(): if k not in self.val_history: self.val_history[k] = [] self.n_history[k] = [] if isinstance(v, torch.Tensor): v = torch.mean(v) if hasattr(v, 'item'): v = v.item() assert isinstance(v, (float, int, str)), print(f"{k} type: {type(v)}") self.val_history[k].append(v) self.n_history[k].append(count) def average(self, n=0): """Average latest n values or all values.""" assert n >= 0 for key in self.val_history: values = np.array(self.val_history[key][-n:]) nums = np.array(self.n_history[key][-n:]) avg = np.sum(values * nums) / 
np.sum(nums) self.output[key] = avg self.ready = True ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/misc.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import glob import os.path as osp import warnings def find_latest_checkpoint(path, suffix='pth'): """Find the latest checkpoint from the working directory. Args: path(str): The path to find checkpoints. suffix(str): File extension. Defaults to pth. Returns: latest_path(str | None): File path of the latest checkpoint. References: .. [1] https://github.com/microsoft/SoftTeacher /blob/main/ssod/utils/patch.py """ if not osp.exists(path): warnings.warn('The path of checkpoints does not exist.') return None if osp.exists(osp.join(path, f'latest.{suffix}')): return osp.join(path, f'latest.{suffix}') checkpoints = glob.glob(osp.join(path, f'*.{suffix}')) if len(checkpoints) == 0: warnings.warn('There are no checkpoints in the path.') return None latest = -1 latest_path = None for checkpoint in checkpoints: count = int(osp.basename(checkpoint).split('_')[-1].split('.')[0]) if count > latest: latest = count latest_path = checkpoint return latest_path ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/optimizer/__init__.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from .builder import (OPTIMIZER_BUILDERS, OPTIMIZERS, build_optimizer, build_optimizer_constructor) from .default_constructor import DefaultOptimizerConstructor __all__ = [ 'OPTIMIZER_BUILDERS', 'OPTIMIZERS', 'DefaultOptimizerConstructor', 'build_optimizer', 'build_optimizer_constructor' ] ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/optimizer/builder.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
import copy import inspect import torch from ...utils import Registry, build_from_cfg OPTIMIZERS = Registry('optimizer') OPTIMIZER_BUILDERS = Registry('optimizer builder') def register_torch_optimizers(): torch_optimizers = [] for module_name in dir(torch.optim): if module_name.startswith('__'): continue _optim = getattr(torch.optim, module_name) if inspect.isclass(_optim) and issubclass(_optim, torch.optim.Optimizer): OPTIMIZERS.register_module()(_optim) torch_optimizers.append(module_name) return torch_optimizers TORCH_OPTIMIZERS = register_torch_optimizers() def build_optimizer_constructor(cfg): return build_from_cfg(cfg, OPTIMIZER_BUILDERS) def build_optimizer(model, cfg): optimizer_cfg = copy.deepcopy(cfg) constructor_type = optimizer_cfg.pop('constructor', 'DefaultOptimizerConstructor') paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None) optim_constructor = build_optimizer_constructor( dict( type=constructor_type, optimizer_cfg=optimizer_cfg, paramwise_cfg=paramwise_cfg)) optimizer = optim_constructor(model) return optimizer ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/optimizer/default_constructor.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import warnings import torch from torch.nn import GroupNorm, LayerNorm from mmcv.utils import _BatchNorm, _InstanceNorm, build_from_cfg, is_list_of from mmcv.utils.ext_loader import check_ops_exist from .builder import OPTIMIZER_BUILDERS, OPTIMIZERS @OPTIMIZER_BUILDERS.register_module() class DefaultOptimizerConstructor: """Default constructor for optimizers. By default each parameter share the same optimizer settings, and we provide an argument ``paramwise_cfg`` to specify parameter-wise settings. It is a dict and may contain the following fields: - ``custom_keys`` (dict): Specified parameters-wise settings by keys. 
If one of the keys in ``custom_keys`` is a substring of the name of one parameter, then the setting of the parameter will be specified by ``custom_keys[key]`` and other setting like ``bias_lr_mult`` etc. will be ignored. It should be noted that the aforementioned ``key`` is the longest key that is a substring of the name of the parameter. If there are multiple matched keys with the same length, then the key with lower alphabet order will be chosen. ``custom_keys[key]`` should be a dict and may contain fields ``lr_mult`` and ``decay_mult``. See Example 2 below. - ``bias_lr_mult`` (float): It will be multiplied to the learning rate for all bias parameters (except for those in normalization layers and offset layers of DCN). - ``bias_decay_mult`` (float): It will be multiplied to the weight decay for all bias parameters (except for those in normalization layers, depthwise conv layers, offset layers of DCN). - ``norm_decay_mult`` (float): It will be multiplied to the weight decay for all weight and bias parameters of normalization layers. - ``dwconv_decay_mult`` (float): It will be multiplied to the weight decay for all weight and bias parameters of depthwise conv layers. - ``dcn_offset_lr_mult`` (float): It will be multiplied to the learning rate for parameters of offset layer in the deformable convs of a model. - ``bypass_duplicate`` (bool): If true, the duplicate parameters would not be added into optimizer. Default: False. Note: 1. If the option ``dcn_offset_lr_mult`` is used, the constructor will override the effect of ``bias_lr_mult`` in the bias of offset layer. So be careful when using both ``bias_lr_mult`` and ``dcn_offset_lr_mult``. If you wish to apply both of them to the offset layer in deformable convs, set ``dcn_offset_lr_mult`` to the original ``dcn_offset_lr_mult`` * ``bias_lr_mult``. 2. If the option ``dcn_offset_lr_mult`` is used, the constructor will apply it to all the DCN layers in the model. 
So be careful when the model contains multiple DCN layers in places other than backbone. Args: model (:obj:`nn.Module`): The model with parameters to be optimized. optimizer_cfg (dict): The config dict of the optimizer. Positional fields are - `type`: class name of the optimizer. Optional fields are - any arguments of the corresponding optimizer type, e.g., lr, weight_decay, momentum, etc. paramwise_cfg (dict, optional): Parameter-wise options. Example 1: >>> model = torch.nn.modules.Conv1d(1, 1, 1) >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9, >>> weight_decay=0.0001) >>> paramwise_cfg = dict(norm_decay_mult=0.) >>> optim_builder = DefaultOptimizerConstructor( >>> optimizer_cfg, paramwise_cfg) >>> optimizer = optim_builder(model) Example 2: >>> # assume model have attribute model.backbone and model.cls_head >>> optimizer_cfg = dict(type='SGD', lr=0.01, weight_decay=0.95) >>> paramwise_cfg = dict(custom_keys={ '.backbone': dict(lr_mult=0.1, decay_mult=0.9)}) >>> optim_builder = DefaultOptimizerConstructor( >>> optimizer_cfg, paramwise_cfg) >>> optimizer = optim_builder(model) >>> # Then the `lr` and `weight_decay` for model.backbone is >>> # (0.01 * 0.1, 0.95 * 0.9). `lr` and `weight_decay` for >>> # model.cls_head is (0.01, 0.95). 
""" def __init__(self, optimizer_cfg, paramwise_cfg=None): if not isinstance(optimizer_cfg, dict): raise TypeError('optimizer_cfg should be a dict', f'but got {type(optimizer_cfg)}') self.optimizer_cfg = optimizer_cfg self.paramwise_cfg = {} if paramwise_cfg is None else paramwise_cfg self.base_lr = optimizer_cfg.get('lr', None) self.base_wd = optimizer_cfg.get('weight_decay', None) self._validate_cfg() def _validate_cfg(self): if not isinstance(self.paramwise_cfg, dict): raise TypeError('paramwise_cfg should be None or a dict, ' f'but got {type(self.paramwise_cfg)}') if 'custom_keys' in self.paramwise_cfg: if not isinstance(self.paramwise_cfg['custom_keys'], dict): raise TypeError( 'If specified, custom_keys must be a dict, ' f'but got {type(self.paramwise_cfg["custom_keys"])}') if self.base_wd is None: for key in self.paramwise_cfg['custom_keys']: if 'decay_mult' in self.paramwise_cfg['custom_keys'][key]: raise ValueError('base_wd should not be None') # get base lr and weight decay # weight_decay must be explicitly specified if mult is specified if ('bias_decay_mult' in self.paramwise_cfg or 'norm_decay_mult' in self.paramwise_cfg or 'dwconv_decay_mult' in self.paramwise_cfg): if self.base_wd is None: raise ValueError('base_wd should not be None') def _is_in(self, param_group, param_group_list): assert is_list_of(param_group_list, dict) param = set(param_group['params']) param_set = set() for group in param_group_list: param_set.update(set(group['params'])) return not param.isdisjoint(param_set) def add_params(self, params, module, prefix='', is_dcn_module=None): """Add all parameters of module to the params list. The parameters of the given module will be added to the list of param groups, with specific rules defined by paramwise_cfg. Args: params (list[dict]): A list of param groups, it will be modified in place. module (nn.Module): The module to be added. 
prefix (str): The prefix of the module is_dcn_module (int|float|None): If the current module is a submodule of DCN, `is_dcn_module` will be passed to control conv_offset layer's learning rate. Defaults to None. """ # get param-wise options custom_keys = self.paramwise_cfg.get('custom_keys', {}) # first sort with alphabet order and then sort with reversed len of str sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True) bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', 1.) bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', 1.) norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', 1.) dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', 1.) bypass_duplicate = self.paramwise_cfg.get('bypass_duplicate', False) dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', 1.) # special rules for norm layers and depth-wise conv layers is_norm = isinstance(module, (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm)) is_dwconv = ( isinstance(module, torch.nn.Conv2d) and module.in_channels == module.groups) for name, param in module.named_parameters(recurse=False): param_group = {'params': [param]} if not param.requires_grad: params.append(param_group) continue if bypass_duplicate and self._is_in(param_group, params): warnings.warn(f'{prefix} is duplicate. It is skipped since ' f'bypass_duplicate={bypass_duplicate}') continue # if the parameter match one of the custom keys, ignore other rules is_custom = False for key in sorted_keys: if key in f'{prefix}.{name}': is_custom = True lr_mult = custom_keys[key].get('lr_mult', 1.) param_group['lr'] = self.base_lr * lr_mult if self.base_wd is not None: decay_mult = custom_keys[key].get('decay_mult', 1.) 
param_group['weight_decay'] = self.base_wd * decay_mult break if not is_custom: # bias_lr_mult affects all bias parameters # except for norm.bias dcn.conv_offset.bias if name == 'bias' and not (is_norm or is_dcn_module): param_group['lr'] = self.base_lr * bias_lr_mult if (prefix.find('conv_offset') != -1 and is_dcn_module and isinstance(module, torch.nn.Conv2d)): # deal with both dcn_offset's bias & weight param_group['lr'] = self.base_lr * dcn_offset_lr_mult # apply weight decay policies if self.base_wd is not None: # norm decay if is_norm: param_group[ 'weight_decay'] = self.base_wd * norm_decay_mult # depth-wise conv elif is_dwconv: param_group[ 'weight_decay'] = self.base_wd * dwconv_decay_mult # bias lr and decay elif name == 'bias' and not is_dcn_module: # TODO: current bias_decay_mult will have affect on DCN param_group[ 'weight_decay'] = self.base_wd * bias_decay_mult params.append(param_group) if check_ops_exist(): from mmcv.ops import DeformConv2d, ModulatedDeformConv2d is_dcn_module = isinstance(module, (DeformConv2d, ModulatedDeformConv2d)) else: is_dcn_module = False for child_name, child_mod in module.named_children(): child_prefix = f'{prefix}.{child_name}' if prefix else child_name self.add_params( params, child_mod, prefix=child_prefix, is_dcn_module=is_dcn_module) def __call__(self, model): if hasattr(model, 'module'): model = model.module optimizer_cfg = self.optimizer_cfg.copy() # if no paramwise option is specified, just use the global setting if not self.paramwise_cfg: optimizer_cfg['params'] = model.parameters() return build_from_cfg(optimizer_cfg, OPTIMIZERS) # set param-wise lr and weight decay recursively params = [] self.add_params(params, model) optimizer_cfg['params'] = params return build_from_cfg(optimizer_cfg, OPTIMIZERS) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/priority.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
class Priority(Enum):
    """Hook priority levels.

    +--------------+------------+
    | Level        | Value      |
    +==============+============+
    | HIGHEST      | 0          |
    +--------------+------------+
    | VERY_HIGH    | 10         |
    +--------------+------------+
    | HIGH         | 30         |
    +--------------+------------+
    | ABOVE_NORMAL | 40         |
    +--------------+------------+
    | NORMAL       | 50         |
    +--------------+------------+
    | BELOW_NORMAL | 60         |
    +--------------+------------+
    | LOW          | 70         |
    +--------------+------------+
    | VERY_LOW     | 90         |
    +--------------+------------+
    | LOWEST       | 100        |
    +--------------+------------+
    """

    HIGHEST = 0
    VERY_HIGH = 10
    HIGH = 30
    ABOVE_NORMAL = 40
    NORMAL = 50
    BELOW_NORMAL = 60
    LOW = 70
    VERY_LOW = 90
    LOWEST = 100


def get_priority(priority):
    """Get priority value.

    Args:
        priority (int or str or :obj:`Priority`): Priority. A string is
            matched case-insensitively against the :class:`Priority` level
            names.

    Returns:
        int: The priority value.

    Raises:
        ValueError: If ``priority`` is an integer outside ``[0, 100]``.
        KeyError: If ``priority`` is a string that names no level.
        TypeError: If ``priority`` is none of the accepted types.
    """
    if isinstance(priority, int):
        # bool is an int subclass, so True/False pass the range check and
        # are returned unchanged, exactly as before.
        if priority < 0 or priority > 100:
            raise ValueError('priority must be between 0 and 100')
        return priority
    elif isinstance(priority, Priority):
        return priority.value
    elif isinstance(priority, str):
        level = priority.upper()
        if level not in Priority.__members__:
            # Still a KeyError (as the bare enum lookup raised), but with a
            # message that tells the caller what is accepted.
            raise KeyError(
                f'unknown priority name {priority!r}; expected one of '
                f'{list(Priority.__members__)}')
        return Priority[level].value
    else:
        raise TypeError(
            'priority must be an integer, a str or a Priority enum value')
class AverageMeter(object):
    """Computes and stores the average and current value.

    Args:
        name (str, optional): Label prepended to the text produced by
            ``str(meter)``. Omitted from the text when None.
        fmt (str): Format spec (including the leading ``:``) applied to both
            the current value and the average in ``str(meter)``.
    """

    def __init__(self, name=None, fmt=":f"):
        # Keep the constructor arguments: they used to be accepted and then
        # dropped, which left str(meter) as the default object repr.
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear the current value, the running sum, the count and the average."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, weighted by ``n`` in the average."""
        self.val = val
        self.sum += val * n
        self.count += n
        # A zero total weight (e.g. first call with n=0) has no average yet.
        self.avg = self.sum / self.count if self.count else 0

    def __str__(self):
        template = '{val' + self.fmt + '} ({avg' + self.fmt + '})'
        text = template.format(val=self.val, avg=self.avg)
        if self.name is None:
            return text
        return '{} {}'.format(self.name, text)
class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.

    Args:
        window_size (int): Maximum number of recent values kept for the
            ``median``/``std``/``max`` statistics.
        fmt (str, optional): Format string used by ``__str__``. It may
            reference ``median``, ``avg``, ``max``, ``value`` and ``std``.
            When None a default is chosen from ``eval``.
        eval (bool): When True the default format also prints ``std``.
    """

    def __init__(self, window_size=20, fmt=None, eval=False):
        if fmt is None:
            if eval:
                fmt = "{value:.7f} (avg:{avg:.7f}, std:{std:.7f})"
            else:
                fmt = "{value:.7f} (avg:{avg:.7f})"
        self.reset(window_size)
        self.fmt = fmt

    def reset(self, window_size):
        """Drop all recorded values and restart with a window of ``window_size``."""
        self.deque = deque(maxlen=window_size)
        self.val = 0
        self.avg = 0
        self.total = 0
        self.count = 0

    def update(self, value, n=1):
        """Record ``value``; it counts ``n`` times in the global average."""
        self.deque.append(value)
        self.val = value
        self.count += n
        self.total += value * n
        # Leave the previous average in place while the total weight is zero
        # instead of dividing by zero.
        if self.count:
            self.avg = self.total / self.count

    def synchronize_between_processes(self):
        """Sum ``val``, ``count`` and ``total`` over all processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        # NOTE(review): the tensor is always created on 'cuda'; presumably a
        # CPU-only process group is not expected here -- confirm.
        t = torch.tensor([self.val, self.count, self.total],
                         dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.val = t[0]
        self.count = int(t[1])
        self.total = t[2]
        if self.count:
            self.avg = self.total / self.count

    @property
    def median(self):
        """Median of the window (0.0 while the window is empty)."""
        if not self.deque:
            return 0.0
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def std(self):
        """Standard deviation of the window.

        Returns 0.0 with fewer than two samples, where ``Tensor.std`` would
        otherwise yield nan.
        """
        if len(self.deque) < 2:
            return 0.0
        return torch.tensor(list(self.deque)).std().item()

    @property
    def max(self):
        """Largest value in the window (0 while the window is empty)."""
        return max(self.deque, default=0)

    def __str__(self):
        # Every statistic is passed; the format string picks what it needs.
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            max=self.max,
            value=self.val,
            std=self.std)
self.meters[k].update(v, n) def __getattr__(self, attr): if attr in self.meters: return self.meters[attr] if attr in self.__dict__: return self.__dict__[attr] raise AttributeError("'{}' object has no attribute '{}'".format( type(self).__name__, attr)) def __str__(self): loss_str = [] for name, meter in self.meters.items(): loss_str.append( "{}: {}".format(name, str(meter)) ) return self.delimiter.join(loss_str) def synchronize_between_processes(self): for meter in self.meters.values(): meter.synchronize_between_processes() def add_meter(self, name, meter): self.meters[name] = meter def log_every(self, iterable, print_freq, header=None): i = 1 if not header: header = '' start_time = time.time() end = time.time() iter_time = SmoothedValue(fmt='{avg:.4f}') data_time = SmoothedValue(fmt='{avg:.4f}') space_fmt = ':' + str(len(str(len(iterable)))) + 'd' if torch.cuda.is_available(): log_msg = self.delimiter.join([ header, '[{0' + space_fmt + '}/{1}]', 'eta: {eta}', '{meters}', 'time: {time}', 'data: {data}', 'max mem: {memory:.0f}MB' ]) else: log_msg = self.delimiter.join([ header, '[{0' + space_fmt + '}/{1}]', 'eta: {eta}', '{meters}', 'time: {time}', 'data: {data}' ]) MB = 1024.0 * 1024.0 # log_string = self.logger.info for obj in iterable: data_time.update(time.time() - end) yield obj, i iter_time.update(time.time() - end) if i % print_freq == 0 or i == len(iterable): eta_seconds = iter_time.avg * (len(iterable) - i) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if torch.cuda.is_available(): if self.dist_print == 0: log_string(log_msg.format( i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time), memory=torch.cuda.max_memory_allocated() / MB), logger=self.logger) # self.logger.info(log_msg.format( # i, len(iterable), eta=eta_string, # meters=str(self), # time=str(iter_time), data=str(data_time), # memory=torch.cuda.max_memory_allocated() / MB)) else: log_string(log_msg.format( i, len(iterable), eta=eta_string, 
def get_host_info():
    """Get hostname and username.

    Return empty string if exception raised, e.g. ``getpass.getuser()`` will
    lead to error in docker container

    Returns:
        str: ``'<user>@<hostname>'``, or ``''`` when either lookup raises an
        ``Exception`` (a warning is emitted in that case).
    """
    # The former ``return`` inside ``finally`` discarded *any* in-flight
    # exception, including KeyboardInterrupt/SystemExit. Returning from the
    # try/except branches keeps the best-effort behaviour for ordinary errors
    # only.
    try:
        return f'{getuser()}@{gethostname()}'
    except Exception as e:
        warnings.warn(f'Host or user not found: {str(e)}')
        return ''
def set_random_seed(seed, deterministic=False, use_rank_shift=False):
    """Set random seed.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and CUDA) and writes
    the seed to the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed (int): Seed to be used.
        deterministic (bool): Whether to set the deterministic option for
            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
            to True and `torch.backends.cudnn.benchmark` to False.
            Default: False.
        use_rank_shift (bool): Whether to add the rank number (as returned by
            ``mmcv.runner.get_dist_info()``) to the random seed so that each
            rank uses a different seed. Default: False.
    """
    if use_rank_shift:
        rank, _ = mmcv.runner.get_dist_info()
        seed += rank
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # NOTE(review): presumably this only influences interpreters started
    # after this point, not the hashing of the running process -- confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    if deterministic:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
def is_tensorrt_available():
    """Return True when the ``tensorrt`` package can be imported."""
    try:
        import tensorrt
        del tensorrt
        return True
    except ModuleNotFoundError:
        return False


__all__ = []

if is_tensorrt_available():
    from .tensorrt_utils import (TRTWraper, TRTWrapper, load_trt_engine,
                                 onnx2trt, save_trt_engine)

    # load tensorrt plugin lib
    load_tensorrt_plugin()

    # ``extend`` (not ``append``): __all__ must be a flat list of strings,
    # otherwise ``from mmcv.tensorrt import *`` fails on the nested list.
    __all__.extend([
        'onnx2trt', 'save_trt_engine', 'load_trt_engine', 'TRTWraper',
        'TRTWrapper'
    ])

__all__.extend(['is_tensorrt_plugin_loaded', 'preprocess_onnx'])
For example, onnx `attribute` is loaded in TensorRT on host and onnx `input` is loaded on device. The shape inference is performed on host, so any `input` related to shape (such as `max_output_boxes_per_class` in NonMaxSuppression) should be transformed to `attribute` before conversion. Arguments: onnx_model (onnx.ModelProto): Input onnx model. Returns: onnx.ModelProto: Modified onnx model. """ graph = onnx_model.graph nodes = graph.node initializers = graph.initializer node_dict = {} for node in nodes: node_outputs = node.output for output in node_outputs: if len(output) > 0: node_dict[output] = node init_dict = {_.name: _ for _ in initializers} nodes_name_to_remove = set() def is_node_without_output(name): for node_name, node in node_dict.items(): if node_name not in nodes_name_to_remove: if name in node.input: return False return True def mark_nodes_to_remove(name): node = node_dict[name] nodes_name_to_remove.add(name) for input_node_name in node.input: if is_node_without_output(input_node_name): mark_nodes_to_remove(input_node_name) def parse_data(name, typ, default_value=0): if name in node_dict: node = node_dict[name] if node.op_type == 'Constant': raw_data = node.attribute[0].t.raw_data else: mark_nodes_to_remove(name) return default_value elif name in init_dict: raw_data = init_dict[name].raw_data else: raise ValueError(f'{name} not found in node or initilizer.') return np.frombuffer(raw_data, typ).item() nrof_node = len(nodes) for idx in range(nrof_node): node = nodes[idx] node_attributes = node.attribute node_inputs = node.input node_outputs = node.output node_name = node.name # process NonMaxSuppression node if node.op_type == 'NonMaxSuppression': center_point_box = 0 max_output_boxes_per_class = 1000000 iou_threshold = 0.3 score_threshold = 0.0 offset = 0 for attribute in node_attributes: if attribute.name == 'center_point_box': center_point_box = attribute.i elif attribute.name == 'offset': offset = attribute.i if len(node_inputs) >= 3: 
def onnx2trt(onnx_model,
             opt_shape_dict,
             log_level=trt.Logger.ERROR,
             fp16_mode=False,
             max_workspace_size=0,
             device_id=0):
    """Convert onnx model to tensorrt engine.

    Arguments:
        onnx_model (str or onnx.ModelProto): the onnx model to convert from.
            A string is loaded with ``onnx.load``.
        opt_shape_dict (dict): maps each input name to a sequence of three
            shapes, in the order min / opt / max.
        log_level (TensorRT log level): the log level of TensorRT
        fp16_mode (bool): enable fp16 mode
        max_workspace_size (int): set max workspace size of TensorRT engine.
            some tactic and layers need large workspace.
        device_id (int): index of the CUDA device used to create the engine.

    Returns:
        tensorrt.ICudaEngine: the TensorRT engine created from onnx_model

    Raises:
        RuntimeError: if the TensorRT ONNX parser fails to parse the model;
            the message lists the parser errors.

    Example:
        >>> engine = onnx2trt(
        >>>             "onnx_model.onnx",
        >>>             {'input': [[1, 3, 160, 160],
        >>>                        [1, 3, 320, 320],
        >>>                        [1, 3, 640, 640]]},
        >>>             log_level=trt.Logger.WARNING,
        >>>             fp16_mode=True,
        >>>             max_workspace_size=1 << 30,
        >>>             device_id=0)
    """
    device = torch.device('cuda:{}'.format(device_id))
    # create builder and network
    logger = trt.Logger(log_level)
    builder = trt.Builder(logger)
    EXPLICIT_BATCH = 1 << (int)(
        trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(EXPLICIT_BATCH)

    # parse onnx
    parser = trt.OnnxParser(network, logger)

    if isinstance(onnx_model, str):
        onnx_model = onnx.load(onnx_model)

    # rewrite the graph for the mmcv plugins before handing it to the parser
    onnx_model = preprocess_onnx(onnx_model)

    if not parser.parse(onnx_model.SerializeToString()):
        # collect every parser error into one message
        error_msgs = ''
        for error in range(parser.num_errors):
            error_msgs += f'{parser.get_error(error)}\n'
        raise RuntimeError(f'parse onnx failed:\n{error_msgs}')

    # config builder
    # NOTE(review): the workspace size and fp16 flag are set on both the
    # builder and the builder config; presumably the builder attributes are
    # for older TensorRT releases -- confirm against the supported versions.
    builder.max_workspace_size = max_workspace_size

    config = builder.create_builder_config()
    config.max_workspace_size = max_workspace_size
    profile = builder.create_optimization_profile()

    # one (min, opt, max) shape triple per named input
    for input_name, param in opt_shape_dict.items():
        min_shape = tuple(param[0][:])
        opt_shape = tuple(param[1][:])
        max_shape = tuple(param[2][:])
        profile.set_shape(input_name, min_shape, opt_shape, max_shape)
    config.add_optimization_profile(profile)

    if fp16_mode:
        builder.fp16_mode = fp16_mode
        config.set_flag(trt.BuilderFlag.FP16)

    # create engine
    with torch.cuda.device(device):
        engine = builder.build_engine(network, config)

    return engine
def torch_dtype_from_trt(dtype):
    """Convert a TensorRT dtype to the matching pytorch dtype.

    Args:
        dtype: One of ``trt.bool``, ``trt.int8``, ``trt.int32``,
            ``trt.float16`` or ``trt.float32``.

    Returns:
        torch.dtype: The corresponding pytorch dtype.

    Raises:
        TypeError: If ``dtype`` is none of the values listed above.
    """
    if dtype == trt.bool:
        return torch.bool
    elif dtype == trt.int8:
        return torch.int8
    elif dtype == trt.int32:
        return torch.int32
    elif dtype == trt.float16:
        return torch.float16
    elif dtype == trt.float32:
        return torch.float32
    else:
        raise TypeError('%s is not supported by torch' % dtype)


def torch_device_from_trt(device):
    """Convert a TensorRT tensor location to the matching pytorch device.

    Args:
        device: ``trt.TensorLocation.DEVICE`` or ``trt.TensorLocation.HOST``.

    Returns:
        torch.device: ``cuda`` for ``DEVICE`` and ``cpu`` for ``HOST``.

    Raises:
        TypeError: If ``device`` is neither of the two locations.
    """
    if device == trt.TensorLocation.DEVICE:
        return torch.device('cuda')
    elif device == trt.TensorLocation.HOST:
        return torch.device('cpu')
    else:
        # Raise instead of returning the exception object, which callers
        # would otherwise pass on as if it were a device.
        raise TypeError('%s is not supported by torch' % device)
def __init__(self, engine, input_names=None, output_names=None):
    """Wrap ``engine`` and create its execution context.

    Args:
        engine (str or tensorrt.ICudaEngine): An engine, or a path passed to
            ``load_trt_engine``.
        input_names (list[str], optional): Names of the inputs.
        output_names (list[str], optional): Names of the outputs. When
            either list is None, both are derived from the engine.

    Raises:
        TypeError: If ``engine`` is neither a str nor a ``trt.ICudaEngine``.
    """
    super(TRTWrapper, self).__init__()
    self.engine = engine
    if isinstance(self.engine, str):
        self.engine = load_trt_engine(engine)

    if not isinstance(self.engine, trt.ICudaEngine):
        raise TypeError('engine should be str or trt.ICudaEngine')

    self._register_state_dict_hook(TRTWrapper._on_state_dict)
    self.context = self.engine.create_execution_context()

    # get input and output names from engine
    if input_names is None or output_names is None:
        names = [_ for _ in self.engine]
        input_names = list(filter(self.engine.binding_is_input, names))
        # Keep the engine's iteration order for the outputs; a set
        # difference would return them in arbitrary order.
        known_inputs = set(input_names)
        output_names = [name for name in names if name not in known_inputs]
    self.input_names = input_names
    self.output_names = output_names
outputs = {} for i, output_name in enumerate(self.output_names): idx = self.engine.get_binding_index(output_name) dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx)) shape = tuple(self.context.get_binding_shape(idx)) device = torch_device_from_trt(self.engine.get_location(idx)) output = torch.empty(size=shape, dtype=dtype, device=device) outputs[output_name] = output bindings[idx] = output.data_ptr() self.context.execute_async_v2(bindings, torch.cuda.current_stream().cuda_stream) return outputs class TRTWraper(TRTWrapper): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) warnings.warn( 'TRTWraper will be deprecated in' ' future. Please use TRTWrapper instead', DeprecationWarning) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/__init__.py ================================================ # flake8: noqa # Copyright (c) OpenMMLab. All rights reserved. from .config import Config, ConfigDict, DictAction from .misc import (check_prerequisites, concat_list, deprecated_api_warning, has_method, import_modules_from_strings, is_list_of, is_method_overridden, is_seq_of, is_str, is_tuple_of, iter_cast, list_cast, requires_executable, requires_package, slice_list, to_1tuple, to_2tuple, to_3tuple, to_4tuple, to_ntuple, tuple_cast) from .path import (check_file_exist, fopen, is_filepath, mkdir_or_exist, scandir, symlink) from .progressbar import (ProgressBar, track_iter_progress, track_parallel_progress, track_progress) from .testing import (assert_attrs_equal, assert_dict_contains_subset, assert_dict_has_keys, assert_is_norm_layer, assert_keys_equal, assert_params_all_zeros, check_python_script) from .timer import Timer, TimerError, check_time from .version_utils import digit_version, get_git_hash try: import torch except ImportError: __all__ = [ 'Config', 'ConfigDict', 'DictAction', 'is_str', 'iter_cast', 'list_cast', 'tuple_cast', 'is_seq_of', 'is_list_of', 'is_tuple_of', 'slice_list', 
'concat_list', 'check_prerequisites', 'requires_package', 'requires_executable', 'is_filepath', 'fopen', 'check_file_exist', 'mkdir_or_exist', 'symlink', 'scandir', 'ProgressBar', 'track_progress', 'track_iter_progress', 'track_parallel_progress', 'Timer', 'TimerError', 'check_time', 'deprecated_api_warning', 'digit_version', 'get_git_hash', 'import_modules_from_strings', 'assert_dict_contains_subset', 'assert_attrs_equal', 'assert_dict_has_keys', 'assert_keys_equal', 'check_python_script', 'to_1tuple', 'to_2tuple', 'to_3tuple', 'to_4tuple', 'to_ntuple', 'is_method_overridden', 'has_method' ] else: from .env import collect_env from .logging import get_logger, print_log from .parrots_jit import jit, skip_no_elena from .parrots_wrapper import ( TORCH_VERSION, BuildExtension, CppExtension, CUDAExtension, DataLoader, PoolDataLoader, SyncBatchNorm, _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _BatchNorm, _ConvNd, _ConvTransposeMixin, _InstanceNorm, _MaxPoolNd, get_build_config, is_rocm_pytorch, _get_cuda_home) from .registry import Registry, build_from_cfg from .trace import is_jit_tracing from .hub import load_url __all__ = [ 'Config', 'ConfigDict', 'DictAction', 'collect_env', 'get_logger', 'print_log', 'is_str', 'iter_cast', 'list_cast', 'tuple_cast', 'is_seq_of', 'is_list_of', 'is_tuple_of', 'slice_list', 'concat_list', 'check_prerequisites', 'requires_package', 'requires_executable', 'is_filepath', 'fopen', 'check_file_exist', 'mkdir_or_exist', 'symlink', 'scandir', 'ProgressBar', 'track_progress', 'track_iter_progress', 'track_parallel_progress', 'Registry', 'build_from_cfg', 'Timer', 'TimerError', 'check_time', 'SyncBatchNorm', '_AdaptiveAvgPoolNd', '_AdaptiveMaxPoolNd', '_AvgPoolNd', '_BatchNorm', '_ConvNd', '_ConvTransposeMixin', '_InstanceNorm', '_MaxPoolNd', 'get_build_config', 'BuildExtension', 'CppExtension', 'CUDAExtension', 'DataLoader', 'PoolDataLoader', 'TORCH_VERSION', 'deprecated_api_warning', 'digit_version', 'get_git_hash', 
def add_args(parser, cfg, prefix=''):
    """Add one ``--<prefix><key>`` option to ``parser`` per entry of ``cfg``.

    Args:
        parser (argparse.ArgumentParser): Parser that receives the options.
        cfg (dict): Mapping of option names to example values; the value's
            type decides how the option is parsed. Nested dicts are expanded
            recursively with ``<key>.`` appended to the prefix.
        prefix (str): Text inserted between ``--`` and every key.

    Returns:
        argparse.ArgumentParser: The same ``parser``.
    """
    for k, v in cfg.items():
        option = '--' + prefix + k
        if isinstance(v, str):
            parser.add_argument(option)
        elif isinstance(v, bool):
            # Must be tested before int: bool is a subclass of int, so this
            # branch was unreachable when it came after the int check.
            parser.add_argument(option, action='store_true')
        elif isinstance(v, int):
            parser.add_argument(option, type=int)
        elif isinstance(v, float):
            parser.add_argument(option, type=float)
        elif isinstance(v, dict):
            add_args(parser, v, prefix + k + '.')
        elif isinstance(v, abc.Iterable):
            # Take the element type from the first item without indexing, so
            # empty and non-indexable iterables (e.g. sets) do not raise.
            first = next(iter(v), None)
            if first is None:
                parser.add_argument(option, nargs='+')
            else:
                parser.add_argument(option, type=type(first), nargs='+')
        else:
            print(f'cannot parse key {prefix + k} of type {type(v)}')
    return parser
@staticmethod
def _file2dict(filename, use_predefined_variables=True):
    """Load a config file, resolve its ``_base_`` files and merge them.

    The file is first copied (with placeholders substituted) into a
    temporary directory. ``.py`` configs are imported from there as a
    module; ``.json``/``.yml``/``.yaml`` configs are read with
    ``mmcv.load``.

    Args:
        filename (str): Path of the config file. ``~`` is expanded and the
            path is made absolute.
        use_predefined_variables (bool): If True, the predefined
            ``{{ fileDirname }}``-style placeholders are substituted
            before parsing. The flag is forwarded to every base config.
            Default: True.

    Returns:
        tuple[dict, str]: The merged config dict, and the text of all
        involved files (bases first), each prefixed by its path.

    Raises:
        IOError: If the file extension is not py/json/yaml/yml.
        SyntaxError: If a ``.py`` config does not parse.
        KeyError: If two base configs define the same top-level key.
    """
    filename = osp.abspath(osp.expanduser(filename))
    check_file_exist(filename)
    fileExtname = osp.splitext(filename)[1]
    if fileExtname not in ['.py', '.json', '.yaml', '.yml']:
        raise IOError('Only py/yml/yaml/json type are supported now!')

    with tempfile.TemporaryDirectory() as temp_config_dir:
        temp_config_file = tempfile.NamedTemporaryFile(
            dir=temp_config_dir, suffix=fileExtname)
        try:
            # NOTE(review): presumably needed because an open
            # NamedTemporaryFile cannot be reopened by name on Windows.
            if platform.system() == 'Windows':
                temp_config_file.close()
            temp_config_name = osp.basename(temp_config_file.name)
            # Substitute predefined variables
            if use_predefined_variables:
                Config._substitute_predefined_vars(filename,
                                                   temp_config_file.name)
            else:
                shutil.copyfile(filename, temp_config_file.name)
            # Substitute base variables from placeholders to strings
            base_var_dict = Config._pre_substitute_base_vars(
                temp_config_file.name, temp_config_file.name)

            if filename.endswith('.py'):
                temp_module_name = osp.splitext(temp_config_name)[0]
                # Validate before touching sys.path so that a syntax error
                # cannot leave the temporary directory on the import path.
                Config._validate_py_syntax(filename)
                sys.path.insert(0, temp_config_dir)
                try:
                    mod = import_module(temp_module_name)
                finally:
                    # Remove by value: the imported config may itself have
                    # modified sys.path, so index 0 is not guaranteed to
                    # still be our entry.
                    if temp_config_dir in sys.path:
                        sys.path.remove(temp_config_dir)
                    # delete imported module
                    sys.modules.pop(temp_module_name, None)
                cfg_dict = {
                    name: value
                    for name, value in mod.__dict__.items()
                    if not name.startswith('__')
                }
            elif filename.endswith(('.yml', '.yaml', '.json')):
                import mmcv
                cfg_dict = mmcv.load(temp_config_file.name)
        finally:
            # close temp file (also on failure, so that the temporary
            # directory can always be cleaned up)
            temp_config_file.close()

    # check deprecation information
    if DEPRECATION_KEY in cfg_dict:
        deprecation_info = cfg_dict.pop(DEPRECATION_KEY)
        warning_msg = f'The config file {filename} will be deprecated ' \
            'in the future.'
        if 'expected' in deprecation_info:
            warning_msg += f' Please use {deprecation_info["expected"]} ' \
                'instead.'
        if 'reference' in deprecation_info:
            warning_msg += ' More information can be found at ' \
                f'{deprecation_info["reference"]}'
        warnings.warn(warning_msg, DeprecationWarning)

    cfg_text = filename + '\n'
    with open(filename, 'r', encoding='utf-8') as f:
        # Setting encoding explicitly to resolve coding issue on windows
        cfg_text += f.read()

    if BASE_KEY in cfg_dict:
        cfg_dir = osp.dirname(filename)
        base_filename = cfg_dict.pop(BASE_KEY)
        base_filename = base_filename if isinstance(
            base_filename, list) else [base_filename]

        cfg_dict_list = list()
        cfg_text_list = list()
        for f in base_filename:
            # Forward the flag so that bases are treated like the child.
            _cfg_dict, _cfg_text = Config._file2dict(
                osp.join(cfg_dir, f), use_predefined_variables)
            cfg_dict_list.append(_cfg_dict)
            cfg_text_list.append(_cfg_text)

        base_cfg_dict = dict()
        for c in cfg_dict_list:
            duplicate_keys = base_cfg_dict.keys() & c.keys()
            if len(duplicate_keys) > 0:
                raise KeyError('Duplicate key is not allowed among bases. '
                               f'Duplicate keys: {duplicate_keys}')
            base_cfg_dict.update(c)

        # Substitute base variables from strings to their actual values
        cfg_dict = Config._substitute_base_vars(cfg_dict, base_var_dict,
                                                base_cfg_dict)

        base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict)
        cfg_dict = base_cfg_dict

        # merge cfg_text
        cfg_text_list.append(cfg_text)
        cfg_text = '\n'.join(cfg_text_list)

    return cfg_dict, cfg_text
@staticmethod
def fromstring(cfg_str, file_format):
    """Generate config from config str.

    The string is written to a temporary file with the given suffix and
    then loaded through ``Config.fromfile``. The temporary file is removed
    afterwards, whether or not loading succeeded.

    Args:
        cfg_str (str): Config str.
        file_format (str): Config file format corresponding to the
            config str. Only py/yml/yaml/json type are supported now!

    Returns:
        :obj:`Config`: Config obj.

    Raises:
        IOError: If ``file_format`` is not one of the supported suffixes.
    """
    if file_format not in ['.py', '.json', '.yaml', '.yml']:
        raise IOError('Only py/yml/yaml/json type are supported now!')
    if file_format != '.py' and 'dict(' in cfg_str:
        # check if users specify a wrong suffix for python
        warnings.warn(
            'Please check "file_format", the file format may be .py')
    with tempfile.NamedTemporaryFile(
            'w', encoding='utf-8', suffix=file_format,
            delete=False) as temp_file:
        temp_file.write(cfg_str)
        # on windows, previous implementation cause error
        # see PR 1077 for details
    try:
        cfg = Config.fromfile(temp_file.name)
    finally:
        # delete=False means nobody else cleans this file up, so remove it
        # even when parsing the config raised.
        os.remove(temp_file.name)
    return cfg
use_mapping=False): if isinstance(v, str): v_str = f"'{v}'" else: v_str = str(v) if use_mapping: k_str = f"'{k}'" if isinstance(k, str) else str(k) attr_str = f'{k_str}: {v_str}' else: attr_str = f'{str(k)}={v_str}' attr_str = _indent(attr_str, indent) return attr_str def _format_list(k, v, use_mapping=False): # check if all items in the list are dict if all(isinstance(_, dict) for _ in v): v_str = '[\n' v_str += '\n'.join( f'dict({_indent(_format_dict(v_), indent)}),' for v_ in v).rstrip(',') if use_mapping: k_str = f"'{k}'" if isinstance(k, str) else str(k) attr_str = f'{k_str}: {v_str}' else: attr_str = f'{str(k)}={v_str}' attr_str = _indent(attr_str, indent) + ']' else: attr_str = _format_basic_types(k, v, use_mapping) return attr_str def _contain_invalid_identifier(dict_str): contain_invalid_identifier = False for key_name in dict_str: contain_invalid_identifier |= \ (not str(key_name).isidentifier()) return contain_invalid_identifier def _format_dict(input_dict, outest_level=False): r = '' s = [] use_mapping = _contain_invalid_identifier(input_dict) if use_mapping: r += '{' for idx, (k, v) in enumerate(input_dict.items()): is_last = idx >= len(input_dict) - 1 end = '' if outest_level or is_last else ',' if isinstance(v, dict): v_str = '\n' + _format_dict(v) if use_mapping: k_str = f"'{k}'" if isinstance(k, str) else str(k) attr_str = f'{k_str}: dict({v_str}' else: attr_str = f'{str(k)}=dict({v_str}' attr_str = _indent(attr_str, indent) + ')' + end elif isinstance(v, list): attr_str = _format_list(k, v, use_mapping) + end else: attr_str = _format_basic_types(k, v, use_mapping) + end s.append(attr_str) r += '\n'.join(s) if use_mapping: r += '}' return r cfg_dict = self._cfg_dict.to_dict() text = _format_dict(cfg_dict, outest_level=True) # copied from setup.cfg yapf_style = dict( based_on_style='pep8', blank_line_before_nested_class_or_def=True, split_before_expression_after_opening_paren=True) text, _ = FormatCode(text, style_config=yapf_style, verify=True) 
def dump(self, file=None):
    """Serialize the config, either to a string or into ``file``.

    The output format follows the file the config was loaded from: a
    config loaded from a ``.py`` file is written as ``pretty_text``;
    anything else is handed to ``mmcv.dump``. A config that was not loaded
    from a file (``filename`` is None, e.g. ``Config(dict(...))``) is
    treated like a ``.py`` config instead of raising ``AttributeError``.

    Args:
        file (str, optional): Destination path. If None, the serialized
            text is returned instead of being written. Default: None.

    Returns:
        str | None: The serialized config when ``file`` is None,
        otherwise None.
    """
    cfg_dict = super(Config, self).__getattribute__('_cfg_dict').to_dict()
    source_name = self.filename
    if source_name is None or source_name.endswith('.py'):
        if file is None:
            return self.pretty_text
        else:
            with open(file, 'w', encoding='utf-8') as f:
                f.write(self.pretty_text)
    else:
        import mmcv
        if file is None:
            file_format = source_name.split('.')[-1]
            return mmcv.dump(cfg_dict, file_format=file_format)
        else:
            mmcv.dump(cfg_dict, file)
class DictAction(Action):
    """
    argparse action to split an argument into KEY=VALUE form
    on the first = and append to a dictionary. List options can
    be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit
    brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build
    list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]'
    """

    @staticmethod
    def _parse_int_float_bool(val):
        """Convert a string to int, float or bool when it looks like one.

        Args:
            val (str): Value string.

        Returns:
            int | float | bool | str: ``int(val)`` if that succeeds, else
            ``float(val)`` if that succeeds, else a bool for
            'true'/'false' (case-insensitive), else ``val`` unchanged.
        """
        try:
            return int(val)
        except ValueError:
            pass
        try:
            return float(val)
        except ValueError:
            pass
        if val.lower() in ['true', 'false']:
            return val.lower() == 'true'
        return val

    @staticmethod
    def _parse_iterable(val):
        """Parse iterable values in the string.

        All elements inside '()' or '[]' are treated as iterable values.

        Args:
            val (str): Value string.

        Returns:
            list | tuple: The expanded list or tuple from the string.

        Examples:
            >>> DictAction._parse_iterable('1,2,3')
            [1, 2, 3]
            >>> DictAction._parse_iterable('[a, b, c]')
            ['a', 'b', 'c']
            >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]')
            [(1, 2, 3), ['a', 'b'], 'c']
        """

        def find_next_comma(string):
            """Find the position of next comma in the string.

            If no ',' is found in the string, return the string length. All
            chars inside '()' and '[]' are treated as one element and thus ','
            inside these brackets are ignored.
            """
            assert (string.count('(') == string.count(')')) and (
                    string.count('[') == string.count(']')), \
                f'Imbalanced brackets exist in {string}'
            # Running "opened minus closed" balances of the text before the
            # current char. A balance of zero is the same condition as the
            # prefix having equally many opening and closing brackets, but
            # is maintained in a single pass over the string.
            round_balance = 0
            square_balance = 0
            for idx, char in enumerate(string):
                if char == ',' and round_balance == 0 and square_balance == 0:
                    return idx
                if char == '(':
                    round_balance += 1
                elif char == ')':
                    round_balance -= 1
                elif char == '[':
                    square_balance += 1
                elif char == ']':
                    square_balance -= 1
            return len(string)

        # Strip ' and " characters and replace whitespace.
        val = val.strip('\'\"').replace(' ', '')
        is_tuple = False
        if val.startswith('(') and val.endswith(')'):
            is_tuple = True
            val = val[1:-1]
        elif val.startswith('[') and val.endswith(']'):
            val = val[1:-1]
        elif ',' not in val:
            # val is a single value
            return DictAction._parse_int_float_bool(val)

        values = []
        while len(val) > 0:
            comma_idx = find_next_comma(val)
            element = DictAction._parse_iterable(val[:comma_idx])
            values.append(element)
            val = val[comma_idx + 1:]
        if is_tuple:
            values = tuple(values)
        return values

    def __call__(self, parser, namespace, values, option_string=None):
        """Parse every ``KEY=VALUE`` item and store the dict on ``namespace``.

        Raises:
            ValueError: If an item contains no ``=``.
        """
        options = {}
        for kv in values:
            key, sep, val = kv.partition('=')
            if not sep:
                # Same exception type as the bare tuple-unpack failure this
                # replaces, but with a message that names the bad item.
                raise ValueError(
                    f'Expected KEY=VALUE but got "{kv}" (missing "=")')
            options[key] = self._parse_iterable(val)
        setattr(namespace, self.dest, options)
def collect_env():
    """Collect the information of the running environments.

    The compiler versions are obtained by running ``nvcc -V`` and
    ``gcc --version`` directly (no shell pipeline), so a missing or failing
    executable is reported through the documented fallback values.

    Returns:
        dict: The environment information. The following fields are
            contained.

            - sys.platform: The variable of ``sys.platform``.
            - Python: Python version.
            - CUDA available: Bool, indicating if CUDA is available.
            - GPU devices: Device type of each GPU.
            - CUDA_HOME (optional): The env var ``CUDA_HOME``.
            - NVCC (optional): NVCC version, "Not Available" if it cannot
                be queried.
            - GCC: GCC version, "n/a" if GCC is not installed.
            - PyTorch: PyTorch version.
            - PyTorch compiling details: The output of \
                ``torch.__config__.show()``.
            - TorchVision (optional): TorchVision version.
            - OpenCV: OpenCV version.
            - MMCV: MMCV version.
            - MMCV Compiler: The GCC version for compiling MMCV ops.
            - MMCV CUDA Compiler: The CUDA version for compiling MMCV ops.
    """
    env_info = {}
    env_info['sys.platform'] = sys.platform
    env_info['Python'] = sys.version.replace('\n', '')

    cuda_available = torch.cuda.is_available()
    env_info['CUDA available'] = cuda_available

    if cuda_available:
        # Group device indices by device name, e.g. 'GPU 0,1' -> name.
        devices = defaultdict(list)
        for k in range(torch.cuda.device_count()):
            devices[torch.cuda.get_device_name(k)].append(str(k))
        for name, device_ids in devices.items():
            env_info['GPU ' + ','.join(device_ids)] = name

        from mmcv.utils.parrots_wrapper import _get_cuda_home
        CUDA_HOME = _get_cuda_home()
        env_info['CUDA_HOME'] = CUDA_HOME

        if CUDA_HOME is not None and osp.isdir(CUDA_HOME):
            try:
                nvcc = osp.join(CUDA_HOME, 'bin/nvcc')
                nvcc_out = subprocess.check_output(
                    [nvcc, '-V'], stderr=subprocess.DEVNULL)
                # Keep only the last line of the report; this is what the
                # previous `| tail -n1` pipeline selected.
                nvcc_lines = nvcc_out.decode('utf-8').strip().splitlines()
                nvcc = nvcc_lines[-1].strip() if nvcc_lines \
                    else 'Not Available'
            except (subprocess.SubprocessError, OSError):
                # OSError covers a missing or non-executable nvcc binary.
                nvcc = 'Not Available'
            env_info['NVCC'] = nvcc

    try:
        gcc_out = subprocess.check_output(
            ['gcc', '--version'], stderr=subprocess.DEVNULL)
        # Keep only the first line; this is what the previous
        # `| head -n1` pipeline selected.
        gcc_lines = gcc_out.decode('utf-8').strip().splitlines()
        env_info['GCC'] = gcc_lines[0].strip() if gcc_lines else 'n/a'
    except (subprocess.SubprocessError, OSError):
        # gcc is unavailable
        env_info['GCC'] = 'n/a'

    env_info['PyTorch'] = torch.__version__
    env_info['PyTorch compiling details'] = get_build_config()

    try:
        import torchvision
        env_info['TorchVision'] = torchvision.__version__
    except ModuleNotFoundError:
        pass

    env_info['OpenCV'] = cv2.__version__

    env_info['MMCV'] = mmcv.__version__

    try:
        from mmcv.ops import get_compiler_version, get_compiling_cuda_version
    except ModuleNotFoundError:
        env_info['MMCV Compiler'] = 'n/a'
        env_info['MMCV CUDA Compiler'] = 'n/a'
    else:
        env_info['MMCV Compiler'] = get_compiler_version()
        env_info['MMCV CUDA Compiler'] = get_compiling_cuda_version()

    return env_info
def check_ops_exist():
    """Report whether the compiled extension module ``mmcv._ext`` is present.

    Returns:
        bool: True if an import spec with a loader is found for
        ``mmcv._ext``; False if it is not found or if the parent package
        ``mmcv`` cannot be imported at all.
    """
    # Imported here because a plain `import importlib` does not guarantee
    # that the `importlib.util` submodule has been loaded.
    from importlib.util import find_spec

    # `pkgutil.find_loader` is deprecated; `find_spec` is the documented
    # replacement and is what it called internally.
    try:
        spec = find_spec('mmcv._ext')
    except ModuleNotFoundError:
        # Raised when the parent package `mmcv` is missing.
        return False
    return spec is not None and spec.loader is not None
# More details at https://github.com/open-mmlab/mmpose/issues/904 from .parrots_wrapper import TORCH_VERSION from .path import mkdir_or_exist from .version_utils import digit_version if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) < digit_version( '1.7.0'): # Modified from https://github.com/pytorch/pytorch/blob/master/torch/hub.py import os import torch import warnings from urllib.parse import urlparse import sys import zipfile from torch.hub import download_url_to_file, _get_torch_home, HASH_REGEX # Hub used to support automatically extracts from zipfile manually # compressed by users. The legacy zip format expects only one file from # torch.save() < 1.6 in the zip. We should remove this support since # zipfile is now default zipfile format for torch.save(). def _is_legacy_zip_format(filename): if zipfile.is_zipfile(filename): infolist = zipfile.ZipFile(filename).infolist() return len(infolist) == 1 and not infolist[0].is_dir() return False def _legacy_zip_load(filename, model_dir, map_location): warnings.warn( 'Falling back to the old format < 1.6. This support will' ' be deprecated in favor of default zipfile format ' 'introduced in 1.6. Please redo torch.save() to save it ' 'in the new zipfile format.', DeprecationWarning) # Note: extractall() defaults to overwrite file if exists. No need to # clean up beforehand. We deliberately don't handle tarfile here # since our legacy serialization format was in tar. # E.g. resnet18-5c106cde.pth which is widely used. with zipfile.ZipFile(filename) as f: members = f.infolist() if len(members) != 1: raise RuntimeError( 'Only one file(not dir) is allowed in the zipfile') f.extractall(model_dir) extraced_name = members[0].filename extracted_file = os.path.join(model_dir, extraced_name) return torch.load(extracted_file, map_location=map_location) def load_url(url, model_dir=None, map_location=None, progress=True, check_hash=False, file_name=None): r"""Loads the Torch serialized object at the given URL. 
If downloaded file is a zip file, it will be automatically decompressed. If the object is already present in `model_dir`, it's deserialized and returned. The default value of ``model_dir`` is ``{hub_dir}/checkpoints`` where ``hub_dir`` is the directory returned by :func:`~torch.hub.get_dir`. Args: url (str): URL of the object to download model_dir (str, optional): directory in which to save the object map_location (optional): a function or a dict specifying how to remap storage locations (see torch.load) progress (bool, optional): whether or not to display a progress bar to stderr. Default: True check_hash(bool, optional): If True, the filename part of the URL should follow the naming convention ``filename-{sha256}.ext`` where ``{sha256}`` is the first eight or more digits of the SHA256 hash of the contents of the file. The hash is used to ensure unique names and to verify the contents of the file. Default: False file_name (str, optional): name for the downloaded file. Filename from ``url`` will be used if not set. Default: None. Example: >>> url = ('https://s3.amazonaws.com/pytorch/models/resnet18-5c106' ... 
'cde.pth') >>> state_dict = torch.hub.load_state_dict_from_url(url) """ # Issue warning to move data if old env is set if os.getenv('TORCH_MODEL_ZOO'): warnings.warn( 'TORCH_MODEL_ZOO is deprecated, please use env ' 'TORCH_HOME instead', DeprecationWarning) if model_dir is None: torch_home = _get_torch_home() model_dir = os.path.join(torch_home, 'checkpoints') mkdir_or_exist(model_dir) parts = urlparse(url) filename = os.path.basename(parts.path) if file_name is not None: filename = file_name cached_file = os.path.join(model_dir, filename) if not os.path.exists(cached_file): sys.stderr.write('Downloading: "{}" to {}\n'.format( url, cached_file)) hash_prefix = None if check_hash: r = HASH_REGEX.search(filename) # r is Optional[Match[str]] hash_prefix = r.group(1) if r else None download_url_to_file( url, cached_file, hash_prefix, progress=progress) if _is_legacy_zip_format(cached_file): return _legacy_zip_load(cached_file, model_dir, map_location) try: return torch.load(cached_file, map_location=map_location) except RuntimeError as error: if digit_version(TORCH_VERSION) < digit_version('1.5.0'): warnings.warn( f'If the error is the same as "{cached_file} is a zip ' 'archive (did you mean to use torch.jit.load()?)", you can' ' upgrade your torch to 1.5.0 or higher (current torch ' f'version is {TORCH_VERSION}). The error was raised ' ' because the checkpoint was saved in torch>=1.6.0 but ' 'loaded in torch<1.5.') raise error else: from torch.utils.model_zoo import load_url # noqa: F401 ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/logging.py ================================================ # # Copyright (c) OpenMMLab. All rights reserved. # import logging # # import torch.distributed as dist # # logger_initialized = {} # # # def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'): # """Initialize and get a logger by name. 
# # If the logger has not been initialized, this method will initialize the # logger by adding one or two handlers, otherwise the initialized logger will # be directly returned. During initialization, a StreamHandler will always be # added. If `log_file` is specified and the process rank is 0, a FileHandler # will also be added. # # Args: # name (str): Logger name. # log_file (str | None): The log filename. If specified, a FileHandler # will be added to the logger. # log_level (int): The logger level. Note that only the process of # rank 0 is affected, and other processes will set the level to # "Error" thus be silent most of the time. # file_mode (str): The file mode used in opening log file. # Defaults to 'w'. # # Returns: # logging.Logger: The expected logger. # """ # logger = logging.getLogger(name) # if name in logger_initialized: # return logger # # handle hierarchical names # # e.g., logger "a" is initialized, then logger "a.b" will skip the # # initialization since it is a child of "a". # for logger_name in logger_initialized: # if name.startswith(logger_name): # return logger # # # handle duplicate logs to the console # # Starting in 1.8.0, PyTorch DDP attaches a StreamHandler (NOTSET) # # to the root logger. As logger.propagate is True by default, this root # # level handler causes logging messages from rank>0 processes to # # unexpectedly show up on the console, creating much unwanted clutter. # # To fix this issue, we set the root logger's StreamHandler, if any, to log # # at the ERROR level. # for handler in logger.root.handlers: # if type(handler) is logging.StreamHandler: # handler.setLevel(logging.ERROR) # # stream_handler = logging.StreamHandler() # handlers = [stream_handler] # # if dist.is_available() and dist.is_initialized(): # rank = dist.get_rank() # else: # rank = 0 # # # only rank 0 will add a FileHandler # if rank == 0 and log_file is not None: # # Here, the default behaviour of the official logger is 'a'. 
Thus, we # # provide an interface to change the file mode to the default # # behaviour. # file_handler = logging.FileHandler(log_file, file_mode) # handlers.append(file_handler) # # formatter = logging.Formatter( # '%(asctime)s - %(name)s - %(levelname)s - %(message)s') # for handler in handlers: # handler.setFormatter(formatter) # handler.setLevel(log_level) # logger.addHandler(handler) # # if rank == 0: # logger.setLevel(log_level) # else: # logger.setLevel(logging.ERROR) # # logger_initialized[name] = True # # return logger # # # def print_log(msg, logger=None, level=logging.INFO): # """Print a log message. # # Args: # msg (str): The message to be logged. # logger (logging.Logger | str | None): The logger to be used. # Some special loggers are: # - "silent": no message will be printed. # - other str: the logger obtained with `get_root_logger(logger)`. # - None: The `print()` method will be used to print log messages. # level (int): Logging level. Only available when `logger` is a Logger # object or "root". # """ # if logger is None: # print(msg) # elif isinstance(logger, logging.Logger): # logger.log(level, msg) # elif logger == 'silent': # pass # elif isinstance(logger, str): # _logger = get_logger(logger) # _logger.log(level, msg) # else: # raise TypeError( # 'logger should be either a logging.Logger object, str, ' # f'"silent" or None, but got {type(logger)}') # GPL License # Copyright (C) 2021 , UESTC # All Rights Reserved # # @Time : 2022/1/24 11:03 # @Author : Xiao Wu # @reference: # Copyright (c) OpenMMLab. All rights reserved. 
def get_root_logger(name=None, cfg=None, cfg_name=None, log_level=logging.INFO):
    """Thin wrapper around ``get_logger``.

    Args:
        name (str | None): Logger name.
        cfg: Run configuration, passed through to ``get_logger``.
        cfg_name (str | None): Config file name, passed through.
        log_level (int): Logging level, passed through by keyword.

    Returns:
        Whatever ``get_logger`` returns for these arguments.
    """
    # `log_level` must be passed by keyword: the fourth positional
    # parameter of `get_logger` is `phase`, so passing it positionally
    # would put the numeric level into the log file name.
    return get_logger(name, cfg, cfg_name, log_level=log_level)


# TODO: deprecate; does the same as "get_root_logger" with name=None
def create_logger(cfg=None, cfg_name=None, dist_print=0, log_level=logging.INFO):
    """Call ``get_logger`` with ``name=None``.

    Args:
        cfg: Run configuration, passed through to ``get_logger``.
        cfg_name (str | None): Config file name, passed through.
        dist_print (int): Unused; kept so existing callers keep working.
        log_level (int): Logging level, passed through by keyword.

    Returns:
        Whatever ``get_logger`` returns for these arguments.
    """
    return get_logger(None, cfg, cfg_name, log_level=log_level)
def get_logger(name=None, cfg=None, cfg_name=None, phase='train', log_level=logging.INFO, file_mode='w'):  # log_file=None,
    """Create the output directories of a run and, optionally, its logger.

    On the first call for a logger name this function derives the output
    layout from ``cfg``, creates the directories and, if ``cfg.use_log``
    is set, builds the logger with ``setup_logger``.

    Args:
        name (str | None): Logger name.
        cfg: Run configuration. The attributes read here are ``out_dir``,
            ``dataset`` (a dict with a 'train' or 'val' entry), ``arch``,
            ``eval``, ``resume_from``, ``use_log`` and ``log_dir``.
            If None, no directory is created and the plain
            ``logging.getLogger(name)`` is returned.
        cfg_name (str | None): Path or name of the config file; its base
            name without extension becomes part of the output path.
        phase (str): Suffix of the log file name. Defaults to 'train'.
        log_level (int): Currently unused by this function.
        file_mode (str): Currently unused by this function.

    Returns:
        tuple | logging.Logger | None: On first initialisation, a tuple
        ``(logger, final_output_dir, model_save_dir, tensorboard_log_dir)``
        where the three paths are strings and ``logger`` is None when
        ``cfg.use_log`` is false. If ``cfg`` is None, the named logger.
        If the name (or a prefix of it) was already initialised and
        ``cfg`` is given, either the named logger or None.

    Raises:
        AssertionError: If ``cfg.dataset`` is not a dict.
    """
    if cfg is None:
        # Without a configuration there is nothing to create; hand back
        # the named logger, as the already-initialised branch below did.
        return logging.getLogger(name)

    if name in logger_initialized:
        return None
    # handle hierarchical names
    # e.g., logger "a" is initialized, then logger "a.b" will skip the
    # initialization since it is a child of "a".
    for logger_name in logger_initialized:
        # Either side can be None (e.g. after `create_logger`), and
        # `str.startswith` only accepts strings.
        if not (isinstance(name, str) and isinstance(logger_name, str)):
            continue
        if name.startswith(logger_name):
            if cfg.use_log:
                return logging.getLogger(name)
            else:
                return None

    logger = None
    tensorboard_log_dir = None
    root_output_dir = Path(cfg.out_dir)
    # set up logger in root_path
    if not root_output_dir.exists():
        # if not dist_print: #rank 0-N, 0 is False
        print('=> creating {}'.format(root_output_dir))
        root_output_dir.mkdir(parents=True, exist_ok=True)

    dataset = cfg.dataset
    # Build the message directly: `print(...)` returns None, which would
    # leave the AssertionError without any message.
    assert isinstance(dataset, dict), \
        f"{dataset}'s type is {type(dataset)}, not a dict. "
    dataset = dataset.get('train') if dataset.get('train', None) is not None else dataset.get('val')
    model = cfg.arch
    cfg_name = os.path.basename(cfg_name).split('.')[0]

    time_str = time.strftime('%Y-%m-%d-%H-%M-%S')

    # store all output except tb_log file
    final_output_dir = root_output_dir / dataset / model / cfg_name
    if cfg.eval:
        # Reuse the directory name of the checkpoint being evaluated.
        model_save_tmp = os.path.dirname(cfg.resume_from).split('/')[-1]
    else:
        model_save_tmp = "model_{}".format(time_str)

    model_save_dir = final_output_dir / model_save_tmp
    # if not dist_print:
    print_log('=> creating {}'.format(final_output_dir))
    final_output_dir.mkdir(parents=True, exist_ok=True)
    model_save_dir.mkdir(parents=True, exist_ok=True)

    if cfg.use_log:
        cfg_name = '{}_{}'.format(cfg_name, time_str)
        # a logger to save results
        log_file = '{}_{}.log'.format(cfg_name, phase)
        if cfg.eval:
            final_log_file = model_save_dir / log_file
        else:
            final_log_file = final_output_dir / log_file
        # tensorboard_log
        tensorboard_log_dir = root_output_dir / Path(cfg.log_dir) / dataset / model / cfg_name
        # if not dist_print:
        print_log('=> creating tfb logs {}'.format(tensorboard_log_dir))
        tensorboard_log_dir.mkdir(parents=True, exist_ok=True)
        logger = setup_logger(name, final_log_file)

    return logger, str(final_output_dir), str(model_save_dir), str(
        tensorboard_log_dir)  # logger,
def load_json_log(json_log):
    """load and convert json_logs to log_dicts.

    The file is read line by line; every non-empty line must be one JSON
    object. Blank lines are skipped.

    Args:
        json_log (str): The path of the json log file.

    Returns:
        dict[int, dict[str, list]]:
            Key is the epoch, value is a sub dict. The keys in each sub dict
            are different metrics, e.g. memory, bbox_mAP, and the value is a
            list of corresponding values in all iterations in this epoch.

            .. code-block:: python

                # An example output
                {
                    1: {'iter': [100, 200, 300], 'loss': [6.94, 6.73, 6.53]},
                    2: {'iter': [100, 200, 300], 'loss': [6.33, 6.20, 6.07]},
                    ...
                }
    """
    log_dict = dict()
    with open(json_log, 'r') as log_file:
        for line in log_file:
            line = line.strip()
            if not line:
                # An empty line (e.g. a trailing newline) is not JSON and
                # would make json.loads raise.
                continue
            log = json.loads(line)
            # skip lines without `epoch` field
            if 'epoch' not in log:
                continue
            epoch = log.pop('epoch')
            if epoch not in log_dict:
                log_dict[epoch] = defaultdict(list)
            for k, v in log.items():
                log_dict[epoch][k].append(v)
    return log_dict
# From PyTorch internals
def _ntuple(n):
    """Build a converter that expands a scalar into an ``n``-tuple.

    The returned callable hands any iterable back unchanged (this includes
    strings, which are iterable) and repeats every other value ``n`` times.
    """

    def parse(x):
        if isinstance(x, collections.abc.Iterable):
            return x
        return tuple(repeat(x, n))

    return parse


to_1tuple = _ntuple(1)
to_2tuple = _ntuple(2)
to_3tuple = _ntuple(3)
to_4tuple = _ntuple(4)
to_ntuple = _ntuple


def is_str(x):
    """Whether the input is an string instance.

    Note: This method is deprecated since python 2 is no longer supported.
    """
    return isinstance(x, str)


def import_modules_from_strings(imports, allow_failed_imports=False):
    """Import modules from the given list of strings.

    Args:
        imports (list | str | None): The given module names to be imported.
        allow_failed_imports (bool): If True, the failed imports will return
            None. Otherwise, an ImportError is raise. Default: False.

    Returns:
        list[module] | module | None: The imported modules.

    Raises:
        TypeError: If ``imports`` is not a list/str or holds a non-str item.
        ImportError: If a module cannot be imported and
            ``allow_failed_imports`` is False.

    Examples:
        >>> osp, sys = import_modules_from_strings(
        ...     ['os.path', 'sys'])
        >>> import os.path as osp_
        >>> import sys as sys_
        >>> assert osp == osp_
        >>> assert sys == sys_
    """
    if not imports:
        return
    single_import = False
    if isinstance(imports, str):
        single_import = True
        imports = [imports]
    if not isinstance(imports, list):
        raise TypeError(
            f'custom_imports must be a list but got type {type(imports)}')
    imported = []
    for imp in imports:
        if not isinstance(imp, str):
            raise TypeError(
                f'{imp} is of type {type(imp)} and cannot be imported.')
        try:
            imported_tmp = import_module(imp)
        except ImportError as err:
            if allow_failed_imports:
                warnings.warn(f'{imp} failed to import and is ignored.',
                              UserWarning)
                imported_tmp = None
            else:
                # Name the offending module and keep the original failure
                # as the cause instead of raising an empty ImportError.
                raise ImportError(f'Failed to import {imp}') from err
        imported.append(imported_tmp)
    if single_import:
        imported = imported[0]
    return imported
def iter_cast(inputs, dst_type, return_type=None):
    """Convert every element of an iterable to ``dst_type``.

    Args:
        inputs (Iterable): The input object.
        dst_type (type): Destination type.
        return_type (type, optional): Container type used to collect the
            converted items. When omitted a lazy iterator is returned.

    Returns:
        iterator or specified type: The converted object.
    """
    if not isinstance(inputs, abc.Iterable):
        raise TypeError('inputs must be an iterable object')
    if not isinstance(dst_type, type):
        raise TypeError('"dst_type" must be a valid type')

    converted = map(dst_type, inputs)
    return converted if return_type is None else return_type(converted)


def list_cast(inputs, dst_type):
    """Convert every element of an iterable and collect them in a list.

    A partial method of :func:`iter_cast`.
    """
    return iter_cast(inputs, dst_type, return_type=list)


def tuple_cast(inputs, dst_type):
    """Convert every element of an iterable and collect them in a tuple.

    A partial method of :func:`iter_cast`.
    """
    return iter_cast(inputs, dst_type, return_type=tuple)


def is_seq_of(seq, expected_type, seq_type=None):
    """Check whether ``seq`` is a sequence whose items all have one type.

    Args:
        seq (Sequence): The sequence to be checked.
        expected_type (type): Expected type of sequence items.
        seq_type (type, optional): Expected sequence type. Defaults to any
            ``collections.abc.Sequence``.

    Returns:
        bool: Whether the sequence is valid.
    """
    if seq_type is not None:
        assert isinstance(seq_type, type)
        container_type = seq_type
    else:
        container_type = abc.Sequence

    if not isinstance(seq, container_type):
        return False
    return all(isinstance(item, expected_type) for item in seq)


def is_list_of(seq, expected_type):
    """Check whether ``seq`` is a list whose items all have one type.

    A partial method of :func:`is_seq_of`.
    """
    return is_seq_of(seq, expected_type, seq_type=list)


def is_tuple_of(seq, expected_type):
    """Check whether ``seq`` is a tuple whose items all have one type.

    A partial method of :func:`is_seq_of`.
    """
    return is_seq_of(seq, expected_type, seq_type=tuple)


def slice_list(in_list, lens):
    """Cut a list into consecutive sub lists of the given lengths.

    Args:
        in_list (list): The list to be sliced.
        lens(int or list): The expected length of each out list. An int
            means equally sized chunks and must divide ``len(in_list)``.

    Returns:
        list: A list of sliced list.
    """
    if isinstance(lens, int):
        assert len(in_list) % lens == 0
        lens = [lens] * (len(in_list) // lens)
    if not isinstance(lens, list):
        raise TypeError('"indices" must be an integer or a list of integers')
    elif sum(lens) != len(in_list):
        raise ValueError('sum of lens and list length does not '
                         f'match: {sum(lens)} != {len(in_list)}')

    chunks = []
    start = 0
    for size in lens:
        stop = start + size
        chunks.append(in_list[start:stop])
        start = stop
    return chunks
lens(int or list): The expected length of each out list. Returns: list: A list of sliced list. """ if isinstance(lens, int): assert len(in_list) % lens == 0 lens = [lens] * int(len(in_list) / lens) if not isinstance(lens, list): raise TypeError('"indices" must be an integer or a list of integers') elif sum(lens) != len(in_list): raise ValueError('sum of lens and list length does not ' f'match: {sum(lens)} != {len(in_list)}') out_list = [] idx = 0 for i in range(len(lens)): out_list.append(in_list[idx:idx + lens[i]]) idx += lens[i] return out_list def concat_list(in_list): """Concatenate a list of list into a single list. Args: in_list (list): The list of list to be merged. Returns: list: The concatenated flat list. """ return list(itertools.chain(*in_list)) def check_prerequisites( prerequisites, checker, msg_tmpl='Prerequisites "{}" are required in method "{}" but not ' 'found, please install them first.'): # yapf: disable """A decorator factory to check if prerequisites are satisfied. Args: prerequisites (str of list[str]): Prerequisites to be checked. checker (callable): The checker method that returns True if a prerequisite is meet, False otherwise. msg_tmpl (str): The message template with two variables. Returns: decorator: A specific decorator. 
""" def wrap(func): @functools.wraps(func) def wrapped_func(*args, **kwargs): requirements = [prerequisites] if isinstance( prerequisites, str) else prerequisites missing = [] for item in requirements: if not checker(item): missing.append(item) if missing: print(msg_tmpl.format(', '.join(missing), func.__name__)) raise RuntimeError('Prerequisites not meet.') else: return func(*args, **kwargs) return wrapped_func return wrap def _check_py_package(package): try: import_module(package) except ImportError: return False else: return True def _check_executable(cmd): if subprocess.call(f'which {cmd}', shell=True) != 0: return False else: return True def requires_package(prerequisites): """A decorator to check if some python packages are installed. Example: >>> @requires_package('numpy') >>> func(arg1, args): >>> return numpy.zeros(1) array([0.]) >>> @requires_package(['numpy', 'non_package']) >>> func(arg1, args): >>> return numpy.zeros(1) ImportError """ return check_prerequisites(prerequisites, checker=_check_py_package) def requires_executable(prerequisites): """A decorator to check if some executable files are installed. Example: >>> @requires_executable('ffmpeg') >>> func(arg1, args): >>> print(1) 1 """ return check_prerequisites(prerequisites, checker=_check_executable) def deprecated_api_warning(name_dict, cls_name=None): """A decorator to check if some arguments are deprecate and try to replace deprecate src_arg_name to dst_arg_name. Args: name_dict(dict): key (str): Deprecate argument names. val (str): Expected argument names. Returns: func: New function. 
""" def api_warning_wrapper(old_func): @functools.wraps(old_func) def new_func(*args, **kwargs): # get the arg spec of the decorated method args_info = getfullargspec(old_func) # get name of the function func_name = old_func.__name__ if cls_name is not None: func_name = f'{cls_name}.{func_name}' if args: arg_names = args_info.args[:len(args)] for src_arg_name, dst_arg_name in name_dict.items(): if src_arg_name in arg_names: warnings.warn( f'"{src_arg_name}" is deprecated in ' f'`{func_name}`, please use "{dst_arg_name}" ' 'instead', DeprecationWarning) arg_names[arg_names.index(src_arg_name)] = dst_arg_name if kwargs: for src_arg_name, dst_arg_name in name_dict.items(): if src_arg_name in kwargs: assert dst_arg_name not in kwargs, ( f'The expected behavior is to replace ' f'the deprecated key `{src_arg_name}` to ' f'new key `{dst_arg_name}`, but got them ' f'in the arguments at the same time, which ' f'is confusing. `{src_arg_name} will be ' f'deprecated in the future, please ' f'use `{dst_arg_name}` instead.') warnings.warn( f'"{src_arg_name}" is deprecated in ' f'`{func_name}`, please use "{dst_arg_name}" ' 'instead', DeprecationWarning) kwargs[dst_arg_name] = kwargs.pop(src_arg_name) # apply converted arguments to the decorated method output = old_func(*args, **kwargs) return output return new_func return api_warning_wrapper def is_method_overridden(method, base_class, derived_class): """Check if a method of base class is overridden in derived class. Args: method (str): the method name to check. base_class (type): the class of the base class. derived_class (type | Any): the class or instance of the derived class. """ assert isinstance(base_class, type), \ "base_class doesn't accept instance, Please pass class instead." 
def is_method_overridden(method, base_class, derived_class):
    """Tell whether ``derived_class`` replaces a method of ``base_class``.

    Args:
        method (str): the method name to check.
        base_class (type): the class of the base class.
        derived_class (type | Any): the class or instance of the derived
            class.

    Returns:
        bool: True when the attribute looked up on the derived class differs
        from the one looked up on the base class.
    """
    assert isinstance(base_class, type), \
        "base_class doesn't accept instance, Please pass class instead."

    # Instances are reduced to their class before the lookup.
    target_cls = derived_class if isinstance(
        derived_class, type) else derived_class.__class__

    inherited = getattr(base_class, method)
    actual = getattr(target_cls, method)
    return actual != inherited


def has_method(obj: object, method: str) -> bool:
    """Check whether the object has a callable attribute of a given name.

    Args:
        method (str): The method name to check.
        obj (object): The object to check.

    Returns:
        bool: True if the object has the method else False.
    """
    if not hasattr(obj, method):
        return False
    return callable(getattr(obj, method))
def is_rocm_pytorch() -> bool:
    """Report whether the running PyTorch is a ROCm (HIP) build.

    Returns:
        bool: True only when ``TORCH_VERSION`` is not ``'parrots'``,
        ``torch.version.hip`` is set and ``ROCM_HOME`` could be imported and
        is not None.
    """
    if TORCH_VERSION == 'parrots':
        return False
    try:
        from torch.utils.cpp_extension import ROCM_HOME
    except ImportError:
        # ROCM_HOME is not importable here: treat as non-ROCm.
        return False
    return (torch.version.hip is not None) and (ROCM_HOME is not None)


def _get_cuda_home():
    """Return the toolkit root matching the active backend.

    Parrots provides its own ``CUDA_HOME``; on ROCm builds of PyTorch
    ``ROCM_HOME`` is returned in its place; otherwise PyTorch's
    ``CUDA_HOME`` is used.
    """
    if TORCH_VERSION == 'parrots':
        from parrots.utils.build_extension import CUDA_HOME
        return CUDA_HOME
    if is_rocm_pytorch():
        from torch.utils.cpp_extension import ROCM_HOME
        return ROCM_HOME
    from torch.utils.cpp_extension import CUDA_HOME
    return CUDA_HOME
def is_filepath(x):
    """Return True when ``x`` is a str or a ``pathlib.Path``."""
    if is_str(x):
        return True
    return isinstance(x, Path)


def fopen(filepath, *args, **kwargs):
    """Open ``filepath`` given either as a str or as a ``pathlib.Path``.

    Extra positional and keyword arguments are forwarded to ``open``.

    Raises:
        ValueError: If ``filepath`` is neither a str nor a Path.
    """
    if is_str(filepath):
        return open(filepath, *args, **kwargs)
    if isinstance(filepath, Path):
        return filepath.open(*args, **kwargs)
    raise ValueError('`filepath` should be a string or a Path')


def check_file_exist(filename, msg_tmpl='file "{}" does not exist'):
    """Raise ``FileNotFoundError`` unless ``filename`` is an existing file.

    Args:
        filename (str): Path to verify.
        msg_tmpl (str): Error message template formatted with ``filename``.
    """
    if osp.isfile(filename):
        return
    raise FileNotFoundError(msg_tmpl.format(filename))


def mkdir_or_exist(dir_name, mode=0o777):
    """Create ``dir_name`` (and parents) if it does not exist yet.

    An empty string is ignored; ``~`` is expanded before creation.
    """
    if dir_name == '':
        return
    expanded = osp.expanduser(dir_name)
    os.makedirs(expanded, mode=mode, exist_ok=True)


def symlink(src, dst, overwrite=True, **kwargs):
    """Create a symbolic link ``dst`` pointing to ``src``.

    When ``overwrite`` is True an existing ``dst`` (including a dangling
    link) is removed first. Extra keyword arguments go to ``os.symlink``.
    """
    if overwrite and os.path.lexists(dst):
        os.remove(dst)
    os.symlink(src, dst, **kwargs)
def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True):
    """Lazily list the files of a directory, optionally filtered by suffix.

    Args:
        dir_path (str | :obj:`Path`): Path of the directory.
        suffix (str | tuple(str), optional): File suffix that we are
            interested in. Default: None.
        recursive (bool, optional): If set to True, recursively scan the
            directory. Default: False.
        case_sensitive (bool, optional) : If set to False, ignore the case of
            suffix. Default: True.

    Returns:
        A generator for all the interested files with relative paths.
        Files whose name starts with a dot are never yielded.
    """
    # Argument validation happens eagerly, before the generator is created.
    if not isinstance(dir_path, (str, Path)):
        raise TypeError('"dir_path" must be a string or Path object')
    dir_path = str(dir_path)

    if suffix is not None:
        if not isinstance(suffix, (str, tuple)):
            raise TypeError('"suffix" must be a string or tuple of strings')
        if not case_sensitive:
            if isinstance(suffix, str):
                suffix = suffix.lower()
            else:
                suffix = tuple(item.lower() for item in suffix)

    root = dir_path

    def _scandir(dir_path, suffix, recursive, case_sensitive):
        for entry in os.scandir(dir_path):
            visible_file = (not entry.name.startswith('.')
                            and entry.is_file())
            if visible_file:
                rel_path = osp.relpath(entry.path, root)
                candidate = rel_path if case_sensitive else rel_path.lower()
                if suffix is None or candidate.endswith(suffix):
                    yield rel_path
            elif recursive and os.path.isdir(entry.path):
                # scan recursively if entry.path is a directory
                yield from _scandir(entry.path, suffix, recursive,
                                    case_sensitive)

    return _scandir(dir_path, suffix, recursive, case_sensitive)


def find_vcs_root(path, markers=('.git', )):
    """Walk upwards from ``path`` to the first directory holding a marker.

    Args:
        path (str): Path of directory or file.
        markers (list[str], optional): List of file or directory names.

    Returns:
        The directory contained one of the markers or None if not found.
    """
    if osp.isfile(path):
        path = osp.dirname(path)

    current = osp.abspath(osp.expanduser(path))
    while True:
        for marker in markers:
            if osp.exists(osp.join(current, marker)):
                return current
        parent = osp.split(current)[0]
        if parent == current:
            # Reached the filesystem root without finding any marker.
            return None
        current = parent


class ProgressBar:
    """A progress bar which can print the progress."""

    def __init__(self, task_num=0, bar_width=50, start=True, file=sys.stdout):
        self.task_num = task_num
        self.bar_width = bar_width
        self.completed = 0
        self.file = file
        if start:
            self.start()

    @property
    def terminal_width(self):
        """int: current terminal width in columns."""
        return get_terminal_size()[0]

    def start(self):
        """Write the initial line and start the internal timer."""
        if self.task_num > 0:
            header = (f'[{" " * self.bar_width}] 0/{self.task_num}, '
                      'elapsed: 0s, ETA:')
        else:
            header = 'completed: 0, elapsed: 0s'
        self.file.write(header)
        self.file.flush()
        self.timer = Timer()

    def update(self, num_tasks=1):
        """Record ``num_tasks`` finished tasks and redraw the line."""
        assert num_tasks > 0
        self.completed += num_tasks
        elapsed = self.timer.since_start()
        fps = self.completed / elapsed if elapsed > 0 else float('inf')
        elapsed_s = int(elapsed + 0.5)

        if self.task_num > 0:
            percentage = self.completed / float(self.task_num)
            eta = int(elapsed * (1 - percentage) / percentage + 0.5)
            msg = f'\r[{{}}] {self.completed}/{self.task_num}, ' \
                  f'{fps:.1f} task/s, elapsed: {elapsed_s}s, ' \
                  f'ETA: {eta:5}s'

            # Shrink the bar so the whole line fits the terminal, but keep
            # it at least two characters wide.
            bar_width = min(self.bar_width,
                            int(self.terminal_width - len(msg)) + 2,
                            int(self.terminal_width * 0.6))
            bar_width = max(2, bar_width)
            mark_width = int(bar_width * percentage)
            bar_chars = '>' * mark_width + ' ' * (bar_width - mark_width)
            self.file.write(msg.format(bar_chars))
        else:
            self.file.write(
                f'completed: {self.completed}, elapsed: {elapsed_s}s,'
                f' {fps:.1f} tasks/s')
        self.file.flush()
def track_progress(func, tasks, bar_width=50, file=sys.stdout, **kwargs):
    """Track the progress of tasks execution with a progress bar.

    Tasks are done with a simple for-loop.

    Args:
        func (callable): The function to be applied to each task.
        tasks (list or tuple[Iterable, int]): A list of tasks or
            (tasks, total num).
        bar_width (int): Width of progress bar.
        file: Stream the progress bar is written to.
        **kwargs: Extra keyword arguments forwarded to ``func``.

    Returns:
        list: The task results.
    """
    if isinstance(tasks, tuple):
        assert len(tasks) == 2
        assert isinstance(tasks[0], Iterable)
        assert isinstance(tasks[1], int)
        task_num = tasks[1]
        tasks = tasks[0]
    elif isinstance(tasks, Iterable):
        task_num = len(tasks)
    else:
        raise TypeError(
            '"tasks" must be an iterable object or a (iterator, int) tuple')
    prog_bar = ProgressBar(task_num, bar_width, file=file)
    results = []
    for task in tasks:
        results.append(func(task, **kwargs))
        prog_bar.update()
    prog_bar.file.write('\n')
    return results


def init_pool(process_num, initializer=None, initargs=None):
    """Create a ``multiprocessing.Pool`` with an optional initializer.

    Raises:
        TypeError: If ``initargs`` is given but is not a tuple.
    """
    if initializer is None:
        return Pool(process_num)
    elif initargs is None:
        return Pool(process_num, initializer)
    else:
        if not isinstance(initargs, tuple):
            raise TypeError('"initargs" must be a tuple')
        return Pool(process_num, initializer, initargs)


def track_parallel_progress(func,
                            tasks,
                            nproc,
                            initializer=None,
                            initargs=None,
                            bar_width=50,
                            chunksize=1,
                            skip_first=False,
                            keep_order=True,
                            file=sys.stdout):
    """Track the progress of parallel task execution with a progress bar.

    The built-in :mod:`multiprocessing` module is used for process pools and
    tasks are done with :func:`Pool.imap` or :func:`Pool.imap_unordered`.

    Args:
        func (callable): The function to be applied to each task.
        tasks (list or tuple[Iterable, int]): A list of tasks or
            (tasks, total num).
        nproc (int): Process (worker) number.
        initializer (None or callable): Refer to :class:`multiprocessing.Pool`
            for details.
        initargs (None or tuple): Refer to :class:`multiprocessing.Pool` for
            details.
        chunksize (int): Refer to :class:`multiprocessing.Pool` for details.
        bar_width (int): Width of progress bar.
        skip_first (bool): Whether to skip the first sample for each worker
            when estimating fps, since the initialization step may takes
            longer.
        keep_order (bool): If True, :func:`Pool.imap` is used, otherwise
            :func:`Pool.imap_unordered` is used.
        file: Stream the progress bar is written to.

    Returns:
        list: The task results.
    """
    if isinstance(tasks, tuple):
        assert len(tasks) == 2
        assert isinstance(tasks[0], Iterable)
        assert isinstance(tasks[1], int)
        task_num = tasks[1]
        tasks = tasks[0]
    elif isinstance(tasks, Iterable):
        task_num = len(tasks)
    else:
        raise TypeError(
            '"tasks" must be an iterable object or a (iterator, int) tuple')
    pool = init_pool(nproc, initializer, initargs)
    try:
        start = not skip_first
        task_num -= nproc * chunksize * int(skip_first)
        prog_bar = ProgressBar(task_num, bar_width, start, file=file)
        results = []
        if keep_order:
            gen = pool.imap(func, tasks, chunksize)
        else:
            gen = pool.imap_unordered(func, tasks, chunksize)
        for result in gen:
            results.append(result)
            if skip_first:
                # The first batch per worker is excluded from the timing.
                if len(results) < nproc * chunksize:
                    continue
                elif len(results) == nproc * chunksize:
                    prog_bar.start()
                    continue
            prog_bar.update()
        prog_bar.file.write('\n')
    except BaseException:
        # A task (or the consumer) failed: stop the workers right away so
        # the pool is not leaked, then let the error propagate.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
    return results
def build_from_cfg(cfg, registry, default_args=None):
    """Build a module from config dict.

    Args:
        cfg (dict): Config dict. It should at least contain the key "type".
        registry (:obj:`Registry`): The registry to search the type from.
        default_args (dict, optional): Default initialization arguments.
            They are only used for keys missing from ``cfg``.

    Returns:
        object: The constructed object.

    Raises:
        TypeError: If an argument has the wrong type.
        KeyError: If no "type" is given or it is not in the registry.
    """
    if not isinstance(cfg, dict):
        raise TypeError(f'cfg must be a dict, but got {type(cfg)}')
    if 'type' not in cfg:
        if default_args is None or 'type' not in default_args:
            raise KeyError(
                '`cfg` or `default_args` must contain the key "type", '
                f'but got {cfg}\n{default_args}')
    if not isinstance(registry, Registry):
        raise TypeError('registry must be an mmcv.Registry object, '
                        f'but got {type(registry)}')
    if not (isinstance(default_args, dict) or default_args is None):
        raise TypeError('default_args must be a dict or None, '
                        f'but got {type(default_args)}')

    # Work on a copy so the caller's config is left untouched.
    args = cfg.copy()

    if default_args is not None:
        for name, value in default_args.items():
            args.setdefault(name, value)

    obj_type = args.pop('type')
    if isinstance(obj_type, str):
        obj_cls = registry.get(obj_type)
        if obj_cls is None:
            raise KeyError(
                f'{obj_type} is not in the {registry.name} registry')
    elif inspect.isclass(obj_type):
        obj_cls = obj_type
    else:
        raise TypeError(
            f'type must be a str or valid type, but got {type(obj_type)}')
    try:
        return obj_cls(**args)
    except Exception as e:
        # Normal TypeError does not print class name.
        raise type(e)(f'{obj_cls.__name__}: {e}')


class Registry:
    """A registry to map strings to classes.

    Registered object could be built from registry.

    Example:
        >>> MODELS = Registry('models')
        >>> @MODELS.register_module()
        >>> class ResNet:
        >>>     pass
        >>> resnet = MODELS.build(dict(type='ResNet'))

    Please refer to
    https://mmcv.readthedocs.io/en/latest/understand_mmcv/registry.html for
    advanced usage.

    Args:
        name (str): Registry name.
        build_func(func, optional): Build function to construct instance from
            Registry, func:`build_from_cfg` is used if neither ``parent`` or
            ``build_func`` is specified. If ``parent`` is specified and
            ``build_func`` is not given,  ``build_func`` will be inherited
            from ``parent``. Default: None.
        parent (Registry, optional): Parent registry. The class registered in
            children registry could be built from parent. Default: None.
        scope (str, optional): The scope of registry. It is the key to search
            for children registry. If not specified, scope will be the name of
            the package where class is defined, e.g. mmdet, mmcls, mmseg.
            Default: None.
    """

    def __init__(self, name, build_func=None, parent=None, scope=None):
        self._name = name
        self._module_dict = dict()
        self._children = dict()
        self._scope = self.infer_scope() if scope is None else scope

        # self.build_func will be set with the following priority:
        # 1. build_func
        # 2. parent.build_func
        # 3. build_from_cfg
        if build_func is None:
            if parent is not None:
                self.build_func = parent.build_func
            else:
                self.build_func = build_from_cfg
        else:
            self.build_func = build_func
        if parent is not None:
            assert isinstance(parent, Registry)
            parent._add_children(self)
            self.parent = parent
        else:
            self.parent = None

    def __len__(self):
        return len(self._module_dict)

    def __contains__(self, key):
        return self.get(key) is not None

    def __repr__(self):
        format_str = self.__class__.__name__ + \
                     f'(name={self._name}, ' \
                     f'items={self._module_dict})'
        return format_str

    @staticmethod
    def infer_scope():
        """Infer the scope of registry.

        The name of the package where registry is defined will be returned.

        Example:
            >>> # in mmdet/models/backbone/resnet.py
            >>> MODELS = Registry('models')
            >>> @MODELS.register_module()
            >>> class ResNet:
            >>>     pass
            The scope of ``ResNet`` will be ``mmdet``.

        Returns:
            str: The inferred scope name.
        """
        # inspect.stack() trace where this function is called, the index-2
        # indicates the frame where `infer_scope()` is called
        filename = inspect.getmodule(inspect.stack()[2][0]).__name__
        split_filename = filename.split('.')
        return split_filename[0]

    @staticmethod
    def split_scope_key(key):
        """Split scope and key.

        The first scope will be split from key.

        Examples:
            >>> Registry.split_scope_key('mmdet.ResNet')
            'mmdet', 'ResNet'
            >>> Registry.split_scope_key('ResNet')
            None, 'ResNet'

        Return:
            tuple[str | None, str]: The former element is the first scope of
            the key, which can be ``None``. The latter is the remaining key.
        """
        split_index = key.find('.')
        if split_index != -1:
            return key[:split_index], key[split_index + 1:]
        else:
            return None, key

    @property
    def name(self):
        return self._name

    @property
    def scope(self):
        return self._scope

    @property
    def module_dict(self):
        return self._module_dict

    @property
    def children(self):
        return self._children

    def get(self, key):
        """Get the registry record.

        Args:
            key (str): The class name in string format, optionally prefixed
                with a scope (``'scope.ClassName'``).

        Returns:
            class | None: The corresponding class, or None when the key (or
            its scope) is unknown.
        """
        scope, real_key = self.split_scope_key(key)
        if scope is None or scope == self._scope:
            # get from self
            if real_key in self._module_dict:
                return self._module_dict[real_key]
            return None

        # get from self._children
        if scope in self._children:
            return self._children[scope].get(real_key)

        # This registry is already the root and the scope is unknown:
        # there is nowhere else to look, so report "not found" instead of
        # dereferencing a missing parent.
        if self.parent is None:
            return None

        # goto root
        parent = self.parent
        while parent.parent is not None:
            parent = parent.parent
        return parent.get(key)

    def build(self, *args, **kwargs):
        """Build an object with ``self.build_func``, passing this registry."""
        return self.build_func(*args, **kwargs, registry=self)

    def _add_children(self, registry):
        """Add children for a registry.

        The ``registry`` will be added as children based on its scope.
        The parent registry could build objects from children registry.

        Example:
            >>> models = Registry('models')
            >>> mmdet_models = Registry('models', parent=models)
            >>> @mmdet_models.register_module()
            >>> class ResNet:
            >>>     pass
            >>> resnet = models.build(dict(type='mmdet.ResNet'))
        """

        assert isinstance(registry, Registry)
        assert registry.scope is not None
        assert registry.scope not in self.children, \
            f'scope {registry.scope} exists in {self.name} registry'
        self.children[registry.scope] = registry

    def _register_module(self, module_class, module_name=None, force=False):
        """Store ``module_class`` under one or several names.

        Args:
            module_class (type): Class to register.
            module_name (str | Sequence[str] | None): Name(s) to register
                under. Defaults to the class name.
            force (bool): Whether to override an existing entry.

        Returns:
            type | None: The class already stored under ``module_name`` when
            a single str name is given, it is taken and ``force`` is False
            (nothing is changed in that case); otherwise None.

        Raises:
            TypeError: If ``module_class`` is not a class.
            KeyError: If a name is already registered and ``force`` is False.
        """
        if not inspect.isclass(module_class):
            raise TypeError('module must be a class, '
                            f'but got {type(module_class)}')

        # Only a single str name can be looked up directly; a list of names
        # is unhashable and is handled by the loop below.
        if (not force and isinstance(module_name, str)
                and module_name in self._module_dict):
            return self._module_dict[module_name]

        if module_name is None:
            module_name = module_class.__name__
        if isinstance(module_name, str):
            module_name = [module_name]
        for name in module_name:
            if not force and name in self._module_dict:
                raise KeyError(f'{name} is already registered '
                               f'in {self.name}')
            self._module_dict[name] = module_class

    def deprecated_register_module(self, cls=None, force=False):
        """Old-style ``register_module(cls, force=False)`` entry point."""
        warnings.warn(
            'The old API of register_module(module, force=False) '
            'is deprecated and will be removed, please use the new API '
            'register_module(name=None, force=False, module=None) instead.',
            DeprecationWarning)
        if cls is None:
            return partial(self.deprecated_register_module, force=force)
        self._register_module(cls, force=force)
        return cls

    def register_module(self, name=None, force=False, module=None):
        """Register a module.

        A record will be added to `self._module_dict`, whose key is the class
        name or the specified name, and value is the class itself.
        It can be used as a decorator or a normal function.

        Example:
            >>> backbones = Registry('backbone')
            >>> @backbones.register_module()
            >>> class ResNet:
            >>>     pass

            >>> backbones = Registry('backbone')
            >>> @backbones.register_module(name='mnet')
            >>> class MobileNet:
            >>>     pass

            >>> backbones = Registry('backbone')
            >>> class ResNet:
            >>>     pass
            >>> backbones.register_module(ResNet)

        Args:
            name (str | None): The module name to be registered. If not
                specified, the class name will be used.
            force (bool, optional): Whether to override an existing class with
                the same name. Default: False.
            module (type): Module class to be registered.
        """
        if not isinstance(force, bool):
            raise TypeError(f'force must be a boolean, but got {type(force)}')
        # NOTE: This is a workaround to be compatible with the old api,
        # while it may introduce unexpected bugs.
        if isinstance(name, type):
            return self.deprecated_register_module(name, force=force)

        # raise the error ahead of time
        if not (name is None or isinstance(name, str) or is_seq_of(name, str)):
            raise TypeError(
                'name must be either of None, an instance of str or a sequence'
                f'  of str, but got {type(name)}')

        # use it as a normal method: x.register_module(module=SomeClass)
        if module is not None:
            self._register_module(
                module_class=module, module_name=name, force=force)
            return module

        # use it as a decorator: @x.register_module()
        def _register(cls):
            self._register_module(
                module_class=cls, module_name=name, force=force)
            return cls

        return _register
Currently it supports two forms: - ./tests/data/scripts/hello.py zz - python tests/data/scripts/hello.py zz """ args = split(cmd) if args[0] == 'python': args = args[1:] with patch.object(sys, 'argv', args): run_path(args[0], run_name='__main__') def _any(judge_result): """Since built-in ``any`` works only when the element of iterable is not iterable, implement the function.""" if not isinstance(judge_result, Iterable): return judge_result try: for element in judge_result: if _any(element): return True except TypeError: # Maybe encounter the case: torch.tensor(True) | torch.tensor(False) if judge_result: return True return False def assert_dict_contains_subset(dict_obj: Dict[Any, Any], expected_subset: Dict[Any, Any]) -> bool: """Check if the dict_obj contains the expected_subset. Args: dict_obj (Dict[Any, Any]): Dict object to be checked. expected_subset (Dict[Any, Any]): Subset expected to be contained in dict_obj. Returns: bool: Whether the dict_obj contains the expected_subset. """ for key, value in expected_subset.items(): if key not in dict_obj.keys() or _any(dict_obj[key] != value): return False return True def assert_attrs_equal(obj: Any, expected_attrs: Dict[str, Any]) -> bool: """Check if attribute of class object is correct. Args: obj (object): Class object to be checked. expected_attrs (Dict[str, Any]): Dict of the expected attrs. Returns: bool: Whether the attribute of class object is correct. """ for attr, value in expected_attrs.items(): if not hasattr(obj, attr) or _any(getattr(obj, attr) != value): return False return True def assert_dict_has_keys(obj: Dict[str, Any], expected_keys: List[str]) -> bool: """Check if the obj has all the expected_keys. Args: obj (Dict[str, Any]): Object to be checked. expected_keys (List[str]): Keys expected to contained in the keys of the obj. Returns: bool: Whether the obj has the expected keys. 
""" return set(expected_keys).issubset(set(obj.keys())) def assert_keys_equal(result_keys: List[str], target_keys: List[str]) -> bool: """Check if target_keys is equal to result_keys. Args: result_keys (List[str]): Result keys to be checked. target_keys (List[str]): Target keys to be checked. Returns: bool: Whether target_keys is equal to result_keys. """ return set(result_keys) == set(target_keys) def assert_is_norm_layer(module) -> bool: """Check if the module is a norm layer. Args: module (nn.Module): The module to be checked. Returns: bool: Whether the module is a norm layer. """ from .parrots_wrapper import _BatchNorm, _InstanceNorm from torch.nn import GroupNorm, LayerNorm norm_layer_candidates = (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm) return isinstance(module, norm_layer_candidates) def assert_params_all_zeros(module) -> bool: """Check if the parameters of the module is all zeros. Args: module (nn.Module): The module to be checked. Returns: bool: Whether the parameters of the module is all zeros. """ weight_data = module.weight.data is_weight_zero = weight_data.allclose( weight_data.new_zeros(weight_data.size())) if hasattr(module, 'bias') and module.bias is not None: bias_data = module.bias.data is_bias_zero = bias_data.allclose( bias_data.new_zeros(bias_data.size())) else: is_bias_zero = True return is_weight_zero and is_bias_zero ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/timer.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from time import time class TimerError(Exception): def __init__(self, message): self.message = message super(TimerError, self).__init__(message) class Timer: """A flexible Timer class. 
Examples: >>> import time >>> import mmcv >>> with mmcv.Timer(): >>> # simulate a code block that will run for 1s >>> time.sleep(1) 1.000 >>> with mmcv.Timer(print_tmpl='it takes {:.1f} seconds'): >>> # simulate a code block that will run for 1s >>> time.sleep(1) it takes 1.0 seconds >>> timer = mmcv.Timer() >>> time.sleep(0.5) >>> print(timer.since_start()) 0.500 >>> time.sleep(0.5) >>> print(timer.since_last_check()) 0.500 >>> print(timer.since_start()) 1.000 """ def __init__(self, start=True, print_tmpl=None): self._is_running = False self.print_tmpl = print_tmpl if print_tmpl else '{:.3f}' if start: self.start() @property def is_running(self): """bool: indicate whether the timer is running""" return self._is_running def __enter__(self): self.start() return self def __exit__(self, type, value, traceback): print(self.print_tmpl.format(self.since_last_check())) self._is_running = False def start(self): """Start the timer.""" if not self._is_running: self._t_start = time() self._is_running = True self._t_last = time() def since_start(self): """Total time since the timer is started. Returns: float: Time in seconds. """ if not self._is_running: raise TimerError('timer is not running') self._t_last = time() return self._t_last - self._t_start def since_last_check(self): """Time since the last checking. Either :func:`since_start` or :func:`since_last_check` is a checking operation. Returns: float: Time in seconds. """ if not self._is_running: raise TimerError('timer is not running') dur = time() - self._t_last self._t_last = time() return dur _g_timers = {} # global timers def check_time(timer_id): """Add check points in a single line. This method is suitable for running a task on a list of items. A timer will be registered when the method is called for the first time. Examples: >>> import time >>> import mmcv >>> for i in range(1, 6): >>> # simulate a code block >>> time.sleep(i) >>> mmcv.check_time('task1') 2.000 3.000 4.000 5.000 Args: str: Timer identifier. 
""" if timer_id not in _g_timers: _g_timers[timer_id] = Timer() return 0 else: return _g_timers[timer_id].since_last_check() ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/trace.py ================================================ import warnings import torch from mmcv.utils import digit_version def is_jit_tracing() -> bool: if (torch.__version__ != 'parrots' and digit_version(torch.__version__) >= digit_version('1.6.0')): on_trace = torch.jit.is_tracing() # In PyTorch 1.6, torch.jit.is_tracing has a bug. # Refers to https://github.com/pytorch/pytorch/issues/42448 if isinstance(on_trace, bool): return on_trace else: return torch._C._is_tracing() else: warnings.warn( 'torch.jit.is_tracing is only supported after v1.6.0. ' 'Therefore is_tracing returns False automatically. Please ' 'set on_trace manually if you are using trace.', UserWarning) return False ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/version_utils.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import os import subprocess import warnings from packaging.version import parse def digit_version(version_str: str, length: int = 4): """Convert a version string into a tuple of integers. This method is usually used for comparing two versions. For pre-release versions: alpha < beta < rc. Args: version_str (str): The version string. length (int): The maximum number of version levels. Default: 4. Returns: tuple[int]: The version info in digits (integers). 
""" assert 'parrots' not in version_str version = parse(version_str) assert version.release, f'failed to parse version {version_str}' release = list(version.release) release = release[:length] if len(release) < length: release = release + [0] * (length - len(release)) if version.is_prerelease: mapping = {'a': -3, 'b': -2, 'rc': -1} val = -4 # version.pre can be None if version.pre: if version.pre[0] not in mapping: warnings.warn(f'unknown prerelease version {version.pre[0]}, ' 'version checking may go wrong') else: val = mapping[version.pre[0]] release.extend([val, version.pre[-1]]) else: release.extend([val, 0]) elif version.is_postrelease: release.extend([1, version.post]) else: release.extend([0, 0]) return tuple(release) def _minimal_ext_cmd(cmd): # construct minimal environment env = {} for k in ['SYSTEMROOT', 'PATH', 'HOME']: v = os.environ.get(k) if v is not None: env[k] = v # LANGUAGE is used on win32 env['LANGUAGE'] = 'C' env['LANG'] = 'C' env['LC_ALL'] = 'C' out = subprocess.Popen( cmd, stdout=subprocess.PIPE, env=env).communicate()[0] return out def get_git_hash(fallback='unknown', digits=None): """Get the git hash of the current repo. Args: fallback (str, optional): The fallback string when git hash is unavailable. Defaults to 'unknown'. digits (int, optional): kept digits of the hash. Defaults to None, meaning all digits are kept. Returns: str: Git commit hash. """ if digits is not None and not isinstance(digits, int): raise TypeError('digits must be None or an integer') try: out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) sha = out.strip().decode('ascii') if digits is not None: sha = sha[:digits] except OSError: sha = fallback return sha ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/version.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
def parse_version_info(version_str: str, length: int = 4) -> tuple:
    """Parse a version string into a tuple.

    Args:
        version_str (str): The version string.
        length (int): The maximum number of version levels. Default: 4.

    Returns:
        tuple[int | str]: The version info, e.g., "1.3.0" is parsed into
            (1, 3, 0, 0, 0, 0), "2.0.0rc1" is parsed into
            (2, 0, 0, 0, 'rc', 1), "1.0.post2" into (1, 0, 0, 0, 'post', 2)
            and "1.0.dev3" into (1, 0, 0, 0, 'dev', 3) (when length is set
            to 4).
    """
    from packaging.version import parse
    version = parse(version_str)
    assert version.release, f'failed to parse version {version_str}'

    # Truncate or zero-pad the release segment to exactly ``length`` levels.
    release = list(version.release)[:length]
    release += [0] * (length - len(release))

    if version.is_prerelease:
        if version.pre is not None:
            # ``pre`` is a (label, number) pair such as ('rc', 1)
            release.extend(list(version.pre))
        else:
            # A dev-only release is a pre-release whose ``pre`` is None.
            release.extend(['dev', version.dev])
    elif version.is_postrelease:
        # ``post`` is a plain int, not a sequence, so it cannot be list()-ed.
        release.extend(['post', version.post])
    else:
        release.extend([0, 0])
    return tuple(release)
class Cache:
    """A bounded first-in-first-out cache.

    Once ``capacity`` entries are stored, inserting a new key evicts the
    oldest inserted entry. Re-inserting an existing key is a no-op: it neither
    updates the value nor refreshes the entry's age.

    Args:
        capacity (int): Maximum number of entries, must be positive after
            conversion to ``int``.

    Raises:
        ValueError: If the converted capacity is not a positive integer.
    """

    def __init__(self, capacity):
        self._cache = OrderedDict()
        self._capacity = int(capacity)
        # Validate the converted value: e.g. 0.5 is > 0 but truncates to 0,
        # which would make every later ``put`` pop from an empty dict.
        if self._capacity <= 0:
            raise ValueError('capacity must be a positive integer')

    @property
    def capacity(self):
        """int: Maximum number of entries the cache can hold."""
        return self._capacity

    @property
    def size(self):
        """int: Number of entries currently stored."""
        return len(self._cache)

    def put(self, key, val):
        """Store ``val`` under ``key`` unless the key is already cached."""
        if key in self._cache:
            return
        if len(self._cache) >= self.capacity:
            # evict the oldest inserted entry
            self._cache.popitem(last=False)
        self._cache[key] = val

    def get(self, key, default=None):
        """Return the cached value for ``key`` or ``default`` if absent."""
        return self._cache.get(key, default)
Examples: >>> import mmcv >>> v = mmcv.VideoReader('sample.mp4') >>> len(v) # get the total frame number with `len()` 120 >>> for img in v: # v is iterable >>> mmcv.imshow(img) >>> v[5] # get the 6th frame """ def __init__(self, filename, cache_capacity=10): # Check whether the video path is a url if not filename.startswith(('https://', 'http://')): check_file_exist(filename, 'Video file not found: ' + filename) self._vcap = cv2.VideoCapture(filename) assert cache_capacity > 0 self._cache = Cache(cache_capacity) self._position = 0 # get basic info self._width = int(self._vcap.get(CAP_PROP_FRAME_WIDTH)) self._height = int(self._vcap.get(CAP_PROP_FRAME_HEIGHT)) self._fps = self._vcap.get(CAP_PROP_FPS) self._frame_cnt = int(self._vcap.get(CAP_PROP_FRAME_COUNT)) self._fourcc = self._vcap.get(CAP_PROP_FOURCC) @property def vcap(self): """:obj:`cv2.VideoCapture`: The raw VideoCapture object.""" return self._vcap @property def opened(self): """bool: Indicate whether the video is opened.""" return self._vcap.isOpened() @property def width(self): """int: Width of video frames.""" return self._width @property def height(self): """int: Height of video frames.""" return self._height @property def resolution(self): """tuple: Video resolution (width, height).""" return (self._width, self._height) @property def fps(self): """float: FPS of the video.""" return self._fps @property def frame_cnt(self): """int: Total frames of the video.""" return self._frame_cnt @property def fourcc(self): """str: "Four character code" of the video.""" return self._fourcc @property def position(self): """int: Current cursor position, indicating frame decoded.""" return self._position def _get_real_position(self): return int(round(self._vcap.get(CAP_PROP_POS_FRAMES))) def _set_real_position(self, frame_id): self._vcap.set(CAP_PROP_POS_FRAMES, frame_id) pos = self._get_real_position() for _ in range(frame_id - pos): self._vcap.read() self._position = frame_id def read(self): """Read the next frame. 
If the next frame have been decoded before and in the cache, then return it directly, otherwise decode, cache and return it. Returns: ndarray or None: Return the frame if successful, otherwise None. """ # pos = self._position if self._cache: img = self._cache.get(self._position) if img is not None: ret = True else: if self._position != self._get_real_position(): self._set_real_position(self._position) ret, img = self._vcap.read() if ret: self._cache.put(self._position, img) else: ret, img = self._vcap.read() if ret: self._position += 1 return img def get_frame(self, frame_id): """Get frame by index. Args: frame_id (int): Index of the expected frame, 0-based. Returns: ndarray or None: Return the frame if successful, otherwise None. """ if frame_id < 0 or frame_id >= self._frame_cnt: raise IndexError( f'"frame_id" must be between 0 and {self._frame_cnt - 1}') if frame_id == self._position: return self.read() if self._cache: img = self._cache.get(frame_id) if img is not None: self._position = frame_id + 1 return img self._set_real_position(frame_id) ret, img = self._vcap.read() if ret: if self._cache: self._cache.put(self._position, img) self._position += 1 return img def current_frame(self): """Get the current frame (frame that is just visited). Returns: ndarray or None: If the video is fresh, return None, otherwise return the frame. """ if self._position == 0: return None return self._cache.get(self._position - 1) def cvt2frames(self, frame_dir, file_start=0, filename_tmpl='{:06d}.jpg', start=0, max_num=0, show_progress=True): """Convert a video to frame images. Args: frame_dir (str): Output directory to store all the frame images. file_start (int): Filenames will start from the specified number. filename_tmpl (str): Filename template with the index as the placeholder. start (int): The starting frame index. max_num (int): Maximum number of frames to be written. show_progress (bool): Whether to show a progress bar. 
""" mkdir_or_exist(frame_dir) if max_num == 0: task_num = self.frame_cnt - start else: task_num = min(self.frame_cnt - start, max_num) if task_num <= 0: raise ValueError('start must be less than total frame number') if start > 0: self._set_real_position(start) def write_frame(file_idx): img = self.read() if img is None: return filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) cv2.imwrite(filename, img) if show_progress: track_progress(write_frame, range(file_start, file_start + task_num)) else: for i in range(task_num): write_frame(file_start + i) def __len__(self): return self.frame_cnt def __getitem__(self, index): if isinstance(index, slice): return [ self.get_frame(i) for i in range(*index.indices(self.frame_cnt)) ] # support negative indexing if index < 0: index += self.frame_cnt if index < 0: raise IndexError('index out of range') return self.get_frame(index) def __iter__(self): self._set_real_position(0) return self def __next__(self): img = self.read() if img is not None: return img else: raise StopIteration next = __next__ def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): self._vcap.release() def frames2video(frame_dir, video_file, fps=30, fourcc='XVID', filename_tmpl='{:06d}.jpg', start=0, end=0, show_progress=True): """Read the frame images from a directory and join them as a video. Args: frame_dir (str): The directory containing video frames. video_file (str): Output filename. fps (float): FPS of the output video. fourcc (str): Fourcc of the output video, this should be compatible with the output file type. filename_tmpl (str): Filename template with the index as the variable. start (int): Starting frame index. end (int): Ending frame index. show_progress (bool): Whether to show a progress bar. 
""" if end == 0: ext = filename_tmpl.split('.')[-1] end = len([name for name in scandir(frame_dir, ext)]) first_file = osp.join(frame_dir, filename_tmpl.format(start)) check_file_exist(first_file, 'The start frame not found: ' + first_file) img = cv2.imread(first_file) height, width = img.shape[:2] resolution = (width, height) vwriter = cv2.VideoWriter(video_file, VideoWriter_fourcc(*fourcc), fps, resolution) def write_frame(file_idx): filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) img = cv2.imread(filename) vwriter.write(img) if show_progress: track_progress(write_frame, range(start, end)) else: for i in range(start, end): write_frame(i) vwriter.release() ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/video/optflow.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import warnings import cv2 import numpy as np from mmcv.arraymisc import dequantize, quantize from mmcv.image import imread, imwrite from mmcv.utils import is_str def flowread(flow_or_path, quantize=False, concat_axis=0, *args, **kwargs): """Read an optical flow map. Args: flow_or_path (ndarray or str): A flow map or filepath. quantize (bool): whether to read quantized pair, if set to True, remaining args will be passed to :func:`dequantize_flow`. concat_axis (int): The axis that dx and dy are concatenated, can be either 0 or 1. Ignored if quantize is False. 
def flowwrite(flow, filename, quantize=False, concat_axis=0, *args, **kwargs):
    """Write optical flow to file.

    If the flow is not quantized, it will be saved as a .flo file losslessly,
    otherwise a jpeg image which is lossy but of much smaller size. (dx and dy
    will be concatenated horizontally into a single image if quantize is True.)

    Args:
        flow (ndarray): (h, w, 2) array of optical flow.
        filename (str): Output filepath.
        quantize (bool): Whether to quantize the flow and save it to 2 jpeg
            images. If set to True, remaining args will be passed to
            :func:`quantize_flow`.
        concat_axis (int): The axis that dx and dy are concatenated,
            can be either 0 or 1. Ignored if quantize is False.
    """
    if quantize:
        assert concat_axis in [0, 1]
        dx, dy = quantize_flow(flow, *args, **kwargs)
        imwrite(np.concatenate((dx, dy), axis=concat_axis), filename)
        return

    # .flo layout: 4-byte magic, int32 width, int32 height, float32 payload
    with open(filename, 'wb') as out:
        out.write('PIEH'.encode('utf-8'))
        dims = np.array([flow.shape[1], flow.shape[0]], dtype=np.int32)
        dims.tofile(out)
        flow.astype(np.float32).tofile(out)
        out.flush()
def flow_from_bytes(content):
    """Read dense optical flow from bytes.

    .. note::
        This load optical flow function works for FlyingChairs, FlyingThings3D,
        Sintel, FlyingChairsOcc datasets, but cannot load the data from
        ChairsSDHom.

    Args:
        content (bytes): Optical flow bytes got from files or other streams.

    Returns:
        ndarray: Loaded optical flow with the shape (H, W, 2).

    Raises:
        Exception: If the first 4 bytes are not the ``PIEH`` magic.
    """
    # header in first 4 bytes; compare raw bytes so that a non-UTF-8 header is
    # reported as a bad header instead of surfacing a UnicodeDecodeError
    if bytes(content[:4]) != b'PIEH':
        raise Exception('Flow file header does not contain PIEH')

    # width in second 4 bytes, height in third 4 bytes; ``offset`` avoids
    # copying the remainder of the buffer just to read one integer
    width = int(np.frombuffer(content, np.int32, 1, offset=4)[0])
    height = int(np.frombuffer(content, np.int32, 1, offset=8)[0])

    # after first 12 bytes, all bytes are flow; the element count is computed
    # with Python ints so it cannot wrap around like an int32 product
    flow = np.frombuffer(
        content, np.float32, width * height * 2, offset=12).reshape(
            (height, width, 2))
    return flow
This provides a general api to ffmpeg, the executed command is:: `ffmpeg -y -i ` Options(kwargs) are mapped to ffmpeg commands with the following rules: - key=val: "-key val" - key=True: "-key" - key=False: "" Args: in_file (str): Input video filename. out_file (str): Output video filename. pre_options (str): Options appears before "-i ". print_cmd (bool): Whether to print the final ffmpeg command. """ options = [] for k, v in kwargs.items(): if isinstance(v, bool): if v: options.append(f'-{k}') elif k == 'log_level': assert v in [ 'quiet', 'panic', 'fatal', 'error', 'warning', 'info', 'verbose', 'debug', 'trace' ] options.append(f'-loglevel {v}') else: options.append(f'-{k} {v}') cmd = f'ffmpeg -y {pre_options} -i {in_file} {" ".join(options)} ' \ f'{out_file}' if print_cmd: print(cmd) subprocess.call(cmd, shell=True) @requires_executable('ffmpeg') def resize_video(in_file, out_file, size=None, ratio=None, keep_ar=False, log_level='info', print_cmd=False): """Resize a video. Args: in_file (str): Input video filename. out_file (str): Output video filename. size (tuple): Expected size (w, h), eg, (320, 240) or (320, -1). ratio (tuple or float): Expected resize ratio, (2, 0.5) means (w*2, h*0.5). keep_ar (bool): Whether to keep original aspect ratio. log_level (str): Logging level of ffmpeg. print_cmd (bool): Whether to print the final ffmpeg command. 
""" if size is None and ratio is None: raise ValueError('expected size or ratio must be specified') if size is not None and ratio is not None: raise ValueError('size and ratio cannot be specified at the same time') options = {'log_level': log_level} if size: if not keep_ar: options['vf'] = f'scale={size[0]}:{size[1]}' else: options['vf'] = f'scale=w={size[0]}:h={size[1]}:' \ 'force_original_aspect_ratio=decrease' else: if not isinstance(ratio, tuple): ratio = (ratio, ratio) options['vf'] = f'scale="trunc(iw*{ratio[0]}):trunc(ih*{ratio[1]})"' convert_video(in_file, out_file, print_cmd, **options) @requires_executable('ffmpeg') def cut_video(in_file, out_file, start=None, end=None, vcodec=None, acodec=None, log_level='info', print_cmd=False): """Cut a clip from a video. Args: in_file (str): Input video filename. out_file (str): Output video filename. start (None or float): Start time (in seconds). end (None or float): End time (in seconds). vcodec (None or str): Output video codec, None for unchanged. acodec (None or str): Output audio codec, None for unchanged. log_level (str): Logging level of ffmpeg. print_cmd (bool): Whether to print the final ffmpeg command. """ options = {'log_level': log_level} if vcodec is None: options['vcodec'] = 'copy' if acodec is None: options['acodec'] = 'copy' if start: options['ss'] = start else: start = 0 if end: options['t'] = end - start convert_video(in_file, out_file, print_cmd, **options) @requires_executable('ffmpeg') def concat_video(video_list, out_file, vcodec=None, acodec=None, log_level='info', print_cmd=False): """Concatenate multiple videos into a single one. Args: video_list (list): A list of video filenames out_file (str): Output video filename vcodec (None or str): Output video codec, None for unchanged acodec (None or str): Output audio codec, None for unchanged log_level (str): Logging level of ffmpeg. print_cmd (bool): Whether to print the final ffmpeg command. 
""" tmp_filehandler, tmp_filename = tempfile.mkstemp(suffix='.txt', text=True) with open(tmp_filename, 'w') as f: for filename in video_list: f.write(f'file {osp.abspath(filename)}\n') options = {'log_level': log_level} if vcodec is None: options['vcodec'] = 'copy' if acodec is None: options['acodec'] = 'copy' convert_video( tmp_filename, out_file, print_cmd, pre_options='-f concat -safe 0', **options) os.close(tmp_filehandler) os.remove(tmp_filename) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/visualization/__init__.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from .color import Color, color_val from .image import imshow, imshow_bboxes, imshow_det_bboxes from .optflow import flow2rgb, flowshow, make_color_wheel __all__ = [ 'Color', 'color_val', 'imshow', 'imshow_bboxes', 'imshow_det_bboxes', 'flowshow', 'flow2rgb', 'make_color_wheel' ] ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/visualization/color.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. from enum import Enum import numpy as np from mmcv.utils import is_str class Color(Enum): """An enum that defines common colors. Contains red, green, blue, cyan, yellow, magenta, white and black. """ red = (0, 0, 255) green = (0, 255, 0) blue = (255, 0, 0) cyan = (255, 255, 0) yellow = (0, 255, 255) magenta = (255, 0, 255) white = (255, 255, 255) black = (0, 0, 0) def color_val(color): """Convert various input to color tuples. Args: color (:obj:`Color`/str/tuple/int/ndarray): Color inputs Returns: tuple[int]: A tuple of 3 integers indicating BGR channels. 
""" if is_str(color): return Color[color].value elif isinstance(color, Color): return color.value elif isinstance(color, tuple): assert len(color) == 3 for channel in color: assert 0 <= channel <= 255 return color elif isinstance(color, int): assert 0 <= color <= 255 return color, color, color elif isinstance(color, np.ndarray): assert color.ndim == 1 and color.size == 3 assert np.all((color >= 0) & (color <= 255)) color = color.astype(np.uint8) return tuple(color) else: raise TypeError(f'Invalid type for color: {type(color)}') ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/visualization/image.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import cv2 import numpy as np from mmcv.image import imread, imwrite from .color import color_val def imshow(img, win_name='', wait_time=0): """Show an image. Args: img (str or ndarray): The image to be displayed. win_name (str): The window name. wait_time (int): Value of waitKey param. """ cv2.imshow(win_name, imread(img)) if wait_time == 0: # prevent from hanging if windows was closed while True: ret = cv2.waitKey(1) closed = cv2.getWindowProperty(win_name, cv2.WND_PROP_VISIBLE) < 1 # if user closed window or if some key pressed if closed or ret != -1: break else: ret = cv2.waitKey(wait_time) def imshow_bboxes(img, bboxes, colors='green', top_k=-1, thickness=1, show=True, win_name='', wait_time=0, out_file=None): """Draw bboxes on an image. Args: img (str or ndarray): The image to be displayed. bboxes (list or ndarray): A list of ndarray of shape (k, 4). colors (list[str or tuple or Color]): A list of colors. top_k (int): Plot the first k bboxes only if set positive. thickness (int): Thickness of lines. show (bool): Whether to show the image. win_name (str): The window name. wait_time (int): Value of waitKey param. out_file (str, optional): The filename to write the image. Returns: ndarray: The image with bboxes drawn on it. 
def imshow_det_bboxes(img,
                      bboxes,
                      labels,
                      class_names=None,
                      score_thr=0,
                      bbox_color='green',
                      text_color='green',
                      thickness=1,
                      font_scale=0.5,
                      show=True,
                      win_name='',
                      wait_time=0,
                      out_file=None):
    """Render detection boxes and their captions onto an image.

    Each box is outlined with a rectangle and captioned with its class name
    (or ``cls <label>`` when no names are given); when the box row carries a
    fifth column, that value is appended to the caption as ``|<score>``.

    Args:
        img (str or ndarray): The image to be displayed.
        bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or
            (n, 5).
        labels (ndarray): Labels of bboxes, shaped (n, ).
        class_names (list[str]): Names of each classes.
        score_thr (float): Minimum score of bboxes to be shown. Filtering is
            only applied when this is positive, and then requires (n, 5)
            boxes.
        bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
        text_color (str or tuple or :obj:`Color`): Color of texts.
        thickness (int): Thickness of lines.
        font_scale (float): Font scales of texts.
        show (bool): Whether to show the image.
        win_name (str): The window name.
        wait_time (int): Value of waitKey param.
        out_file (str or None): The filename to write the image.

    Returns:
        ndarray: The image with bboxes drawn on it.
    """
    assert bboxes.ndim == 2
    assert labels.ndim == 1
    assert bboxes.shape[0] == labels.shape[0]
    assert bboxes.shape[1] in (4, 5)

    # Drawing happens in place on a contiguous copy/view of the loaded image.
    canvas = np.ascontiguousarray(imread(img))

    if score_thr > 0:
        # Thresholding needs the score column.
        assert bboxes.shape[1] == 5
        keep = bboxes[:, -1] > score_thr
        bboxes, labels = bboxes[keep, :], labels[keep]

    box_bgr = color_val(bbox_color)
    text_bgr = color_val(text_color)

    for box, label in zip(bboxes, labels):
        corners = box.astype(np.int32)
        cv2.rectangle(
            canvas, (corners[0], corners[1]), (corners[2], corners[3]),
            box_bgr,
            thickness=thickness)

        if class_names is None:
            caption = f'cls {label}'
        else:
            caption = class_names[label]
        if len(box) > 4:
            caption += f'|{box[-1]:.02f}'

        # Caption sits just above the top-left corner of the box.
        cv2.putText(canvas, caption, (corners[0], corners[1] - 2),
                    cv2.FONT_HERSHEY_COMPLEX, font_scale, text_bgr)

    if show:
        imshow(canvas, win_name, wait_time)
    if out_file is not None:
        imwrite(canvas, out_file)
    return canvas
def make_color_wheel(bins=None):
    """Build a color wheel.

    The wheel is six consecutive segments; inside each segment one RGB
    channel ramps linearly while the other two are held at 0 or 1.

    Args:
        bins(list or tuple, optional): Specify the number of bins for each
            color range, corresponding to six ranges: red -> yellow,
            yellow -> green, green -> cyan, cyan -> blue, blue -> magenta,
            magenta -> red. [15, 6, 4, 11, 13, 6] is used for default
            (see Middlebury).

    Returns:
        ndarray: Color wheel of shape (total_bins, 3), dtype float32.
    """
    if bins is None:
        bins = [15, 6, 4, 11, 13, 6]
    assert len(bins) == 6

    def rising(count):
        # 0, 1/count, ..., (count - 1)/count
        return np.arange(count) / count

    def falling(count):
        # 1, 1 - 1/count, ..., 1/count
        return 1 - np.arange(count) / count

    n_ry, n_yg, n_gc, n_cb, n_bm, n_mr = tuple(bins)

    # One (R, G, B) triple per segment; scalars broadcast over the segment.
    segments = [
        (1, rising(n_ry), 0),  # red -> yellow
        (falling(n_yg), 1, 0),  # yellow -> green
        (0, 1, rising(n_gc)),  # green -> cyan
        (0, falling(n_cb), 1),  # cyan -> blue
        (rising(n_bm), 0, 1),  # blue -> magenta
        (1, 0, falling(n_mr)),  # magenta -> red
    ]

    total = n_ry + n_yg + n_gc + n_cb + n_bm + n_mr
    wheel = np.zeros((3, total), dtype=np.float32)

    start = 0
    for width, channels in zip(bins, segments):
        stop = start + width
        for row, values in enumerate(channels):
            wheel[row, start:stop] = values
        start = stop

    # Channels were filled row-wise; callers expect one color per row.
    return wheel.T
def get_version():
    """Read ``__version__`` from ``mmcv/version.py`` without importing mmcv.

    The version file is executed in a dedicated namespace dict and the
    resulting ``__version__`` entry is returned. The path is relative to the
    current working directory.

    Returns:
        The value bound to ``__version__`` by the version file.

    Raises:
        OSError: If ``mmcv/version.py`` cannot be opened.
        KeyError: If the file does not define ``__version__``.
    """
    version_file = 'mmcv/version.py'
    # Execute into an explicit namespace instead of relying on exec() writing
    # into this function's locals: since Python 3.13 (PEP 667) names created
    # by exec() are not visible through a later locals() call, which made the
    # previous `locals()['__version__']` lookup raise KeyError.
    namespace = {}
    with open(version_file, 'r', encoding='utf-8') as f:
        exec(compile(f.read(), version_file, 'exec'), namespace)
    return namespace['__version__']
def get_extensions():
    """Assemble the extension modules to compile, driven by the environment.

    Up to three extensions are appended, in this order:

    * ``mmcv._ext_trt`` when ``MMCV_WITH_TRT`` is set to anything but ``'0'``.
    * ``mmcv._ext`` when ``MMCV_WITH_OPS`` is not ``'0'`` and ``EXT_TYPE`` is
      ``'parrots'`` or ``'pytorch'``.
    * ``mmcv._ext_ort`` when ``EXT_TYPE`` is ``'pytorch'`` and
      ``MMCV_WITH_ORT`` is set to anything but ``'0'``.

    With ``MMCV_WITH_OPS`` unset or ``'0'`` the function returns right after
    the optional TensorRT extension, so neither the ops nor the ONNX Runtime
    extension is built.

    Other environment variables read here: ``TENSORRT_DIR``,
    ``ONNXRUNTIME_DIR``, ``MMCV_CUDA_ARGS``, ``FORCE_CUDA``; ``MAX_JOBS`` is
    given a default if it is not already set.

    Returns:
        list: Extension objects to pass to ``setup(ext_modules=...)``.
    """
    extensions = []

    # --- optional TensorRT plugin extension ---------------------------------
    if os.getenv('MMCV_WITH_TRT', '0') != '0':
        ext_name = 'mmcv._ext_trt'
        from torch.utils.cpp_extension import include_paths, library_paths
        library_dirs = []
        libraries = []
        include_dirs = []
        tensorrt_path = os.getenv('TENSORRT_DIR', '0')
        # NOTE: indexing [0] raises IndexError when no `targets/*/lib`
        # directory exists under TENSORRT_DIR (including when it is unset).
        tensorrt_lib_path = glob.glob(
            os.path.join(tensorrt_path, 'targets', '*', 'lib'))[0]
        library_dirs += [tensorrt_lib_path]
        libraries += ['nvinfer', 'nvparsers', 'nvinfer_plugin']
        libraries += ['cudart']
        define_macros = []
        extra_compile_args = {'cxx': []}

        include_path = os.path.abspath('./mmcv/ops/csrc/common/cuda')
        include_trt_path = os.path.abspath('./mmcv/ops/csrc/tensorrt')
        include_dirs.append(include_path)
        include_dirs.append(include_trt_path)
        include_dirs.append(os.path.join(tensorrt_path, 'include'))
        include_dirs += include_paths(cuda=True)

        op_files = glob.glob('./mmcv/ops/csrc/tensorrt/plugins/*')
        define_macros += [('MMCV_WITH_CUDA', None)]
        define_macros += [('MMCV_WITH_TRT', None)]
        cuda_args = os.getenv('MMCV_CUDA_ARGS')
        extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
        # prevent cub/thrust conflict with other python library
        # More context See issues #1454
        extra_compile_args['nvcc'] += ['-Xcompiler=-fno-gnu-unique']
        library_dirs += library_paths(cuda=True)

        from setuptools import Extension
        ext_ops = Extension(
            name=ext_name,
            sources=op_files,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
            language='c++',
            library_dirs=library_dirs,
            libraries=libraries)
        extensions.append(ext_ops)

    # Ops are opt-in: without MMCV_WITH_OPS nothing below is built.
    if os.getenv('MMCV_WITH_OPS', '0') == '0':
        return extensions

    # --- main ops extension (`mmcv._ext`) ------------------------------------
    if EXT_TYPE == 'parrots':
        ext_name = 'mmcv._ext'
        from parrots.utils.build_extension import Extension
        # new parrots op impl do not use MMCV_USE_PARROTS
        # define_macros = [('MMCV_USE_PARROTS', None)]
        define_macros = []
        include_dirs = []
        op_files = glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') +\
            glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') +\
            glob.glob('./mmcv/ops/csrc/parrots/*.cpp')
        include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
        include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
        cuda_args = os.getenv('MMCV_CUDA_ARGS')
        extra_compile_args = {
            'nvcc': [cuda_args, '-std=c++14'] if cuda_args else ['-std=c++14'],
            'cxx': ['-std=c++14'],
        }
        if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
            define_macros += [('MMCV_WITH_CUDA', None)]
            extra_compile_args['nvcc'] += [
                '-D__CUDA_NO_HALF_OPERATORS__',
                '-D__CUDA_NO_HALF_CONVERSIONS__',
                '-D__CUDA_NO_HALF2_OPERATORS__',
            ]
        ext_ops = Extension(
            name=ext_name,
            sources=op_files,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
            cuda=True,
            pytorch=True)
        extensions.append(ext_ops)
    elif EXT_TYPE == 'pytorch':
        ext_name = 'mmcv._ext'
        from torch.utils.cpp_extension import CppExtension, CUDAExtension

        # prevent ninja from using too many resources
        # (falls back to 4 jobs when psutil is missing or has no
        # cpu_affinity(); an existing MAX_JOBS value is left untouched)
        try:
            import psutil
            num_cpu = len(psutil.Process().cpu_affinity())
            cpu_use = max(4, num_cpu - 1)
        except (ModuleNotFoundError, AttributeError):
            cpu_use = 4

        os.environ.setdefault('MAX_JOBS', str(cpu_use))
        define_macros = []

        # Before PyTorch1.8.0, when compiling CUDA code, `cxx` is a
        # required key passed to PyTorch. Even if there is no flag passed
        # to cxx, users also need to pass an empty list to PyTorch.
        # Since PyTorch1.8.0, it has a default value so users do not need
        # to pass an empty list anymore.
        # More details at https://github.com/pytorch/pytorch/pull/45956
        extra_compile_args = {'cxx': []}

        # Since the PR (https://github.com/open-mmlab/mmcv/pull/1463) uses
        # c++14 features, the argument ['std=c++14'] must be added here.
        # However, in the windows environment, some standard libraries
        # will depend on c++17 or higher. In fact, for the windows
        # environment, the compiler will choose the appropriate compiler
        # to compile those cpp files, so there is no need to add the
        # argument
        if platform.system() != 'Windows':
            extra_compile_args['cxx'] = ['-std=c++14']

        include_dirs = []

        # ROCm build is selected only when this torch exposes ROCM_HOME and
        # both torch.version.hip and ROCM_HOME are set.
        is_rocm_pytorch = False
        try:
            from torch.utils.cpp_extension import ROCM_HOME
            is_rocm_pytorch = True if ((torch.version.hip is not None) and
                                       (ROCM_HOME is not None)) else False
        except ImportError:
            pass

        project_dir = 'mmcv/ops/csrc/'
        if is_rocm_pytorch:
            from torch.utils.hipify import hipify_python

            # Runs hipify over the csrc tree, writing output back into the
            # same directory (output_directory == project_directory).
            hipify_python.hipify(
                project_directory=project_dir,
                output_directory=project_dir,
                includes='mmcv/ops/csrc/*',
                show_detailed=True,
                is_pytorch_extension=True,
            )
            define_macros += [('MMCV_WITH_CUDA', None)]
            define_macros += [('HIP_DIFF', None)]
            cuda_args = os.getenv('MMCV_CUDA_ARGS')
            extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
            op_files = glob.glob('./mmcv/ops/csrc/pytorch/hip/*') \
                + glob.glob('./mmcv/ops/csrc/pytorch/cpu/hip/*')
            extension = CUDAExtension
            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/hip'))
        elif torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
            define_macros += [('MMCV_WITH_CUDA', None)]
            cuda_args = os.getenv('MMCV_CUDA_ARGS')
            extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
            op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
                glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \
                glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') + \
                glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cpp')
            extension = CUDAExtension
            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
        else:
            # CPU-only build: no 'nvcc' key is added to extra_compile_args.
            print(f'Compiling {ext_name} without CUDA')
            op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
                glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp')
            extension = CppExtension
            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))

        # Since the PR (https://github.com/open-mmlab/mmcv/pull/1463) uses
        # c++14 features, the argument ['std=c++14'] must be added here.
        # However, in the windows environment, some standard libraries
        # will depend on c++17 or higher. In fact, for the windows
        # environment, the compiler will choose the appropriate compiler
        # to compile those cpp files, so there is no need to add the
        # argument
        if 'nvcc' in extra_compile_args and platform.system() != 'Windows':
            extra_compile_args['nvcc'] += ['-std=c++14']

        ext_ops = extension(
            name=ext_name,
            sources=op_files,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args)
        extensions.append(ext_ops)

    # --- optional ONNX Runtime custom-op extension ---------------------------
    if EXT_TYPE == 'pytorch' and os.getenv('MMCV_WITH_ORT', '0') != '0':
        ext_name = 'mmcv._ext_ort'
        from torch.utils.cpp_extension import library_paths, include_paths
        import onnxruntime
        library_dirs = []
        libraries = []
        include_dirs = []
        ort_path = os.getenv('ONNXRUNTIME_DIR', '0')
        library_dirs += [os.path.join(ort_path, 'lib')]
        libraries.append('onnxruntime')
        define_macros = []
        extra_compile_args = {'cxx': []}

        include_path = os.path.abspath('./mmcv/ops/csrc/onnxruntime')
        include_dirs.append(include_path)
        include_dirs.append(os.path.join(ort_path, 'include'))

        op_files = glob.glob('./mmcv/ops/csrc/onnxruntime/cpu/*')
        # GPU sources are added on top of the CPU ones, not instead of them.
        if onnxruntime.get_device() == 'GPU' or os.getenv('FORCE_CUDA',
                                                          '0') == '1':
            define_macros += [('MMCV_WITH_CUDA', None)]
            cuda_args = os.getenv('MMCV_CUDA_ARGS')
            extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
            op_files += glob.glob('./mmcv/ops/csrc/onnxruntime/gpu/*')
            include_dirs += include_paths(cuda=True)
            library_dirs += library_paths(cuda=True)
        else:
            include_dirs += include_paths(cuda=False)
            library_dirs += library_paths(cuda=False)

        from setuptools import Extension
        ext_ops = Extension(
            name=ext_name,
            sources=op_files,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
            language='c++',
            library_dirs=library_dirs,
            libraries=libraries)
        extensions.append(ext_ops)

    return extensions
def test_quantize():
    """Check shape, dtype, per-element values and argument validation of
    ``mmcv.quantize`` on the range [-1, 1] with 20 levels."""
    arr = np.random.randn(10, 10)
    levels = 20

    qarr = mmcv.quantize(arr, -1, 1, levels)
    assert qarr.shape == arr.shape
    assert qarr.dtype == np.dtype('int64')
    # Scalar reference: clamp to [-1, 1], map to a bin index, cap at the
    # last level.
    for idx in np.ndindex(*arr.shape):
        clamped = max(min(arr[idx], 1), -1)
        expected = min(levels - 1, int(np.floor(10 * (1 + clamped))))
        assert qarr[idx] == expected

    as_uint8 = mmcv.quantize(arr, -1, 1, 20, dtype=np.uint8)
    assert as_uint8.shape == arr.shape
    assert as_uint8.dtype == np.dtype('uint8')

    # Each of these calls is expected to be rejected with ValueError.
    rejected_calls = [
        ((arr, -1, 1), dict(levels=0)),
        ((arr, -1, 1), dict(levels=10.0)),
        ((arr, 2, 1, levels), dict()),
    ]
    for args, kwargs in rejected_calls:
        with pytest.raises(ValueError):
            mmcv.quantize(*args, **kwargs)


def test_dequantize():
    """Check shape, dtype, per-element values and argument validation of
    ``mmcv.dequantize`` on the range [-1, 1] with 20 levels."""
    levels = 20
    qarr = np.random.randint(levels, size=(10, 10))

    arr = mmcv.dequantize(qarr, -1, 1, levels)
    assert arr.shape == qarr.shape
    assert arr.dtype == np.dtype('float64')
    # Every bin index is expected to map to the centre of its bin.
    for idx in np.ndindex(*qarr.shape):
        assert arr[idx] == (qarr[idx] + 0.5) / 10 - 1

    arr = mmcv.dequantize(qarr, -1, 1, levels, dtype=np.float32)
    assert arr.shape == qarr.shape
    assert arr.dtype == np.dtype('float32')

    # Each of these calls is expected to be rejected with ValueError.
    rejected_calls = [
        ((arr, -1, 1), dict(levels=0)),
        ((arr, -1, 1), dict(levels=10.0)),
        ((arr, 2, 1, levels), dict()),
    ]
    for args, kwargs in rejected_calls:
        with pytest.raises(ValueError):
            mmcv.dequantize(*args, **kwargs)


def test_joint():
    """Round-trip ``quantize`` -> ``dequantize`` and bound the error."""
    arr = np.random.randn(100, 100)
    levels = 1000
    recover = mmcv.dequantize(
        mmcv.quantize(arr, -1, 1, levels), -1, 1, levels)

    below = arr < -1
    above = arr > 1
    inside = (arr >= -1) & (arr <= 1)
    # Out-of-range inputs land in the outermost bins.
    assert np.abs(recover[below] + 0.999).max() < 1e-6
    assert np.abs(recover[above] - 0.999).max() < 1e-6
    # In-range inputs are recovered to within 1e-3.
    assert np.abs((recover - arr)[inside]).max() <= 1e-3

    # With 99 levels, tiny values around zero all recover to exactly 0.
    arr = np.clip(np.random.randn(100) / 1000, -0.01, 0.01)
    levels = 99
    recover = mmcv.dequantize(
        mmcv.quantize(arr, -1, 1, levels), -1, 1, levels)
    assert np.all(recover == 0)
def test_build_conv_layer():
    """Exercise ``build_conv_layer``: invalid configs, the default/named
    conv types, and every type registered in ``CONV_LAYERS``."""
    rejected = [
        ('Conv2d', TypeError),  # cfg must be a dict
        (dict(kernel_size=3), KeyError),  # `type` must be in cfg
        (dict(type='FancyConv'), KeyError),  # unsupported conv type
    ]
    for bad_cfg, error in rejected:
        with pytest.raises(error):
            build_conv_layer(bad_cfg)

    kwargs = dict(
        in_channels=4, out_channels=8, kernel_size=3, groups=2, dilation=2)

    def check_channels(built):
        assert built.in_channels == kwargs['in_channels']
        assert built.out_channels == kwargs['out_channels']

    def check_geometry(built):
        square_kernel = (kwargs['kernel_size'], kwargs['kernel_size'])
        square_dilation = (kwargs['dilation'], kwargs['dilation'])
        assert built.kernel_size == square_kernel
        assert built.groups == kwargs['groups']
        assert built.dilation == square_dilation

    # cfg=None and type='Conv' both give nn.Conv2d; 'deconv' gives
    # nn.ConvTranspose2d.
    expectations = [
        (None, nn.Conv2d),
        (dict(type='Conv'), nn.Conv2d),
        (dict(type='deconv'), nn.ConvTranspose2d),
    ]
    for cfg, expected_cls in expectations:
        layer = build_conv_layer(cfg, **kwargs)
        assert isinstance(layer, expected_cls)
        check_channels(layer)
        check_geometry(layer)

    # Every registered conv type must be buildable from the same kwargs.
    for type_name, module in CONV_LAYERS.module_dict.items():
        layer = build_conv_layer(dict(type=type_name), **kwargs)
        assert isinstance(layer, module)
        check_channels(layer)
def test_build_activation_layer():
    """Exercise ``build_activation_layer``: invalid configs, every type in
    ``ACTIVATION_LAYERS``, and the output range of the Clamp/Clip layer."""
    for bad_cfg, error in [
        ('ReLU', TypeError),  # cfg must be a dict
        (dict(), KeyError),  # `type` must be in cfg
    ]:
        with pytest.raises(error):
            build_activation_layer(bad_cfg)

    cfg = dict(type='FancyReLU')
    with pytest.raises(KeyError):
        # unsupported activation type
        build_activation_layer(cfg)

    # test each type of activation layer in activation_cfg
    # (the same dict is reused, only its `type` entry is rewritten)
    for type_name, module in ACTIVATION_LAYERS.module_dict.items():
        cfg['type'] = type_name
        assert isinstance(build_activation_layer(cfg), module)

    # sanity check for Clamp: (config, expected lower bound, upper bound)
    clamp_cases = [
        (dict(type='Clamp'), -1, 1),
        (dict(type='Clip', min=0), 0, 1),
        (dict(type='Clamp', max=0), -1, 0),
    ]
    x = None
    for clamp_cfg, lower, upper in clamp_cases:
        act = build_activation_layer(clamp_cfg)
        if x is None:
            # Drawn once, right after the first layer is built, then shared
            # by all cases; scaled so most values fall outside the bounds.
            x = torch.randn(10) * 1000
        y = act(x)
        within = np.logical_and((y >= lower).numpy(), (y <= upper).numpy())
        assert within.all()
def test_upsample_layer():
    """Exercise ``build_upsample_layer``: invalid configs, the interpolation
    modes, 'deconv' (cfg / keyword / positional arguments) and
    'pixel_shuffle'."""
    for bad_cfg, error in [
        ('bilinear', TypeError),  # cfg must be a dict
        (dict(), KeyError),  # `type` must be in cfg
    ]:
        with pytest.raises(error):
            build_upsample_layer(bad_cfg)

    cfg = dict(type='FancyUpsample')
    with pytest.raises(KeyError):
        # unsupported upsample type
        build_upsample_layer(cfg)

    # Interpolation modes map onto nn.Upsample; the same dict is reused and
    # only its `type` entry is rewritten.
    for type_name in ['nearest', 'bilinear']:
        cfg['type'] = type_name
        layer = build_upsample_layer(cfg)
        assert isinstance(layer, nn.Upsample)
        assert layer.mode == type_name

    # deconv with every argument inside cfg
    layer = build_upsample_layer(
        dict(
            type='deconv',
            in_channels=3,
            out_channels=3,
            kernel_size=3,
            stride=2))
    assert isinstance(layer, nn.ConvTranspose2d)

    cfg = dict(type='deconv')
    kwargs = dict(in_channels=3, out_channels=3, kernel_size=3, stride=2)

    def check_deconv(built):
        assert isinstance(built, nn.ConvTranspose2d)
        assert built.in_channels == kwargs['in_channels']
        assert built.out_channels == kwargs['out_channels']
        assert built.kernel_size == (kwargs['kernel_size'],
                                     kwargs['kernel_size'])
        assert built.stride == (kwargs['stride'], kwargs['stride'])

    # deconv with the arguments passed by keyword, then positionally
    check_deconv(build_upsample_layer(cfg, **kwargs))
    check_deconv(build_upsample_layer(cfg, 3, 3, 3, 2))

    layer = build_upsample_layer(
        dict(
            type='pixel_shuffle',
            in_channels=3,
            out_channels=3,
            scale_factor=2,
            upsample_kernel=3))
    assert isinstance(layer, PixelShufflePack)
    assert layer.scale_factor == 2
    assert layer.upsample_kernel == 3
def test_infer_plugin_abbr():
    """``infer_plugin_abbr`` rejects non-classes, honours an explicit
    ``_abbr_`` attribute and otherwise derives a snake_case name."""
    not_a_class = 0
    with pytest.raises(TypeError):
        # class_type must be a class
        infer_plugin_abbr(not_a_class)

    # Explicit abbreviation wins.
    with_abbr = type('MyPlugin', (), {'_abbr_': 'mp'})
    assert infer_plugin_abbr(with_abbr) == 'mp'

    # Without `_abbr_` the abbreviation comes from the class name.
    without_abbr = type('FancyPlugin', (), {})
    assert infer_plugin_abbr(without_abbr) == 'fancy_plugin'
def test_context_block():
    """Exercise ContextBlock argument validation, pooling and fusion options."""
    in_channels, ratio = 16, 1. / 4

    # Each of these keyword sets must trigger an AssertionError on
    # construction: an unknown pooling type, a ``fusion_types`` value that is
    # not a list/tuple, and an unknown entry inside ``fusion_types``.
    invalid_kwargs = (
        dict(pooling_type='unsupport_type'),
        dict(fusion_types='unsupport_type'),
        dict(fusion_types=('unsupport_type', )),
    )
    for kwargs in invalid_kwargs:
        with pytest.raises(AssertionError):
            ContextBlock(in_channels, ratio, **kwargs)

    # pooling_type='att': a conv mask mapping 16 channels to 1 is present
    feats = torch.randn(2, 16, 20, 20)
    block = ContextBlock(in_channels, ratio, pooling_type='att')
    result = block(feats)
    assert block.conv_mask.in_channels == 16
    assert block.conv_mask.out_channels == 1
    assert result.shape == feats.shape

    # pooling_type='avg': an ``avg_pool`` attribute is present
    feats = torch.randn(2, 16, 20, 20)
    block = ContextBlock(in_channels, ratio, pooling_type='avg')
    result = block(feats)
    assert hasattr(block, 'avg_pool')
    assert result.shape == feats.shape

    # fusion_types decides which of the two fusion convs exist:
    # (fusion_types, expects channel_add_conv, expects channel_mul_conv)
    fusion_cases = (
        (('channel_add', ), True, False),
        (('channel_mul', ), False, True),
        (('channel_add', 'channel_mul'), True, True),
    )
    for fusion_types, has_add, has_mul in fusion_cases:
        feats = torch.randn(2, 16, 20, 20)
        block = ContextBlock(in_channels, ratio, fusion_types=fusion_types)
        result = block(feats)
        assert (block.channel_add_conv is not None) == has_add
        assert (block.channel_mul_conv is not None) == has_mul
        assert result.shape == feats.shape
def test_conv_module():
    """Check ConvModule argument validation, layer composition and options."""
    # conv_cfg must be a dict or None
    with pytest.raises(AssertionError):
        ConvModule(3, 8, 2, conv_cfg='conv')

    # norm_cfg must be a dict or None
    with pytest.raises(AssertionError):
        ConvModule(3, 8, 2, norm_cfg='norm')

    # softmax is not supported
    with pytest.raises(KeyError):
        ConvModule(3, 8, 2, act_cfg=dict(type='softmax'))

    shrunk_shape = (1, 8, 255, 255)  # kernel 2, no padding, 256x256 input
    same_shape = (1, 8, 256, 256)  # kernel 3, padding 1, 256x256 input

    # conv + norm + act
    module = ConvModule(3, 8, 2, norm_cfg=dict(type='BN'))
    assert module.with_activation
    assert hasattr(module, 'activate')
    assert module.with_norm
    assert hasattr(module, 'norm')
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == shrunk_shape

    # conv + act
    module = ConvModule(3, 8, 2)
    assert module.with_activation
    assert hasattr(module, 'activate')
    assert not module.with_norm
    assert module.norm is None
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == shrunk_shape

    # conv only
    module = ConvModule(3, 8, 2, act_cfg=None)
    assert not module.with_norm
    assert module.norm is None
    assert not module.with_activation
    assert not hasattr(module, 'activate')
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == shrunk_shape

    # conv layer that brings its own `init_weights` (zero-filled weights)
    custom_module = ConvModule(
        3, 8, 2, conv_cfg=dict(type='ExampleConv'), act_cfg=None)
    zero_weight = torch.zeros(8, 3, 2, 2)
    assert torch.equal(custom_module.conv.conv0.weight, zero_weight)

    # with_spectral_norm=True
    module = ConvModule(3, 8, 3, padding=1, with_spectral_norm=True)
    assert hasattr(module.conv, 'weight_orig')
    assert module(x).shape == same_shape

    # padding_mode='reflect'
    module = ConvModule(3, 8, 3, padding=1, padding_mode='reflect')
    assert isinstance(module.padding_layer, nn.ReflectionPad2d)
    assert module(x).shape == same_shape

    # non-existing padding mode
    with pytest.raises(KeyError):
        module = ConvModule(3, 8, 3, padding=1, padding_mode='non_exists')

    # each activation type must build the matching activation class
    activation_cases = (
        ('LeakyReLU', nn.LeakyReLU),
        ('Tanh', nn.Tanh),
        ('Sigmoid', nn.Sigmoid),
        ('PReLU', nn.PReLU),
        ('HSwish', HSwish),
        ('HSigmoid', HSigmoid),
    )
    for act_type, act_cls in activation_cases:
        module = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type=act_type))
        assert isinstance(module.activate, act_cls)
        assert module(x).shape == same_shape
@patch('torch.nn.ReLU.forward', relu_forward)
@patch('torch.nn.BatchNorm2d.forward', bn_forward)
@patch('torch.nn.Conv2d.forward', conv_forward)
def test_order():
    """Check the layer execution order of ConvModule.

    The conv, batch-norm and ReLU ``forward`` methods are patched with
    functions that append a suffix to a string, so the resulting string spells
    out the order in which the layers were applied.
    """
    invalid_orders = (
        ['conv', 'norm', 'act'],  # order must be a tuple
        ('conv', 'norm'),  # length of order must be 3
        ('conv', 'norm', 'norm'),  # must be an order of conv/norm/act
        ('conv', 'norm', 'something'),  # must be an order of conv/norm/act
    )
    for bad_order in invalid_orders:
        with pytest.raises(AssertionError):
            ConvModule(3, 8, 2, order=bad_order)

    bn_cfg = dict(type='BN')

    # default order ('conv', 'norm', 'act')
    module = ConvModule(3, 8, 2, norm_cfg=bn_cfg)
    assert module('input') == 'input_conv_bn_relu'

    # ('norm', 'conv', 'act')
    module = ConvModule(
        3, 8, 2, norm_cfg=bn_cfg, order=('norm', 'conv', 'act'))
    assert module('input') == 'input_bn_conv_relu'

    # ('conv', 'norm', 'act'), activate=False
    module = ConvModule(3, 8, 2, norm_cfg=bn_cfg)
    assert module('input', activate=False) == 'input_conv_bn'

    # ('conv', 'norm', 'act'), norm=False
    module = ConvModule(3, 8, 2, norm_cfg=bn_cfg)
    assert module('input', norm=False) == 'input_conv_relu'
def test_depthwise_separable_conv():
    """Check DepthwiseSeparableConvModule defaults and per-branch configs."""

    def act_name(conv_module):
        # Class name of the activation layer attached to a sub-module.
        return conv_module.activate.__class__.__name__

    # passing ``groups`` explicitly is expected to raise an AssertionError
    with pytest.raises(AssertionError):
        DepthwiseSeparableConvModule(4, 8, 2, groups=2)

    shrunk_shape = (1, 8, 255, 255)  # kernel 2, no padding, 256x256 input
    same_shape = (1, 8, 256, 256)  # kernel 3, padding 1, 256x256 input
    bn_cfg = dict(type='BN')

    # default config
    module = DepthwiseSeparableConvModule(3, 8, 2)
    assert module.depthwise_conv.conv.groups == 3
    assert module.pointwise_conv.conv.kernel_size == (1, 1)
    assert not module.depthwise_conv.with_norm
    assert not module.pointwise_conv.with_norm
    assert act_name(module.depthwise_conv) == 'ReLU'
    assert act_name(module.pointwise_conv) == 'ReLU'
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == shrunk_shape

    # dw_norm_cfg only affects the depthwise branch
    module = DepthwiseSeparableConvModule(3, 8, 2, dw_norm_cfg=bn_cfg)
    assert module.depthwise_conv.norm_name == 'bn'
    assert not module.pointwise_conv.with_norm
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == shrunk_shape

    # pw_norm_cfg only affects the pointwise branch
    module = DepthwiseSeparableConvModule(3, 8, 2, pw_norm_cfg=bn_cfg)
    assert not module.depthwise_conv.with_norm
    assert module.pointwise_conv.norm_name == 'bn'
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == shrunk_shape

    # norm_cfg affects both branches
    module = DepthwiseSeparableConvModule(3, 8, 2, norm_cfg=bn_cfg)
    assert module.depthwise_conv.norm_name == 'bn'
    assert module.pointwise_conv.norm_name == 'bn'
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == shrunk_shape

    # order ('norm', 'conv', 'act')
    module = DepthwiseSeparableConvModule(
        3, 8, 2, order=('norm', 'conv', 'act'))
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == shrunk_shape

    # with_spectral_norm=True applies to both convolutions
    module = DepthwiseSeparableConvModule(
        3, 8, 3, padding=1, with_spectral_norm=True)
    assert hasattr(module.depthwise_conv.conv, 'weight_orig')
    assert hasattr(module.pointwise_conv.conv, 'weight_orig')
    assert module(x).shape == same_shape

    # padding_mode='reflect'
    module = DepthwiseSeparableConvModule(
        3, 8, 3, padding=1, padding_mode='reflect')
    assert isinstance(module.depthwise_conv.padding_layer,
                      nn.ReflectionPad2d)
    assert module(x).shape == same_shape

    # (config keyword, expected depthwise activation, expected pointwise
    # activation) for dw_act_cfg, pw_act_cfg and act_cfg
    act_cases = (
        ('dw_act_cfg', 'LeakyReLU', 'ReLU'),
        ('pw_act_cfg', 'ReLU', 'LeakyReLU'),
        ('act_cfg', 'LeakyReLU', 'LeakyReLU'),
    )
    for cfg_key, dw_expected, pw_expected in act_cases:
        act_kwargs = {cfg_key: dict(type='LeakyReLU')}
        module = DepthwiseSeparableConvModule(3, 8, 3, padding=1, **act_kwargs)
        assert act_name(module.depthwise_conv) == dw_expected
        assert act_name(module.pointwise_conv) == pw_expected
        assert module(x).shape == same_shape
nn.ELU(), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0}, # noqa: E501 {'model': nn.LeakyReLU(), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0}, # noqa: E501 {'model': nn.ReLU6(), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0}, # noqa: E501 {'model': nn.MaxPool1d(2), 'input': (3, 16), 'flops': 48.0, 'params': 0}, # noqa: E501 {'model': nn.MaxPool2d(2), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0}, # noqa: E501 {'model': nn.MaxPool3d(2), 'input': (3, 3, 16, 16), 'flops': 2304.0, 'params': 0}, # noqa: E501 {'model': nn.AvgPool1d(2), 'input': (3, 16), 'flops': 48.0, 'params': 0}, # noqa: E501 {'model': nn.AvgPool2d(2), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0}, # noqa: E501 {'model': nn.AvgPool3d(2), 'input': (3, 3, 16, 16), 'flops': 2304.0, 'params': 0}, # noqa: E501 {'model': nn.AdaptiveMaxPool1d(2), 'input': (3, 16), 'flops': 48.0, 'params': 0}, # noqa: E501 {'model': nn.AdaptiveMaxPool2d(2), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0}, # noqa: E501 {'model': nn.AdaptiveMaxPool3d(2), 'input': (3, 3, 16, 16), 'flops': 2304.0, 'params': 0}, # noqa: E501 {'model': nn.AdaptiveAvgPool1d(2), 'input': (3, 16), 'flops': 48.0, 'params': 0}, # noqa: E501 {'model': nn.AdaptiveAvgPool2d(2), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0}, # noqa: E501 {'model': nn.AdaptiveAvgPool3d(2), 'input': (3, 3, 16, 16), 'flops': 2304.0, 'params': 0}, # noqa: E501 {'model': nn.BatchNorm1d(3), 'input': (3, 16), 'flops': 96.0, 'params': 6.0}, # noqa: E501 {'model': nn.BatchNorm2d(3), 'input': (3, 16, 16), 'flops': 1536.0, 'params': 6.0}, # noqa: E501 {'model': nn.BatchNorm3d(3), 'input': (3, 3, 16, 16), 'flops': 4608.0, 'params': 6.0}, # noqa: E501 {'model': nn.GroupNorm(2, 6), 'input': (6, 16, 16), 'flops': 3072.0, 'params': 12.0}, # noqa: E501 {'model': nn.InstanceNorm1d(3, affine=True), 'input': (3, 16), 'flops': 96.0, 'params': 6.0}, # noqa: E501 {'model': nn.InstanceNorm2d(3, affine=True), 'input': (3, 16, 16), 'flops': 1536.0, 'params': 6.0}, # 
class ExampleModel(nn.Module):
    """Toy model holding a single ``nn.Conv2d(3, 8, 3)`` layer.

    ``forward`` takes a shape (an iterable of ints without the batch
    dimension) instead of a tensor, draws a random batch-of-one input of that
    shape and returns the convolution output.
    """

    def __init__(self):
        super(ExampleModel, self).__init__()
        self.conv2d = nn.Conv2d(3, 8, 3)

    def forward(self, imgs):
        # Prepend a batch dimension of 1 to the requested input shape.
        batched_shape = (1, ) + tuple(imgs)
        sample = torch.randn(batched_shape)
        return self.conv2d(sample)
def test_flops_to_string():
    """Check ``flops_to_string`` for explicit units, precision and auto units."""
    giga_flops = 6.54321 * 10.**9

    # (flops, extra positional args, extra keyword args, expected string)
    cases = (
        # default unit and explicit units
        (giga_flops, (), {}, '6.54 GFLOPs'),
        (giga_flops, ('MFLOPs', ), {}, '6543.21 MFLOPs'),
        (giga_flops, ('KFLOPs', ), {}, '6543210.0 KFLOPs'),
        (giga_flops, ('FLOPs', ), {}, '6543210000.0 FLOPs'),
        # custom precision
        (giga_flops, (), dict(precision=4), '6.5432 GFLOPs'),
        # units=None: the unit is picked from the magnitude
        (6.54321 * 10.**9, (None, ), {}, '6.54 GFLOPs'),
        (3.21 * 10.**7, (None, ), {}, '32.1 MFLOPs'),
        (5.4 * 10.**3, (None, ), {}, '5.4 KFLOPs'),
        (987, (None, ), {}, '987 FLOPs'),
    )
    for flops, args, kwargs, expected in cases:
        assert flops_to_string(flops, *args, **kwargs) == expected
def test_context_block():
    """Exercise GeneralizedAttention across attention types and strides.

    The function name is left unchanged so that selecting this test by name
    keeps working, although it tests ``GeneralizedAttention``.
    """
    # test attention_type='1000'
    imgs = torch.randn(2, 16, 20, 20)
    gen_attention_block = GeneralizedAttention(16, attention_type='1000')
    assert gen_attention_block.query_conv.in_channels == 16
    assert gen_attention_block.key_conv.in_channels == 16
    # Previously ``key_conv`` was asserted a second time here, leaving the
    # value projection unchecked.
    assert gen_attention_block.value_conv.in_channels == 16
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test attention_type='0100'
    imgs = torch.randn(2, 16, 20, 20)
    gen_attention_block = GeneralizedAttention(16, attention_type='0100')
    assert gen_attention_block.query_conv.in_channels == 16
    assert gen_attention_block.appr_geom_fc_x.in_features == 8
    assert gen_attention_block.appr_geom_fc_y.in_features == 8
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test attention_type='0010'
    imgs = torch.randn(2, 16, 20, 20)
    gen_attention_block = GeneralizedAttention(16, attention_type='0010')
    assert gen_attention_block.key_conv.in_channels == 16
    assert hasattr(gen_attention_block, 'appr_bias')
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test attention_type='0001'
    imgs = torch.randn(2, 16, 20, 20)
    gen_attention_block = GeneralizedAttention(16, attention_type='0001')
    assert gen_attention_block.appr_geom_fc_x.in_features == 8
    assert gen_attention_block.appr_geom_fc_y.in_features == 8
    assert hasattr(gen_attention_block, 'geom_bias')
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test spatial_range >= 0
    imgs = torch.randn(2, 256, 20, 20)
    gen_attention_block = GeneralizedAttention(256, spatial_range=10)
    assert hasattr(gen_attention_block, 'local_constraint_map')
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test q_stride > 1
    imgs = torch.randn(2, 16, 20, 20)
    gen_attention_block = GeneralizedAttention(16, q_stride=2)
    assert gen_attention_block.q_downsample is not None
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test kv_stride > 1
    imgs = torch.randn(2, 16, 20, 20)
    gen_attention_block = GeneralizedAttention(16, kv_stride=2)
    assert gen_attention_block.kv_downsample is not None
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test fp16 with attention_type='1111' (only run when CUDA is available)
    if torch.cuda.is_available():
        imgs = torch.randn(2, 16, 20, 20).cuda().to(torch.half)
        gen_attention_block = GeneralizedAttention(
            16,
            spatial_range=-1,
            num_heads=8,
            attention_type='1111',
            kv_stride=2)
        gen_attention_block.cuda().type(torch.half)
        out = gen_attention_block(imgs)
        assert out.shape == imgs.shape
def test_hswish():
    """Check the ``inplace`` flag and the output of HSwish."""
    # the ``inplace`` argument is forwarded to the wrapped activation
    inplace_act = HSwish(inplace=True)
    assert inplace_act.act.inplace
    act = HSwish()
    assert not act.act.inplace

    x = torch.randn(1, 3, 64, 64)
    # reference value: x * relu6(x + 3) / 6
    reference = x * relu6(x + 3) / 6
    result = act(x)
    # shape and values must match the reference exactly
    assert result.shape == reference.shape
    assert torch.equal(result, reference)
def test_nonlocal():
    """Check ``_NonLocalNd`` mode validation and construction variants."""
    # an unsupported mode must raise a ValueError
    with pytest.raises(ValueError):
        _NonLocalNd(3, mode='unsupport_mode')

    # Construction must succeed with the default ``zeros_init`` and with
    # ``zeros_init=False``, each time without and with a BN norm config.
    for init_kwargs in (dict(), dict(zeros_init=False)):
        _NonLocalNd(3, **init_kwargs)
        _NonLocalNd(3, norm_cfg=dict(type='BN'), **init_kwargs)
def test_nonlocal2d():
    """Check NonLocal2d output shapes for every mode, with and without
    sub-sampling."""

    def use_gpu():
        # Tensors/modules are moved to the GPU only when torch reports the
        # version string 'parrots' and CUDA is available.
        return torch.__version__ == 'parrots' and torch.cuda.is_available()

    # Default construction followed by each explicitly named mode.
    plain_cases = (
        dict(),
        dict(mode='dot_product'),
        dict(mode='concatenation'),
        dict(mode='gaussian'),
    )
    for mode_kwargs in plain_cases:
        imgs = torch.randn(2, 3, 20, 20)
        nonlocal_2d = NonLocal2d(3, **mode_kwargs)
        if mode_kwargs.get('mode') == 'gaussian':
            # 'gaussian' mode without sub-sampling has no ``phi`` attribute
            assert not hasattr(nonlocal_2d, 'phi')
        if use_gpu():
            imgs = imgs.cuda()
            nonlocal_2d.cuda()
        out = nonlocal_2d(imgs)
        assert out.shape == imgs.shape

    # 'gaussian' mode with sub_sample: ``g`` is conv + max-pool, ``phi`` is a
    # bare max-pool
    nonlocal_2d = NonLocal2d(3, mode='gaussian', sub_sample=True)
    g_branch = nonlocal_2d.g
    assert isinstance(g_branch, nn.Sequential) and len(g_branch) == 2
    assert isinstance(g_branch[1], nn.MaxPool2d)
    assert g_branch[1].kernel_size == (2, 2)
    assert isinstance(nonlocal_2d.phi, nn.MaxPool2d)
    if use_gpu():
        nonlocal_2d.cuda()
    out = nonlocal_2d(imgs)
    assert out.shape == imgs.shape

    # 'dot_product' mode with sub_sample: both ``g`` and ``phi`` end in a
    # max-pool
    nonlocal_2d = NonLocal2d(3, mode='dot_product', sub_sample=True)
    for branch in (nonlocal_2d.g, nonlocal_2d.phi):
        assert isinstance(branch, nn.Sequential) and len(branch) == 2
        assert isinstance(branch[1], nn.MaxPool2d)
        assert branch[1].kernel_size == (2, 2)
    if use_gpu():
        nonlocal_2d.cuda()
    out = nonlocal_2d(imgs)
    assert out.shape == imgs.shape
imgs.cuda() nonlocal_1d.cuda() out = nonlocal_1d(imgs) assert out.shape == imgs.shape # NonLocal1d with 'gaussian' mode imgs = torch.randn(2, 3, 20) nonlocal_1d = NonLocal1d(3, mode='gaussian') assert not hasattr(nonlocal_1d, 'phi') if torch.__version__ == 'parrots': if torch.cuda.is_available(): imgs = imgs.cuda() nonlocal_1d.cuda() out = nonlocal_1d(imgs) assert out.shape == imgs.shape # NonLocal1d with 'gaussian' mode and sub_sample nonlocal_1d = NonLocal1d(3, mode='gaussian', sub_sample=True) assert isinstance(nonlocal_1d.g, nn.Sequential) and len(nonlocal_1d.g) == 2 assert isinstance(nonlocal_1d.g[1], nn.MaxPool1d) assert nonlocal_1d.g[1].kernel_size == 2 assert isinstance(nonlocal_1d.phi, nn.MaxPool1d) if torch.__version__ == 'parrots': if torch.cuda.is_available(): nonlocal_1d.cuda() out = nonlocal_1d(imgs) assert out.shape == imgs.shape # NonLocal1d with 'dot_product' mode and sub_sample nonlocal_1d = NonLocal1d(3, mode='dot_product', sub_sample=True) for m in [nonlocal_1d.g, nonlocal_1d.phi]: assert isinstance(m, nn.Sequential) and len(m) == 2 assert isinstance(m[1], nn.MaxPool1d) assert m[1].kernel_size == 2 if torch.__version__ == 'parrots': if torch.cuda.is_available(): nonlocal_1d.cuda() out = nonlocal_1d(imgs) assert out.shape == imgs.shape ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_revert_syncbn.py ================================================ import os import platform import numpy as np import pytest import torch import torch.distributed as dist from mmcv.cnn.bricks import ConvModule from mmcv.cnn.utils import revert_sync_batchnorm if platform.system() == 'Windows': import regex as re else: import re def test_revert_syncbn(): conv = ConvModule(3, 8, 2, norm_cfg=dict(type='SyncBN')) x = torch.randn(1, 3, 10, 10) # Expect a ValueError prompting that SyncBN is not supported on CPU with pytest.raises(ValueError): y = conv(x) conv = revert_sync_batchnorm(conv) y = conv(x) assert y.shape 
def test_revert_mmsyncbn():
    """Check that reverting MMSyncBN to plain BN keeps eval-mode outputs.

    The test needs a SLURM job with at least two tasks. It initialises an
    NCCL process group, runs a ``ConvModule`` with ``MMSyncBN`` in eval mode,
    reverts it with ``revert_sync_batchnorm`` and compares the outputs on GPU
    and then on CPU.
    """
    if 'SLURM_NTASKS' not in os.environ or int(os.environ['SLURM_NTASKS']) < 2:
        # Report the unmet precondition as a skip instead of printing and
        # returning, which pytest would count as a pass.
        pytest.skip('Must run on slurm with more than 1 process!\n'
                    'srun -p test --gres=gpu:2 -n2')

    rank = int(os.environ['SLURM_PROCID'])
    world_size = int(os.environ['SLURM_NTASKS'])
    local_rank = int(os.environ['SLURM_LOCALID'])
    node_list = str(os.environ['SLURM_NODELIST'])

    # The master address is built from the 2nd..5th numeric groups of the
    # node list.
    # NOTE(review): presumably the node names embed the IPv4 octets after one
    # leading number - confirm against the cluster's naming scheme.
    node_parts = re.findall('[0-9]+', node_list)
    os.environ['MASTER_ADDR'] = (f'{node_parts[1]}.{node_parts[2]}' +
                                 f'.{node_parts[3]}.{node_parts[4]}')
    os.environ['MASTER_PORT'] = '12341'
    os.environ['WORLD_SIZE'] = str(world_size)
    os.environ['RANK'] = str(rank)

    dist.init_process_group('nccl')
    torch.cuda.set_device(local_rank)

    # Every rank works on the tensor broadcast from rank 0.
    x = torch.randn(1, 3, 10, 10).cuda()
    dist.broadcast(x, src=0)

    conv = ConvModule(3, 8, 2, norm_cfg=dict(type='MMSyncBN')).cuda()
    conv.eval()
    y_mmsyncbn = conv(x).detach().cpu().numpy()

    conv = revert_sync_batchnorm(conv)
    y_bn = conv(x).detach().cpu().numpy()
    # third positional argument of np.isclose is the relative tolerance
    assert np.all(np.isclose(y_bn, y_mmsyncbn, 1e-3))

    # the reverted module must also run on CPU and agree with the GPU result
    conv, x = conv.to('cpu'), x.to('cpu')
    y_bn_cpu = conv(x).detach().numpy()
    assert np.all(np.isclose(y_bn, y_bn_cpu, 1e-3))
def test_swish():
    """Swish output must match ``x * sigmoid(x)`` in shape and exact value."""
    swish = Swish()
    feats = torch.randn(1, 3, 64, 64)
    reference = feats * F.sigmoid(feats)
    result = swish(feats)

    # same shape as the reference
    assert result.shape == reference.shape
    # element-wise identical to the reference
    assert torch.equal(result, reference)
def test_patch_embed():
    """Check PatchEmbed output shapes and reported token-grid sizes.

    Covers integer padding (plain, dilated, with LayerNorm and a declared
    ``input_size``) and the adaptive ``'same'`` / ``'corner'`` padding modes.
    In every case the sequence length of the embedded tensor must equal
    ``out_h * out_w`` of the returned grid size.
    """
    B = 2
    H = 3
    W = 4
    C = 3
    embed_dims = 10
    kernel_size = 3
    stride = 1
    dummy_input = torch.rand(B, C, H, W)
    patch_embed_1 = PatchEmbed(
        in_channels=C,
        embed_dims=embed_dims,
        kernel_size=kernel_size,
        stride=stride,
        padding=0,
        dilation=1,
        norm_cfg=None)

    x1, shape = patch_embed_1(dummy_input)
    # test out shape
    assert x1.shape == (2, 2, 10)
    # test outsize is correct
    assert shape == (1, 2)
    # test L = out_h * out_w
    assert shape[0] * shape[1] == x1.shape[1]

    B = 2
    H = 10
    W = 10
    C = 3
    embed_dims = 10
    kernel_size = 5
    stride = 2
    dummy_input = torch.rand(B, C, H, W)
    # test dilation
    patch_embed_2 = PatchEmbed(
        in_channels=C,
        embed_dims=embed_dims,
        kernel_size=kernel_size,
        stride=stride,
        padding=0,
        dilation=2,
        norm_cfg=None,
    )

    x2, shape = patch_embed_2(dummy_input)
    # test out shape
    assert x2.shape == (2, 1, 10)
    # test outsize is correct
    assert shape == (1, 1)
    # test L = out_h * out_w
    assert shape[0] * shape[1] == x2.shape[1]

    stride = 2
    input_size = (10, 10)

    dummy_input = torch.rand(B, C, H, W)
    # test stride and norm
    patch_embed_3 = PatchEmbed(
        in_channels=C,
        embed_dims=embed_dims,
        kernel_size=kernel_size,
        stride=stride,
        padding=0,
        dilation=2,
        norm_cfg=dict(type='LN'),
        input_size=input_size)

    x3, shape = patch_embed_3(dummy_input)
    # test out shape
    assert x3.shape == (2, 1, 10)
    # test outsize is correct
    assert shape == (1, 1)
    # test L = out_h * out_w
    assert shape[0] * shape[1] == x3.shape[1]

    # test the init_out_size with nn.Unfold; the effective kernel extent is
    # dilation * (kernel_size - 1) + 1 = 2 * 4 + 1. Height is checked against
    # input_size[0] and width against input_size[1].
    assert patch_embed_3.init_out_size[0] == (input_size[0] - 2 * 4 -
                                              1) // 2 + 1
    assert patch_embed_3.init_out_size[1] == (input_size[1] - 2 * 4 -
                                              1) // 2 + 1

    H = 11
    W = 12
    input_size = (H, W)
    dummy_input = torch.rand(B, C, H, W)
    # test stride and norm
    patch_embed_3 = PatchEmbed(
        in_channels=C,
        embed_dims=embed_dims,
        kernel_size=kernel_size,
        stride=stride,
        padding=0,
        dilation=2,
        norm_cfg=dict(type='LN'),
        input_size=input_size)

    _, shape = patch_embed_3(dummy_input)
    # when input_size equal to real input
    # the out_size should be equal to `init_out_size`
    assert shape == patch_embed_3.init_out_size

    # test adap padding
    # (input_size, kernel_size, stride, expected out_size)
    adaptive_cases = (
        # stride is 1
        ((5, 5), (5, 5), (1, 1), (5, 5)),
        # kernel_size == stride
        ((5, 5), (5, 5), (5, 5), (1, 1)),
        # kernel_size == stride, input needs padding along H
        ((6, 5), (5, 5), (5, 5), (2, 1)),
        # different kernel_size with different stride
        ((6, 5), (6, 2), (6, 2), (1, 3)),
    )
    for padding in ('same', 'corner'):
        in_c = 2
        embed_dims = 3
        B = 2
        dilation = 1
        bias = False

        for input_size, kernel_size, stride, expected_size in adaptive_cases:
            x = torch.rand(B, in_c, *input_size)
            patch_embed = PatchEmbed(
                in_channels=in_c,
                embed_dims=embed_dims,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
                bias=bias)

            x_out, out_size = patch_embed(x)
            assert x_out.size() == (B, expected_size[0] * expected_size[1],
                                    embed_dims)
            assert out_size == expected_size
            assert x_out.size(1) == out_size[0] * out_size[1]
== (1, 16, 4) assert out_size == (4, 4) # assert out size is consistent with real output assert x_out.size(1) == out_size[0] * out_size[1] in_c = 4 out_c = 5 kernel_size = 6 stride = 3 padding = 2 dilation = 2 bias = False patch_merge = PatchMerging( in_channels=in_c, out_channels=out_c, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, bias=bias) B, L, C = 1, 100, 4 input_size = (10, 10) x = torch.rand(B, L, C) x_out, out_size = patch_merge(x, input_size) assert x_out.size() == (1, 4, 5) assert out_size == (2, 2) # assert out size is consistent with real output assert x_out.size(1) == out_size[0] * out_size[1] # Test with adaptive padding for padding in ('same', 'corner'): in_c = 2 out_c = 3 B = 2 # test stride is 1 input_size = (5, 5) kernel_size = (5, 5) stride = (1, 1) dilation = 1 bias = False L = input_size[0] * input_size[1] x = torch.rand(B, L, in_c) patch_merge = PatchMerging( in_channels=in_c, out_channels=out_c, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, bias=bias) x_out, out_size = patch_merge(x, input_size) assert x_out.size() == (B, 25, 3) assert out_size == (5, 5) assert x_out.size(1) == out_size[0] * out_size[1] # test kernel_size == stride input_size = (5, 5) kernel_size = (5, 5) stride = (5, 5) dilation = 1 bias = False L = input_size[0] * input_size[1] x = torch.rand(B, L, in_c) patch_merge = PatchMerging( in_channels=in_c, out_channels=out_c, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, bias=bias) x_out, out_size = patch_merge(x, input_size) assert x_out.size() == (B, 1, 3) assert out_size == (1, 1) assert x_out.size(1) == out_size[0] * out_size[1] # test kernel_size == stride input_size = (6, 5) kernel_size = (5, 5) stride = (5, 5) dilation = 1 bias = False L = input_size[0] * input_size[1] x = torch.rand(B, L, in_c) patch_merge = PatchMerging( in_channels=in_c, out_channels=out_c, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, 
bias=bias) x_out, out_size = patch_merge(x, input_size) assert x_out.size() == (B, 2, 3) assert out_size == (2, 1) assert x_out.size(1) == out_size[0] * out_size[1] # test different kernel_size with different stride input_size = (6, 5) kernel_size = (6, 2) stride = (6, 2) dilation = 1 bias = False L = input_size[0] * input_size[1] x = torch.rand(B, L, in_c) patch_merge = PatchMerging( in_channels=in_c, out_channels=out_c, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, bias=bias) x_out, out_size = patch_merge(x, input_size) assert x_out.size() == (B, 3, 3) assert out_size == (1, 3) assert x_out.size(1) == out_size[0] * out_size[1] def test_multiheadattention(): MultiheadAttention( embed_dims=5, num_heads=5, attn_drop=0, proj_drop=0, dropout_layer=dict(type='Dropout', drop_prob=0.), batch_first=True) batch_dim = 2 embed_dim = 5 num_query = 100 attn_batch_first = MultiheadAttention( embed_dims=5, num_heads=5, attn_drop=0, proj_drop=0, dropout_layer=dict(type='DropPath', drop_prob=0.), batch_first=True) attn_query_first = MultiheadAttention( embed_dims=5, num_heads=5, attn_drop=0, proj_drop=0, dropout_layer=dict(type='DropPath', drop_prob=0.), batch_first=False) param_dict = dict(attn_query_first.named_parameters()) for n, v in attn_batch_first.named_parameters(): param_dict[n].data = v.data input_batch_first = torch.rand(batch_dim, num_query, embed_dim) input_query_first = input_batch_first.transpose(0, 1) assert torch.allclose( attn_query_first(input_query_first).sum(), attn_batch_first(input_batch_first).sum()) key_batch_first = torch.rand(batch_dim, num_query, embed_dim) key_query_first = key_batch_first.transpose(0, 1) assert torch.allclose( attn_query_first(input_query_first, key_query_first).sum(), attn_batch_first(input_batch_first, key_batch_first).sum()) identity = torch.ones_like(input_query_first) # check deprecated arguments can be used normally assert torch.allclose( attn_query_first( input_query_first, key_query_first, 
def test_ffn():
    """Check FFN construction constraints and its identity/shortcut handling.

    With ``add_identity=True`` and no dropout, passing an explicit shortcut
    tensor must only replace the input's contribution to the output, so the
    output sums differ by ``shortcut.sum() - input.sum()``.
    """
    with pytest.raises(AssertionError):
        # num_fcs should be no less than 2
        FFN(num_fcs=1)
    # NOTE(review): `add_residual` / `residual` look like legacy spellings of
    # `add_identity` / `identity` - confirm against the FFN implementation.
    FFN(dropout=0, add_residual=True)
    ffn = FFN(dropout=0, add_identity=True)

    input_tensor = torch.rand(2, 20, 256)
    input_tensor_nbc = input_tensor.transpose(0, 1)
    assert torch.allclose(ffn(input_tensor).sum(), ffn(input_tensor_nbc).sum())

    residual = torch.rand_like(input_tensor)
    # These comparisons were previously evaluated and thrown away; assert
    # them so that a regression actually fails the test.
    assert torch.allclose(
        ffn(input_tensor, residual=residual).sum(),
        ffn(input_tensor).sum() + residual.sum() - input_tensor.sum())

    assert torch.allclose(
        ffn(input_tensor, identity=residual).sum(),
        ffn(input_tensor).sum() + residual.sum() - input_tensor.sum())
def test_transformerlayersequence():
    """Build a TransformerLayerSequence from a dict and from a bad list.

    A single layer config with ``num_layers=6`` must yield six layers with
    ``pre_norm`` False. A one-element list of configs with ``num_layers=6``
    must be rejected with an AssertionError.
    """
    op_order = ('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')

    shared_layer_cfg = {
        'type': 'BaseTransformerLayer',
        'attn_cfgs': [
            {
                'type': 'MultiheadAttention',
                'embed_dims': 256,
                'num_heads': 8,
                'dropout': 0.1
            },
            {
                'type': 'MultiheadAttention',
                'embed_dims': 256,
                'num_heads': 4
            },
        ],
        'feedforward_channels': 1024,
        'ffn_dropout': 0.1,
        'operation_order': op_order,
    }
    squeue = TransformerLayerSequence(
        num_layers=6, transformerlayers=shared_layer_cfg)
    assert len(squeue.layers) == 6
    assert squeue.pre_norm is False

    lone_layer_cfg = {
        'type': 'BaseTransformerLayer',
        'attn_cfgs': [
            {
                'type': 'MultiheadAttention',
                'embed_dims': 256,
                'num_heads': 8,
                'dropout': 0.1
            },
            {
                'type': 'MultiheadAttention',
                'embed_dims': 256
            },
        ],
        'feedforward_channels': 1024,
        'ffn_dropout': 0.1,
        'operation_order': op_order,
    }
    with pytest.raises(AssertionError):
        # if transformerlayers is a list, len(transformerlayers)
        # should be equal to num_layers
        TransformerLayerSequence(
            num_layers=6, transformerlayers=[lone_layer_cfg])
def test_constant_init():
    """constant_init must fill the weight, with or without a bias term."""
    conv_module = nn.Conv2d(3, 16, 3)
    constant_init(conv_module, 0.1)
    assert conv_module.weight.allclose(
        torch.full_like(conv_module.weight, 0.1))
    # no bias value was passed, so the bias is expected to be all zeros
    assert conv_module.bias.allclose(torch.zeros_like(conv_module.bias))

    conv_module_no_bias = nn.Conv2d(3, 16, 3, bias=False)
    constant_init(conv_module_no_bias, 0.1)
    # Check the module that was just initialised; the assertion used to
    # re-check `conv_module`, leaving the no-bias path unverified.
    assert conv_module_no_bias.weight.allclose(
        torch.full_like(conv_module_no_bias.weight, 0.1))
def test_trunc_normal_init():
    """trunc_normal_init must draw weights from the requested distribution.

    Mean, std and both truncation bounds are picked at random, the conv
    weights are initialised, and a Kolmogorov-Smirnov test against scipy's
    ``truncnorm`` must not reject at the 0.0001 level.
    """

    def _random_float(a, b):
        # uniform sample between a and b
        span = b - a
        return span * random.random() + a

    def _is_trunc_normal(tensor, mean, std, a, b):
        # scipy's trunc norm is suited for data drawn from N(0, 1),
        # so we need to transform our data to test it using scipy.
        standardized = ((tensor.view(-1) - mean) / std).tolist()
        lower = (a - mean) / std
        upper = (b - mean) / std
        ks_result = stats.kstest(
            standardized, 'truncnorm', args=(lower, upper))
        return ks_result[1] > 0.0001

    conv_module = nn.Conv2d(3, 16, 3)
    mean = _random_float(-3, 3)
    std = _random_float(.01, 1)
    # lower bound within two std below the mean, upper bound within two above
    a = _random_float(mean - 2 * std, mean)
    b = _random_float(mean, mean + 2 * std)
    trunc_normal_init(conv_module, mean, std, a, b, bias=0.1)
    assert _is_trunc_normal(conv_module.weight, mean, std, a, b)
    expected_bias = torch.full_like(conv_module.bias, 0.1)
    assert conv_module.bias.allclose(expected_bias)

    # a module without bias must be accepted with default arguments
    conv_module_no_bias = nn.Conv2d(3, 16, 3, bias=False)
    trunc_normal_init(conv_module_no_bias)
    # TODO: sanity check distribution, e.g. mean, std
def test_bias_init_with_prob():
    """bias_init_with_prob(p) must equal ``-log((1 - p) / p)``."""
    conv_module = nn.Conv2d(3, 16, 3)
    prior_prob = 0.1
    # Use `prior_prob` for both the call and the reference value so that
    # the two cannot drift apart if the probability is changed.
    normal_init(conv_module, bias=bias_init_with_prob(prior_prob))
    # TODO: sanity check of weight distribution, e.g. mean, std
    bias = float(-np.log((1 - prior_prob) / prior_prob))
    assert conv_module.bias.allclose(torch.full_like(conv_module.bias, bias))
def test_xavierinit():
    """test XavierInit class.

    Checks that XavierInit only touches the layer types named in ``layer``
    (by class name or base-class name), that ``bias`` / ``bias_prob`` set the
    bias value, and that wrongly typed ``bias`` / ``layer`` arguments raise
    TypeError.
    """
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
    func = XavierInit(bias=0.1, layer='Conv2d')
    func(model)
    # Compare each bias with a reference built from itself. The references
    # used to be swapped between the two layers and only worked because the
    # (1,) and (2,) shapes broadcast.
    assert model[0].bias.allclose(torch.full_like(model[0].bias, 0.1))
    assert not model[2].bias.allclose(torch.full_like(model[2].bias, 0.1))

    constant_func = ConstantInit(val=0, bias=0, layer=['Conv2d', 'Linear'])
    func = XavierInit(gain=100, bias_prob=0.01, layer=['Conv2d', 'Linear'])
    # start from all-zero parameters so that any change is visible
    model.apply(constant_func)
    assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 0.))
    assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 0.))
    assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, 0.))
    assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, 0.))

    res = bias_init_with_prob(0.01)
    func(model)
    assert not torch.equal(model[0].weight,
                           torch.full(model[0].weight.shape, 0.))
    assert not torch.equal(model[2].weight,
                           torch.full(model[2].weight.shape, 0.))
    assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, res))
    assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, res))

    # test layer key with base class name
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Conv1d(1, 2, 1))
    func = ConstantInit(val=4., bias=5., layer='_ConvNd')
    func(model)
    assert torch.all(model[0].weight == 4.)
    assert torch.all(model[2].weight == 4.)
    assert torch.all(model[0].bias == 5.)
    assert torch.all(model[2].bias == 5.)

    func = XavierInit(gain=100, bias_prob=0.01, layer='_ConvNd')
    func(model)
    assert not torch.all(model[0].weight == 4.)
    assert not torch.all(model[2].weight == 4.)
    assert torch.all(model[0].bias == res)
    assert torch.all(model[2].bias == res)

    # test bias input type
    with pytest.raises(TypeError):
        func = XavierInit(bias='0.1', layer='Conv2d')
    # test layer input type
    with pytest.raises(TypeError):
        func = XavierInit(bias=0.1, layer=1)
def test_uniforminit():
    """test UniformInit class."""
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))

    # With a == b the uniform range collapses to one value, so weights and
    # biases can be compared exactly.
    for bound, bias_value in ((1, 2), (100, 10)):
        func = UniformInit(
            a=bound, b=bound, bias=bias_value, layer=['Conv2d', 'Linear'])
        func(model)
        for idx in (0, 2):
            weight = model[idx].weight
            assert torch.equal(weight, torch.full(weight.shape, float(bound)))
        for idx in (0, 2):
            bias = model[idx].bias
            assert torch.equal(bias,
                               torch.full(bias.shape, float(bias_value)))

    # test layer key with base class name
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Conv1d(1, 2, 1))
    func = UniformInit(a=100, b=100, bias_prob=0.01, layer='_ConvNd')
    res = bias_init_with_prob(0.01)
    func(model)
    for idx in (0, 2):
        assert torch.all(model[idx].weight == 100.)
    for idx in (0, 2):
        assert torch.all(model[idx].bias == res)
def test_pretrainedinit():
    """test PretrainedInit class.

    Saves a constant-initialised FooModule as a checkpoint, then checks that
    PretrainedInit loads it into a second FooModule and, with
    ``prefix='linear.'``, into a bare ``nn.Linear``.
    """
    import os.path as osp

    modelA = FooModule()
    constant_func = ConstantInit(val=1, bias=2, layer=['Conv2d', 'Linear'])
    modelA.apply(constant_func)
    modelB = FooModule()
    modelC = nn.Linear(1, 2)

    # Write the checkpoint inside the temporary directory so it is removed
    # with it. The directory used to be created but ignored, which left
    # 'modelA.pth' behind in the current working directory.
    with TemporaryDirectory() as tmp_dir:
        checkpoint = osp.join(tmp_dir, 'modelA.pth')
        torch.save(modelA.state_dict(), checkpoint)
        funcB = PretrainedInit(checkpoint=checkpoint)
        funcC = PretrainedInit(checkpoint=checkpoint, prefix='linear.')

        funcB(modelB)
        assert torch.equal(modelB.linear.weight,
                           torch.full(modelB.linear.weight.shape, 1.))
        assert torch.equal(modelB.linear.bias,
                           torch.full(modelB.linear.bias.shape, 2.))
        assert torch.equal(modelB.conv2d.weight,
                           torch.full(modelB.conv2d.weight.shape, 1.))
        assert torch.equal(modelB.conv2d.bias,
                           torch.full(modelB.conv2d.bias.shape, 2.))
        assert torch.equal(modelB.conv2d_2.weight,
                           torch.full(modelB.conv2d_2.weight.shape, 1.))
        assert torch.equal(modelB.conv2d_2.bias,
                           torch.full(modelB.conv2d_2.bias.shape, 2.))

        # only the 'linear.' sub-module of the checkpoint goes into modelC
        funcC(modelC)
        assert torch.equal(modelC.weight, torch.full(modelC.weight.shape, 1.))
        assert torch.equal(modelC.bias, torch.full(modelC.bias.shape, 2.))
init_cfg) assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 1.)) assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 1.)) assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, 2.)) assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, 2.)) assert init_cfg == dict( type='Constant', layer=['Conv2d', 'Linear'], val=1, bias=2) # test init_cfg with list type init_cfg = [ dict(type='Constant', layer='Conv2d', val=1, bias=2), dict(type='Constant', layer='Linear', val=3, bias=4) ] initialize(model, init_cfg) assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 1.)) assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 3.)) assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, 2.)) assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, 4.)) assert init_cfg == [ dict(type='Constant', layer='Conv2d', val=1, bias=2), dict(type='Constant', layer='Linear', val=3, bias=4) ] # test layer key and override key init_cfg = dict( type='Constant', val=1, bias=2, layer=['Conv2d', 'Linear'], override=dict(type='Constant', name='conv2d_2', val=3, bias=4)) initialize(foonet, init_cfg) assert torch.equal(foonet.linear.weight, torch.full(foonet.linear.weight.shape, 1.)) assert torch.equal(foonet.linear.bias, torch.full(foonet.linear.bias.shape, 2.)) assert torch.equal(foonet.conv2d.weight, torch.full(foonet.conv2d.weight.shape, 1.)) assert torch.equal(foonet.conv2d.bias, torch.full(foonet.conv2d.bias.shape, 2.)) assert torch.equal(foonet.conv2d_2.weight, torch.full(foonet.conv2d_2.weight.shape, 3.)) assert torch.equal(foonet.conv2d_2.bias, torch.full(foonet.conv2d_2.bias.shape, 4.)) assert init_cfg == dict( type='Constant', val=1, bias=2, layer=['Conv2d', 'Linear'], override=dict(type='Constant', name='conv2d_2', val=3, bias=4)) # test override key init_cfg = dict( type='Constant', val=5, bias=6, override=dict(name='conv2d_2')) initialize(foonet, init_cfg) assert not 
torch.equal(foonet.linear.weight, torch.full(foonet.linear.weight.shape, 5.)) assert not torch.equal(foonet.linear.bias, torch.full(foonet.linear.bias.shape, 6.)) assert not torch.equal(foonet.conv2d.weight, torch.full(foonet.conv2d.weight.shape, 5.)) assert not torch.equal(foonet.conv2d.bias, torch.full(foonet.conv2d.bias.shape, 6.)) assert torch.equal(foonet.conv2d_2.weight, torch.full(foonet.conv2d_2.weight.shape, 5.)) assert torch.equal(foonet.conv2d_2.bias, torch.full(foonet.conv2d_2.bias.shape, 6.)) assert init_cfg == dict( type='Constant', val=5, bias=6, override=dict(name='conv2d_2')) init_cfg = dict( type='Pretrained', checkpoint='modelA.pth', override=dict(type='Constant', name='conv2d_2', val=3, bias=4)) modelA = FooModule() constant_func = ConstantInit(val=1, bias=2, layer=['Conv2d', 'Linear']) modelA.apply(constant_func) with TemporaryDirectory(): torch.save(modelA.state_dict(), 'modelA.pth') initialize(foonet, init_cfg) assert torch.equal(foonet.linear.weight, torch.full(foonet.linear.weight.shape, 1.)) assert torch.equal(foonet.linear.bias, torch.full(foonet.linear.bias.shape, 2.)) assert torch.equal(foonet.conv2d.weight, torch.full(foonet.conv2d.weight.shape, 1.)) assert torch.equal(foonet.conv2d.bias, torch.full(foonet.conv2d.bias.shape, 2.)) assert torch.equal(foonet.conv2d_2.weight, torch.full(foonet.conv2d_2.weight.shape, 3.)) assert torch.equal(foonet.conv2d_2.bias, torch.full(foonet.conv2d_2.bias.shape, 4.)) assert init_cfg == dict( type='Pretrained', checkpoint='modelA.pth', override=dict(type='Constant', name='conv2d_2', val=3, bias=4)) # test init_cfg type with pytest.raises(TypeError): init_cfg = 'init_cfg' initialize(foonet, init_cfg) # test override value type with pytest.raises(TypeError): init_cfg = dict( type='Constant', val=1, bias=2, layer=['Conv2d', 'Linear'], override='conv') initialize(foonet, init_cfg) # test override name with pytest.raises(RuntimeError): init_cfg = dict( type='Constant', val=1, bias=2, layer=['Conv2d', 
'Linear'], override=dict(type='Constant', name='conv2d_3', val=3, bias=4)) initialize(foonet, init_cfg) # test list override name with pytest.raises(RuntimeError): init_cfg = dict( type='Constant', val=1, bias=2, layer=['Conv2d', 'Linear'], override=[ dict(type='Constant', name='conv2d', val=3, bias=4), dict(type='Constant', name='conv2d_3', val=5, bias=6) ]) initialize(foonet, init_cfg) # test override with args except type key with pytest.raises(ValueError): init_cfg = dict( type='Constant', val=1, bias=2, override=dict(name='conv2d_2', val=3, bias=4)) initialize(foonet, init_cfg) # test override without name with pytest.raises(ValueError): init_cfg = dict( type='Constant', val=1, bias=2, override=dict(type='Constant', val=3, bias=4)) initialize(foonet, init_cfg) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_wrappers.py ================================================ from unittest.mock import patch import pytest import torch import torch.nn as nn from mmcv.cnn.bricks import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d, Linear, MaxPool2d, MaxPool3d) if torch.__version__ != 'parrots': torch_version = '1.1' else: torch_version = 'parrots' @patch('torch.__version__', torch_version) @pytest.mark.parametrize( 'in_w,in_h,in_channel,out_channel,kernel_size,stride,padding,dilation', [(10, 10, 1, 1, 3, 1, 0, 1), (20, 20, 3, 3, 5, 2, 1, 2)]) def test_conv2d(in_w, in_h, in_channel, out_channel, kernel_size, stride, padding, dilation): """ CommandLine: xdoctest -m tests/test_cnn/test_wrappers.py test_conv2d """ # train mode # wrapper op with an empty input (size 0 along dim 0) x_empty = torch.randn(0, in_channel, in_h, in_w) torch.manual_seed(0) wrapper = Conv2d( in_channel, out_channel, kernel_size, stride=stride, padding=padding, dilation=dilation) wrapper_out = wrapper(x_empty) # torch op with a non-empty input (size 3 along dim 0) as shape reference x_normal = torch.randn(3, in_channel, in_h, in_w).requires_grad_(True) torch.manual_seed(0) ref = nn.Conv2d(
in_channel, out_channel, kernel_size, stride=stride, padding=padding, dilation=dilation) ref_out = ref(x_normal) assert wrapper_out.shape[0] == 0 assert wrapper_out.shape[1:] == ref_out.shape[1:] wrapper_out.sum().backward() assert wrapper.weight.grad is not None assert wrapper.weight.grad.shape == wrapper.weight.shape assert torch.equal(wrapper(x_normal), ref_out) # eval mode x_empty = torch.randn(0, in_channel, in_h, in_w) wrapper = Conv2d( in_channel, out_channel, kernel_size, stride=stride, padding=padding, dilation=dilation) wrapper.eval() wrapper(x_empty) @patch('torch.__version__', torch_version) @pytest.mark.parametrize( 'in_w,in_h,in_t,in_channel,out_channel,kernel_size,stride,padding,dilation', # noqa: E501 [(10, 10, 10, 1, 1, 3, 1, 0, 1), (20, 20, 20, 3, 3, 5, 2, 1, 2)]) def test_conv3d(in_w, in_h, in_t, in_channel, out_channel, kernel_size, stride, padding, dilation): """ CommandLine: xdoctest -m tests/test_cnn/test_wrappers.py test_conv3d """ # train mode # wrapper op with an empty input (size 0 along dim 0) x_empty = torch.randn(0, in_channel, in_t, in_h, in_w) torch.manual_seed(0) wrapper = Conv3d( in_channel, out_channel, kernel_size, stride=stride, padding=padding, dilation=dilation) wrapper_out = wrapper(x_empty) # torch op with a non-empty input (size 3 along dim 0) as shape reference x_normal = torch.randn(3, in_channel, in_t, in_h, in_w).requires_grad_(True) torch.manual_seed(0) ref = nn.Conv3d( in_channel, out_channel, kernel_size, stride=stride, padding=padding, dilation=dilation) ref_out = ref(x_normal) assert wrapper_out.shape[0] == 0 assert wrapper_out.shape[1:] == ref_out.shape[1:] wrapper_out.sum().backward() assert wrapper.weight.grad is not None assert wrapper.weight.grad.shape == wrapper.weight.shape assert torch.equal(wrapper(x_normal), ref_out) # eval mode x_empty = torch.randn(0, in_channel, in_t, in_h, in_w) wrapper = Conv3d( in_channel, out_channel, kernel_size, stride=stride, padding=padding, dilation=dilation) wrapper.eval() wrapper(x_empty) @patch('torch.__version__', torch_version)
@pytest.mark.parametrize( 'in_w,in_h,in_channel,out_channel,kernel_size,stride,padding,dilation', [(10, 10, 1, 1, 3, 1, 0, 1), (20, 20, 3, 3, 5, 2, 1, 2)]) def test_conv_transposed_2d(in_w, in_h, in_channel, out_channel, kernel_size, stride, padding, dilation): # wrapper op with an empty input (size 0 along dim 0) x_empty = torch.randn(0, in_channel, in_h, in_w, requires_grad=True) # out padding must be smaller than either stride or dilation op = min(stride, dilation) - 1 if torch.__version__ == 'parrots': op = 0 torch.manual_seed(0) wrapper = ConvTranspose2d( in_channel, out_channel, kernel_size, stride=stride, padding=padding, dilation=dilation, output_padding=op) wrapper_out = wrapper(x_empty) # torch op with a non-empty input (size 3 along dim 0) as shape reference x_normal = torch.randn(3, in_channel, in_h, in_w) torch.manual_seed(0) ref = nn.ConvTranspose2d( in_channel, out_channel, kernel_size, stride=stride, padding=padding, dilation=dilation, output_padding=op) ref_out = ref(x_normal) assert wrapper_out.shape[0] == 0 assert wrapper_out.shape[1:] == ref_out.shape[1:] wrapper_out.sum().backward() assert wrapper.weight.grad is not None assert wrapper.weight.grad.shape == wrapper.weight.shape assert torch.equal(wrapper(x_normal), ref_out) # eval mode x_empty = torch.randn(0, in_channel, in_h, in_w) wrapper = ConvTranspose2d( in_channel, out_channel, kernel_size, stride=stride, padding=padding, dilation=dilation, output_padding=op) wrapper.eval() wrapper(x_empty) @patch('torch.__version__', torch_version) @pytest.mark.parametrize( 'in_w,in_h,in_t,in_channel,out_channel,kernel_size,stride,padding,dilation', # noqa: E501 [(10, 10, 10, 1, 1, 3, 1, 0, 1), (20, 20, 20, 3, 3, 5, 2, 1, 2)]) def test_conv_transposed_3d(in_w, in_h, in_t, in_channel, out_channel, kernel_size, stride, padding, dilation): # wrapper op with an empty input (size 0 along dim 0) x_empty = torch.randn(0, in_channel, in_t, in_h, in_w, requires_grad=True) # out padding must be smaller than either stride or dilation op = min(stride, dilation) - 1 torch.manual_seed(0)
wrapper = ConvTranspose3d( in_channel, out_channel, kernel_size, stride=stride, padding=padding, dilation=dilation, output_padding=op) wrapper_out = wrapper(x_empty) # torch op with a non-empty input (size 3 along dim 0) as shape reference x_normal = torch.randn(3, in_channel, in_t, in_h, in_w) torch.manual_seed(0) ref = nn.ConvTranspose3d( in_channel, out_channel, kernel_size, stride=stride, padding=padding, dilation=dilation, output_padding=op) ref_out = ref(x_normal) assert wrapper_out.shape[0] == 0 assert wrapper_out.shape[1:] == ref_out.shape[1:] wrapper_out.sum().backward() assert wrapper.weight.grad is not None assert wrapper.weight.grad.shape == wrapper.weight.shape assert torch.equal(wrapper(x_normal), ref_out) # eval mode x_empty = torch.randn(0, in_channel, in_t, in_h, in_w) wrapper = ConvTranspose3d( in_channel, out_channel, kernel_size, stride=stride, padding=padding, dilation=dilation, output_padding=op) wrapper.eval() wrapper(x_empty) @patch('torch.__version__', torch_version) @pytest.mark.parametrize( 'in_w,in_h,in_channel,out_channel,kernel_size,stride,padding,dilation', [(10, 10, 1, 1, 3, 1, 0, 1), (20, 20, 3, 3, 5, 2, 1, 2)]) def test_max_pool_2d(in_w, in_h, in_channel, out_channel, kernel_size, stride, padding, dilation): # wrapper op with an empty input (size 0 along dim 0) x_empty = torch.randn(0, in_channel, in_h, in_w, requires_grad=True) wrapper = MaxPool2d( kernel_size, stride=stride, padding=padding, dilation=dilation) wrapper_out = wrapper(x_empty) # torch op with a non-empty input (size 3 along dim 0) as shape reference x_normal = torch.randn(3, in_channel, in_h, in_w) ref = nn.MaxPool2d( kernel_size, stride=stride, padding=padding, dilation=dilation) ref_out = ref(x_normal) assert wrapper_out.shape[0] == 0 assert wrapper_out.shape[1:] == ref_out.shape[1:] assert torch.equal(wrapper(x_normal), ref_out) @patch('torch.__version__', torch_version) @pytest.mark.parametrize( 'in_w,in_h,in_t,in_channel,out_channel,kernel_size,stride,padding,dilation', # noqa: E501 [(10, 10, 10, 1, 1, 3, 1, 0, 1), (20, 20, 20, 3, 3, 5,
2, 1, 2)]) @pytest.mark.skipif( torch.__version__ == 'parrots' and not torch.cuda.is_available(), reason='parrots requires CUDA support') def test_max_pool_3d(in_w, in_h, in_t, in_channel, out_channel, kernel_size, stride, padding, dilation): # wrapper op with an empty input (size 0 along dim 0) x_empty = torch.randn(0, in_channel, in_t, in_h, in_w, requires_grad=True) wrapper = MaxPool3d( kernel_size, stride=stride, padding=padding, dilation=dilation) if torch.__version__ == 'parrots': x_empty = x_empty.cuda() wrapper_out = wrapper(x_empty) # torch op with a non-empty input (size 3 along dim 0) as shape reference x_normal = torch.randn(3, in_channel, in_t, in_h, in_w) ref = nn.MaxPool3d( kernel_size, stride=stride, padding=padding, dilation=dilation) if torch.__version__ == 'parrots': x_normal = x_normal.cuda() ref_out = ref(x_normal) assert wrapper_out.shape[0] == 0 assert wrapper_out.shape[1:] == ref_out.shape[1:] assert torch.equal(wrapper(x_normal), ref_out) @patch('torch.__version__', torch_version) @pytest.mark.parametrize('in_w,in_h,in_feature,out_feature', [(10, 10, 1, 1), (20, 20, 3, 3)]) def test_linear(in_w, in_h, in_feature, out_feature): # wrapper op with an empty input (size 0 along dim 0) x_empty = torch.randn(0, in_feature, requires_grad=True) torch.manual_seed(0) wrapper = Linear(in_feature, out_feature) wrapper_out = wrapper(x_empty) # torch op with a non-empty input (size 3 along dim 0) as shape reference x_normal = torch.randn(3, in_feature) torch.manual_seed(0) ref = nn.Linear(in_feature, out_feature) ref_out = ref(x_normal) assert wrapper_out.shape[0] == 0 assert wrapper_out.shape[1:] == ref_out.shape[1:] wrapper_out.sum().backward() assert wrapper.weight.grad is not None assert wrapper.weight.grad.shape == wrapper.weight.shape assert torch.equal(wrapper(x_normal), ref_out) # eval mode x_empty = torch.randn(0, in_feature) wrapper = Linear(in_feature, out_feature) wrapper.eval() wrapper(x_empty) @patch('mmcv.cnn.bricks.wrappers.TORCH_VERSION', (1, 10)) def test_nn_op_forward_called(): for m in ['Conv2d', 'ConvTranspose2d', 'MaxPool2d']: with
patch(f'torch.nn.{m}.forward') as nn_module_forward: # empty input (size 0 along dim 0) x_empty = torch.randn(0, 3, 10, 10) wrapper = eval(m)(3, 2, 1) wrapper(x_empty) nn_module_forward.assert_called_with(x_empty) # non-empty input (size 1 along dim 0) x_normal = torch.randn(1, 3, 10, 10) wrapper = eval(m)(3, 2, 1) wrapper(x_normal) nn_module_forward.assert_called_with(x_normal) for m in ['Conv3d', 'ConvTranspose3d', 'MaxPool3d']: with patch(f'torch.nn.{m}.forward') as nn_module_forward: # empty input (size 0 along dim 0) x_empty = torch.randn(0, 3, 10, 10, 10) wrapper = eval(m)(3, 2, 1) wrapper(x_empty) nn_module_forward.assert_called_with(x_empty) # non-empty input (size 1 along dim 0) x_normal = torch.randn(1, 3, 10, 10, 10) wrapper = eval(m)(3, 2, 1) wrapper(x_normal) nn_module_forward.assert_called_with(x_normal) with patch('torch.nn.Linear.forward') as nn_module_forward: # empty input (size 0 along dim 0) x_empty = torch.randn(0, 3) wrapper = Linear(3, 3) wrapper(x_empty) nn_module_forward.assert_called_with(x_empty) # non-empty input (size 1 along dim 0) x_normal = torch.randn(1, 3) wrapper = Linear(3, 3) wrapper(x_normal) nn_module_forward.assert_called_with(x_normal) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_fileclient.py ================================================ import os import os.path as osp import sys import tempfile from contextlib import contextmanager from copy import deepcopy from pathlib import Path from unittest.mock import MagicMock, patch import pytest import mmcv from mmcv import BaseStorageBackend, FileClient from mmcv.utils import has_method sys.modules['ceph'] = MagicMock() sys.modules['petrel_client'] = MagicMock() sys.modules['petrel_client.client'] = MagicMock() sys.modules['mc'] = MagicMock() @contextmanager def build_temporary_directory(): """Build a temporary directory containing many files to test ``FileClient.list_dir_or_file``. .
\n | -- dir1 \n | -- | -- text3.txt \n | -- dir2 \n | -- | -- dir3 \n | -- | -- | -- text4.txt \n | -- | -- img.jpg \n | -- text1.txt \n | -- text2.txt \n """ with tempfile.TemporaryDirectory() as tmp_dir: text1 = Path(tmp_dir) / 'text1.txt' text1.open('w').write('text1') text2 = Path(tmp_dir) / 'text2.txt' text2.open('w').write('text2') dir1 = Path(tmp_dir) / 'dir1' dir1.mkdir() text3 = dir1 / 'text3.txt' text3.open('w').write('text3') dir2 = Path(tmp_dir) / 'dir2' dir2.mkdir() jpg1 = dir2 / 'img.jpg' jpg1.open('wb').write(b'img') dir3 = dir2 / 'dir3' dir3.mkdir() text4 = dir3 / 'text4.txt' text4.open('w').write('text4') yield tmp_dir @contextmanager def delete_and_reset_method(obj, method): method_obj = deepcopy(getattr(type(obj), method)) try: delattr(type(obj), method) yield finally: setattr(type(obj), method, method_obj) class MockS3Client: def __init__(self, enable_mc=True): self.enable_mc = enable_mc def Get(self, filepath): with open(filepath, 'rb') as f: content = f.read() return content class MockPetrelClient: def __init__(self, enable_mc=True, enable_multi_cluster=False): self.enable_mc = enable_mc self.enable_multi_cluster = enable_multi_cluster def Get(self, filepath): with open(filepath, 'rb') as f: content = f.read() return content def put(self): pass def delete(self): pass def contains(self): pass def isdir(self): pass def list(self, dir_path): for entry in os.scandir(dir_path): if not entry.name.startswith('.') and entry.is_file(): yield entry.name elif osp.isdir(entry.path): yield entry.name + '/' class MockMemcachedClient: def __init__(self, server_list_cfg, client_cfg): pass def Get(self, filepath, buffer): with open(filepath, 'rb') as f: buffer.content = f.read() class TestFileClient: @classmethod def setup_class(cls): cls.test_data_dir = Path(__file__).parent / 'data' cls.img_path = cls.test_data_dir / 'color.jpg' cls.img_shape = (300, 400, 3) cls.text_path = cls.test_data_dir / 'filelist.txt' def test_error(self): with 
pytest.raises(ValueError): FileClient('hadoop') def test_disk_backend(self): disk_backend = FileClient('disk') # test `name` attribute assert disk_backend.name == 'HardDiskBackend' # test `allow_symlink` attribute assert disk_backend.allow_symlink # test `get` # input path is Path object img_bytes = disk_backend.get(self.img_path) img = mmcv.imfrombytes(img_bytes) assert self.img_path.open('rb').read() == img_bytes assert img.shape == self.img_shape # input path is str img_bytes = disk_backend.get(str(self.img_path)) img = mmcv.imfrombytes(img_bytes) assert self.img_path.open('rb').read() == img_bytes assert img.shape == self.img_shape # test `get_text` # input path is Path object value_buf = disk_backend.get_text(self.text_path) assert self.text_path.open('r').read() == value_buf # input path is str value_buf = disk_backend.get_text(str(self.text_path)) assert self.text_path.open('r').read() == value_buf with tempfile.TemporaryDirectory() as tmp_dir: # test `put` filepath1 = Path(tmp_dir) / 'test.jpg' disk_backend.put(b'disk', filepath1) assert filepath1.open('rb').read() == b'disk' # test the `mkdir_or_exist` behavior in `put` _filepath1 = Path(tmp_dir) / 'not_existed_dir1' / 'test.jpg' disk_backend.put(b'disk', _filepath1) assert _filepath1.open('rb').read() == b'disk' # test `put_text` filepath2 = Path(tmp_dir) / 'test.txt' disk_backend.put_text('disk', filepath2) assert filepath2.open('r').read() == 'disk' # test the `mkdir_or_exist` behavior in `put_text` _filepath2 = Path(tmp_dir) / 'not_existed_dir2' / 'test.txt' disk_backend.put_text('disk', _filepath2) assert _filepath2.open('r').read() == 'disk' # test `isfile` assert disk_backend.isfile(filepath2) assert not disk_backend.isfile(Path(tmp_dir) / 'not/existed/path') # test `remove` disk_backend.remove(filepath2) # test `exists` assert not disk_backend.exists(filepath2) # test `get_local_path` # if the backend is disk, `get_local_path` just returns the input with disk_backend.get_local_path(filepath1) as
path: assert str(filepath1) == path assert osp.isfile(filepath1) # test `join_path` disk_dir = '/path/of/your/directory' assert disk_backend.join_path(disk_dir, 'file') == \ osp.join(disk_dir, 'file') assert disk_backend.join_path(disk_dir, 'dir', 'file') == \ osp.join(disk_dir, 'dir', 'file') # test `list_dir_or_file` with build_temporary_directory() as tmp_dir: # 1. list directories and files assert set(disk_backend.list_dir_or_file(tmp_dir)) == set( ['dir1', 'dir2', 'text1.txt', 'text2.txt']) # 2. list directories and files recursively assert set(disk_backend.list_dir_or_file( tmp_dir, recursive=True)) == set([ 'dir1', osp.join('dir1', 'text3.txt'), 'dir2', osp.join('dir2', 'dir3'), osp.join('dir2', 'dir3', 'text4.txt'), osp.join('dir2', 'img.jpg'), 'text1.txt', 'text2.txt' ]) # 3. only list directories assert set( disk_backend.list_dir_or_file( tmp_dir, list_file=False)) == set(['dir1', 'dir2']) with pytest.raises( TypeError, match='`suffix` should be None when `list_dir` is True'): # Exception is raised among the `list_dir_or_file` of client, # so we need to invoke the client to trigger the exception disk_backend.client.list_dir_or_file( tmp_dir, list_file=False, suffix='.txt') # 4. only list directories recursively assert set( disk_backend.list_dir_or_file( tmp_dir, list_file=False, recursive=True)) == set( ['dir1', 'dir2', osp.join('dir2', 'dir3')]) # 5. only list files assert set(disk_backend.list_dir_or_file( tmp_dir, list_dir=False)) == set(['text1.txt', 'text2.txt']) # 6. only list files recursively assert set( disk_backend.list_dir_or_file( tmp_dir, list_dir=False, recursive=True)) == set([ osp.join('dir1', 'text3.txt'), osp.join('dir2', 'dir3', 'text4.txt'), osp.join('dir2', 'img.jpg'), 'text1.txt', 'text2.txt' ]) # 7.
only list files ending with suffix assert set( disk_backend.list_dir_or_file( tmp_dir, list_dir=False, suffix='.txt')) == set(['text1.txt', 'text2.txt']) assert set( disk_backend.list_dir_or_file( tmp_dir, list_dir=False, suffix=('.txt', '.jpg'))) == set(['text1.txt', 'text2.txt']) with pytest.raises( TypeError, match='`suffix` must be a string or tuple of strings'): disk_backend.client.list_dir_or_file( tmp_dir, list_dir=False, suffix=['.txt', '.jpg']) # 8. only list files ending with suffix recursively assert set( disk_backend.list_dir_or_file( tmp_dir, list_dir=False, suffix='.txt', recursive=True)) == set([ osp.join('dir1', 'text3.txt'), osp.join('dir2', 'dir3', 'text4.txt'), 'text1.txt', 'text2.txt' ]) # 9. only list files ending with multiple suffixes recursively assert set( disk_backend.list_dir_or_file( tmp_dir, list_dir=False, suffix=('.txt', '.jpg'), recursive=True)) == set([ osp.join('dir1', 'text3.txt'), osp.join('dir2', 'dir3', 'text4.txt'), osp.join('dir2', 'img.jpg'), 'text1.txt', 'text2.txt' ]) @patch('ceph.S3Client', MockS3Client) def test_ceph_backend(self): ceph_backend = FileClient('ceph') # test `allow_symlink` attribute assert not ceph_backend.allow_symlink # input path is Path object with pytest.raises(NotImplementedError): ceph_backend.get_text(self.text_path) # input path is str with pytest.raises(NotImplementedError): ceph_backend.get_text(str(self.text_path)) # input path is Path object img_bytes = ceph_backend.get(self.img_path) img = mmcv.imfrombytes(img_bytes) assert img.shape == self.img_shape # input path is str img_bytes = ceph_backend.get(str(self.img_path)) img = mmcv.imfrombytes(img_bytes) assert img.shape == self.img_shape # `path_mapping` is either None or dict with pytest.raises(AssertionError): FileClient('ceph', path_mapping=1) # test `path_mapping` ceph_path = 's3://user/data' ceph_backend = FileClient( 'ceph', path_mapping={str(self.test_data_dir): ceph_path}) ceph_backend.client._client.Get = MagicMock(
return_value=ceph_backend.client._client.Get(self.img_path)) img_bytes = ceph_backend.get(self.img_path) img = mmcv.imfrombytes(img_bytes) assert img.shape == self.img_shape ceph_backend.client._client.Get.assert_called_with( str(self.img_path).replace(str(self.test_data_dir), ceph_path)) @patch('petrel_client.client.Client', MockPetrelClient) @pytest.mark.parametrize('backend,prefix', [('petrel', None), (None, 's3')]) def test_petrel_backend(self, backend, prefix): petrel_backend = FileClient(backend=backend, prefix=prefix) # test `allow_symlink` attribute assert not petrel_backend.allow_symlink # input path is Path object img_bytes = petrel_backend.get(self.img_path) img = mmcv.imfrombytes(img_bytes) assert img.shape == self.img_shape # input path is str img_bytes = petrel_backend.get(str(self.img_path)) img = mmcv.imfrombytes(img_bytes) assert img.shape == self.img_shape # `path_mapping` is either None or dict with pytest.raises(AssertionError): FileClient('petrel', path_mapping=1) # test `_map_path` petrel_dir = 's3://user/data' petrel_backend = FileClient( 'petrel', path_mapping={str(self.test_data_dir): petrel_dir}) assert petrel_backend.client._map_path(str(self.img_path)) == \ str(self.img_path).replace(str(self.test_data_dir), petrel_dir) petrel_path = f'{petrel_dir}/test.jpg' petrel_backend = FileClient('petrel') # test `_format_path` assert petrel_backend.client._format_path('s3://user\\data\\test.jpg')\ == petrel_path # test `get` with patch.object( petrel_backend.client._client, 'Get', return_value=b'petrel') as mock_get: assert petrel_backend.get(petrel_path) == b'petrel' mock_get.assert_called_once_with(petrel_path) # test `get_text` with patch.object( petrel_backend.client._client, 'Get', return_value=b'petrel') as mock_get: assert petrel_backend.get_text(petrel_path) == 'petrel' mock_get.assert_called_once_with(petrel_path) # test `put` with patch.object(petrel_backend.client._client, 'put') as mock_put: petrel_backend.put(b'petrel', petrel_path) 
mock_put.assert_called_once_with(petrel_path, b'petrel') # test `put_text` with patch.object(petrel_backend.client._client, 'put') as mock_put: petrel_backend.put_text('petrel', petrel_path) mock_put.assert_called_once_with(petrel_path, b'petrel') # test `remove` assert has_method(petrel_backend.client._client, 'delete') # raise Exception if `delete` is not implemented with delete_and_reset_method(petrel_backend.client._client, 'delete'): assert not has_method(petrel_backend.client._client, 'delete') with pytest.raises(NotImplementedError): petrel_backend.remove(petrel_path) with patch.object(petrel_backend.client._client, 'delete') as mock_delete: petrel_backend.remove(petrel_path) mock_delete.assert_called_once_with(petrel_path) # test `exists` assert has_method(petrel_backend.client._client, 'contains') assert has_method(petrel_backend.client._client, 'isdir') # raise Exception if neither `contains` nor `isdir` is implemented with delete_and_reset_method(petrel_backend.client._client, 'contains'), delete_and_reset_method( petrel_backend.client._client, 'isdir'): assert not has_method(petrel_backend.client._client, 'contains') assert not has_method(petrel_backend.client._client, 'isdir') with pytest.raises(NotImplementedError): petrel_backend.exists(petrel_path) with patch.object( petrel_backend.client._client, 'contains', return_value=True) as mock_contains: assert petrel_backend.exists(petrel_path) mock_contains.assert_called_once_with(petrel_path) # test `isdir` assert has_method(petrel_backend.client._client, 'isdir') with delete_and_reset_method(petrel_backend.client._client, 'isdir'): assert not has_method(petrel_backend.client._client, 'isdir') with pytest.raises(NotImplementedError): petrel_backend.isdir(petrel_path) with patch.object( petrel_backend.client._client, 'isdir', return_value=True) as mock_isdir: assert petrel_backend.isdir(petrel_dir) mock_isdir.assert_called_once_with(petrel_dir) # test `isfile` assert has_method(petrel_backend.client._client, 'contains')
with delete_and_reset_method(petrel_backend.client._client, 'contains'): assert not has_method(petrel_backend.client._client, 'contains') with pytest.raises(NotImplementedError): petrel_backend.isfile(petrel_path) with patch.object( petrel_backend.client._client, 'contains', return_value=True) as mock_contains: assert petrel_backend.isfile(petrel_path) mock_contains.assert_called_once_with(petrel_path) # test `join_path` assert petrel_backend.join_path(petrel_dir, 'file') == \ f'{petrel_dir}/file' assert petrel_backend.join_path(f'{petrel_dir}/', 'file') == \ f'{petrel_dir}/file' assert petrel_backend.join_path(petrel_dir, 'dir', 'file') == \ f'{petrel_dir}/dir/file' # test `get_local_path` with patch.object(petrel_backend.client._client, 'Get', return_value=b'petrel') as mock_get, \ patch.object(petrel_backend.client._client, 'contains', return_value=True) as mock_contains: with petrel_backend.get_local_path(petrel_path) as path: assert Path(path).open('rb').read() == b'petrel' # exit the with block and path will be released assert not osp.isfile(path) mock_get.assert_called_once_with(petrel_path) mock_contains.assert_called_once_with(petrel_path) # test `list_dir_or_file` assert has_method(petrel_backend.client._client, 'list') with delete_and_reset_method(petrel_backend.client._client, 'list'): assert not has_method(petrel_backend.client._client, 'list') with pytest.raises(NotImplementedError): list(petrel_backend.list_dir_or_file(petrel_dir)) with build_temporary_directory() as tmp_dir: # 1. list directories and files assert set(petrel_backend.list_dir_or_file(tmp_dir)) == set( ['dir1', 'dir2', 'text1.txt', 'text2.txt']) # 2. list directories and files recursively assert set( petrel_backend.list_dir_or_file( tmp_dir, recursive=True)) == set([ 'dir1', '/'.join(('dir1', 'text3.txt')), 'dir2', '/'.join(('dir2', 'dir3')), '/'.join( ('dir2', 'dir3', 'text4.txt')), '/'.join( ('dir2', 'img.jpg')), 'text1.txt', 'text2.txt' ]) # 3.
only list directories assert set( petrel_backend.list_dir_or_file( tmp_dir, list_file=False)) == set(['dir1', 'dir2']) with pytest.raises( TypeError, match=('`list_dir` should be False when `suffix` is not ' 'None')): # Exception is raised among the `list_dir_or_file` of client, # so we need to invoke the client to trigger the exception petrel_backend.client.list_dir_or_file( tmp_dir, list_file=False, suffix='.txt') # 4. only list directories recursively assert set( petrel_backend.list_dir_or_file( tmp_dir, list_file=False, recursive=True)) == set( ['dir1', 'dir2', '/'.join(('dir2', 'dir3'))]) # 5. only list files assert set( petrel_backend.list_dir_or_file(tmp_dir, list_dir=False)) == set( ['text1.txt', 'text2.txt']) # 6. only list files recursively assert set( petrel_backend.list_dir_or_file( tmp_dir, list_dir=False, recursive=True)) == set([ '/'.join(('dir1', 'text3.txt')), '/'.join( ('dir2', 'dir3', 'text4.txt')), '/'.join( ('dir2', 'img.jpg')), 'text1.txt', 'text2.txt' ]) # 7. only list files ending with suffix assert set( petrel_backend.list_dir_or_file( tmp_dir, list_dir=False, suffix='.txt')) == set(['text1.txt', 'text2.txt']) assert set( petrel_backend.list_dir_or_file( tmp_dir, list_dir=False, suffix=('.txt', '.jpg'))) == set(['text1.txt', 'text2.txt']) with pytest.raises( TypeError, match='`suffix` must be a string or tuple of strings'): petrel_backend.client.list_dir_or_file( tmp_dir, list_dir=False, suffix=['.txt', '.jpg']) # 8. only list files ending with suffix recursively assert set( petrel_backend.list_dir_or_file( tmp_dir, list_dir=False, suffix='.txt', recursive=True)) == set([ '/'.join(('dir1', 'text3.txt')), '/'.join( ('dir2', 'dir3', 'text4.txt')), 'text1.txt', 'text2.txt' ]) # 9.
only list files ending with suffix assert set( petrel_backend.list_dir_or_file( tmp_dir, list_dir=False, suffix=('.txt', '.jpg'), recursive=True)) == set([ '/'.join(('dir1', 'text3.txt')), '/'.join( ('dir2', 'dir3', 'text4.txt')), '/'.join( ('dir2', 'img.jpg')), 'text1.txt', 'text2.txt' ]) @patch('mc.MemcachedClient.GetInstance', MockMemcachedClient) @patch('mc.pyvector', MagicMock) @patch('mc.ConvertBuffer', lambda x: x.content) def test_memcached_backend(self): mc_cfg = dict(server_list_cfg='', client_cfg='', sys_path=None) mc_backend = FileClient('memcached', **mc_cfg) # test `allow_symlink` attribute assert not mc_backend.allow_symlink # input path is Path object with pytest.raises(NotImplementedError): mc_backend.get_text(self.text_path) # input path is str with pytest.raises(NotImplementedError): mc_backend.get_text(str(self.text_path)) # input path is Path object img_bytes = mc_backend.get(self.img_path) img = mmcv.imfrombytes(img_bytes) assert img.shape == self.img_shape # input path is str img_bytes = mc_backend.get(str(self.img_path)) img = mmcv.imfrombytes(img_bytes) assert img.shape == self.img_shape def test_lmdb_backend(self): lmdb_path = self.test_data_dir / 'demo.lmdb' # db_path is Path object lmdb_backend = FileClient('lmdb', db_path=lmdb_path) # test `allow_symlink` attribute assert not lmdb_backend.allow_symlink with pytest.raises(NotImplementedError): lmdb_backend.get_text(self.text_path) img_bytes = lmdb_backend.get('baboon') img = mmcv.imfrombytes(img_bytes) assert img.shape == (120, 125, 3) # db_path is str lmdb_backend = FileClient('lmdb', db_path=str(lmdb_path)) with pytest.raises(NotImplementedError): lmdb_backend.get_text(str(self.text_path)) img_bytes = lmdb_backend.get('baboon') img = mmcv.imfrombytes(img_bytes) assert img.shape == (120, 125, 3) @pytest.mark.parametrize('backend,prefix', [('http', None), (None, 'http')]) def test_http_backend(self, backend, prefix): http_backend = FileClient(backend=backend, prefix=prefix) img_url = 
'https://raw.githubusercontent.com/open-mmlab/mmcv/' \ 'master/tests/data/color.jpg' text_url = 'https://raw.githubusercontent.com/open-mmlab/mmcv/' \ 'master/tests/data/filelist.txt' # test `allow_symlink` attribute assert not http_backend.allow_symlink # input is path or Path object with pytest.raises(Exception): http_backend.get(self.img_path) with pytest.raises(Exception): http_backend.get(str(self.img_path)) with pytest.raises(Exception): http_backend.get_text(self.text_path) with pytest.raises(Exception): http_backend.get_text(str(self.text_path)) # input url is http image img_bytes = http_backend.get(img_url) img = mmcv.imfrombytes(img_bytes) assert img.shape == self.img_shape # input url is http text value_buf = http_backend.get_text(text_url) assert self.text_path.open('r').read() == value_buf # test `_get_local_path` # exist the with block and path will be released with http_backend.get_local_path(img_url) as path: assert mmcv.imread(path).shape == self.img_shape assert not osp.isfile(path) def test_new_magic_method(self): class DummyBackend1(BaseStorageBackend): def get(self, filepath): return filepath def get_text(self, filepath, encoding='utf-8'): return filepath FileClient.register_backend('dummy_backend', DummyBackend1) client1 = FileClient(backend='dummy_backend') client2 = FileClient(backend='dummy_backend') assert client1 is client2 # if a backend is overwrote, it will disable the singleton pattern for # the backend class DummyBackend2(BaseStorageBackend): def get(self, filepath): pass def get_text(self, filepath): pass FileClient.register_backend('dummy_backend', DummyBackend2, force=True) client3 = FileClient(backend='dummy_backend') client4 = FileClient(backend='dummy_backend') assert client3 is not client4 def test_parse_uri_prefix(self): # input path is None with pytest.raises(AssertionError): FileClient.parse_uri_prefix(None) # input path is list with pytest.raises(AssertionError): FileClient.parse_uri_prefix([]) # input path is Path object 
assert FileClient.parse_uri_prefix(self.img_path) is None # input path is str assert FileClient.parse_uri_prefix(str(self.img_path)) is None # input path starts with https img_url = 'https://raw.githubusercontent.com/open-mmlab/mmcv/' \ 'master/tests/data/color.jpg' assert FileClient.parse_uri_prefix(img_url) == 'https' # input path starts with s3 img_url = 's3://your_bucket/img.png' assert FileClient.parse_uri_prefix(img_url) == 's3' # input path starts with clusterName:s3 img_url = 'clusterName:s3://your_bucket/img.png' assert FileClient.parse_uri_prefix(img_url) == 's3' def test_infer_client(self): # HardDiskBackend file_client_args = {'backend': 'disk'} client = FileClient.infer_client(file_client_args) assert client.name == 'HardDiskBackend' client = FileClient.infer_client(uri=self.img_path) assert client.name == 'HardDiskBackend' # PetrelBackend file_client_args = {'backend': 'petrel'} client = FileClient.infer_client(file_client_args) assert client.name == 'PetrelBackend' uri = 's3://user_data' client = FileClient.infer_client(uri=uri) assert client.name == 'PetrelBackend' def test_register_backend(self): # name must be a string with pytest.raises(TypeError): class TestClass1: pass FileClient.register_backend(1, TestClass1) # module must be a class with pytest.raises(TypeError): FileClient.register_backend('int', 0) # module must be a subclass of BaseStorageBackend with pytest.raises(TypeError): class TestClass1: pass FileClient.register_backend('TestClass1', TestClass1) class ExampleBackend(BaseStorageBackend): def get(self, filepath): return filepath def get_text(self, filepath, encoding='utf-8'): return filepath FileClient.register_backend('example', ExampleBackend) example_backend = FileClient('example') assert example_backend.get(self.img_path) == self.img_path assert example_backend.get_text(self.text_path) == self.text_path assert 'example' in FileClient._backends class Example2Backend(BaseStorageBackend): def get(self, filepath): return b'bytes2' 
def get_text(self, filepath, encoding='utf-8'): return 'text2' # force=False with pytest.raises(KeyError): FileClient.register_backend('example', Example2Backend) FileClient.register_backend('example', Example2Backend, force=True) example_backend = FileClient('example') assert example_backend.get(self.img_path) == b'bytes2' assert example_backend.get_text(self.text_path) == 'text2' @FileClient.register_backend(name='example3') class Example3Backend(BaseStorageBackend): def get(self, filepath): return b'bytes3' def get_text(self, filepath, encoding='utf-8'): return 'text3' example_backend = FileClient('example3') assert example_backend.get(self.img_path) == b'bytes3' assert example_backend.get_text(self.text_path) == 'text3' assert 'example3' in FileClient._backends # force=False with pytest.raises(KeyError): @FileClient.register_backend(name='example3') class Example4Backend(BaseStorageBackend): def get(self, filepath): return b'bytes4' def get_text(self, filepath, encoding='utf-8'): return 'text4' @FileClient.register_backend(name='example3', force=True) class Example5Backend(BaseStorageBackend): def get(self, filepath): return b'bytes5' def get_text(self, filepath, encoding='utf-8'): return 'text5' example_backend = FileClient('example3') assert example_backend.get(self.img_path) == b'bytes5' assert example_backend.get_text(self.text_path) == 'text5' # prefixes is a str class Example6Backend(BaseStorageBackend): def get(self, filepath): return b'bytes6' def get_text(self, filepath, encoding='utf-8'): return 'text6' FileClient.register_backend( 'example4', Example6Backend, force=True, prefixes='example4_prefix') example_backend = FileClient('example4') assert example_backend.get(self.img_path) == b'bytes6' assert example_backend.get_text(self.text_path) == 'text6' example_backend = FileClient(prefix='example4_prefix') assert example_backend.get(self.img_path) == b'bytes6' assert example_backend.get_text(self.text_path) == 'text6' example_backend = 
FileClient('example4', prefix='example4_prefix') assert example_backend.get(self.img_path) == b'bytes6' assert example_backend.get_text(self.text_path) == 'text6' # prefixes is a list of str class Example7Backend(BaseStorageBackend): def get(self, filepath): return b'bytes7' def get_text(self, filepath, encoding='utf-8'): return 'text7' FileClient.register_backend( 'example5', Example7Backend, force=True, prefixes=['example5_prefix1', 'example5_prefix2']) example_backend = FileClient('example5') assert example_backend.get(self.img_path) == b'bytes7' assert example_backend.get_text(self.text_path) == 'text7' example_backend = FileClient(prefix='example5_prefix1') assert example_backend.get(self.img_path) == b'bytes7' assert example_backend.get_text(self.text_path) == 'text7' example_backend = FileClient(prefix='example5_prefix2') assert example_backend.get(self.img_path) == b'bytes7' assert example_backend.get_text(self.text_path) == 'text7' # backend has a higher priority than prefixes class Example8Backend(BaseStorageBackend): def get(self, filepath): return b'bytes8' def get_text(self, filepath, encoding='utf-8'): return 'text8' FileClient.register_backend( 'example6', Example8Backend, force=True, prefixes='example6_prefix') example_backend = FileClient('example6') assert example_backend.get(self.img_path) == b'bytes8' assert example_backend.get_text(self.text_path) == 'text8' example_backend = FileClient('example6', prefix='example4_prefix') assert example_backend.get(self.img_path) == b'bytes8' assert example_backend.get_text(self.text_path) == 'text8' ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_fileio.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
import os import os.path as osp import sys import tempfile from unittest.mock import MagicMock, patch import pytest import mmcv from mmcv.fileio.file_client import HTTPBackend, PetrelBackend sys.modules['petrel_client'] = MagicMock() sys.modules['petrel_client.client'] = MagicMock() def _test_handler(file_format, test_obj, str_checker, mode='r+'): # dump to a string dump_str = mmcv.dump(test_obj, file_format=file_format) str_checker(dump_str) # load/dump with filenames from disk tmp_filename = osp.join(tempfile.gettempdir(), 'mmcv_test_dump') mmcv.dump(test_obj, tmp_filename, file_format=file_format) assert osp.isfile(tmp_filename) load_obj = mmcv.load(tmp_filename, file_format=file_format) assert load_obj == test_obj os.remove(tmp_filename) # load/dump with filename from petrel method = 'put' if 'b' in mode else 'put_text' with patch.object(PetrelBackend, method, return_value=None) as mock_method: filename = 's3://path/of/your/file' mmcv.dump(test_obj, filename, file_format=file_format) mock_method.assert_called() # json load/dump with a file-like object with tempfile.NamedTemporaryFile(mode, delete=False) as f: tmp_filename = f.name mmcv.dump(test_obj, f, file_format=file_format) assert osp.isfile(tmp_filename) with open(tmp_filename, mode) as f: load_obj = mmcv.load(f, file_format=file_format) assert load_obj == test_obj os.remove(tmp_filename) # automatically inference the file format from the given filename tmp_filename = osp.join(tempfile.gettempdir(), 'mmcv_test_dump.' 
+ file_format) mmcv.dump(test_obj, tmp_filename) assert osp.isfile(tmp_filename) load_obj = mmcv.load(tmp_filename) assert load_obj == test_obj os.remove(tmp_filename) obj_for_test = [{'a': 'abc', 'b': 1}, 2, 'c'] def test_json(): def json_checker(dump_str): assert dump_str in [ '[{"a": "abc", "b": 1}, 2, "c"]', '[{"b": 1, "a": "abc"}, 2, "c"]' ] _test_handler('json', obj_for_test, json_checker) def test_yaml(): def yaml_checker(dump_str): assert dump_str in [ '- {a: abc, b: 1}\n- 2\n- c\n', '- {b: 1, a: abc}\n- 2\n- c\n', '- a: abc\n b: 1\n- 2\n- c\n', '- b: 1\n a: abc\n- 2\n- c\n' ] _test_handler('yaml', obj_for_test, yaml_checker) def test_pickle(): def pickle_checker(dump_str): import pickle assert pickle.loads(dump_str) == obj_for_test _test_handler('pickle', obj_for_test, pickle_checker, mode='rb+') def test_exception(): test_obj = [{'a': 'abc', 'b': 1}, 2, 'c'] with pytest.raises(ValueError): mmcv.dump(test_obj) with pytest.raises(TypeError): mmcv.dump(test_obj, 'tmp.txt') def test_register_handler(): @mmcv.register_handler('txt') class TxtHandler1(mmcv.BaseFileHandler): def load_from_fileobj(self, file): return file.read() def dump_to_fileobj(self, obj, file): file.write(str(obj)) def dump_to_str(self, obj, **kwargs): return str(obj) @mmcv.register_handler(['txt1', 'txt2']) class TxtHandler2(mmcv.BaseFileHandler): def load_from_fileobj(self, file): return file.read() def dump_to_fileobj(self, obj, file): file.write('\n') file.write(str(obj)) def dump_to_str(self, obj, **kwargs): return str(obj) content = mmcv.load(osp.join(osp.dirname(__file__), 'data/filelist.txt')) assert content == '1.jpg\n2.jpg\n3.jpg\n4.jpg\n5.jpg' tmp_filename = osp.join(tempfile.gettempdir(), 'mmcv_test.txt2') mmcv.dump(content, tmp_filename) with open(tmp_filename, 'r') as f: written = f.read() os.remove(tmp_filename) assert written == '\n' + content def test_list_from_file(): # get list from disk filename = osp.join(osp.dirname(__file__), 'data/filelist.txt') filelist = 
mmcv.list_from_file(filename) assert filelist == ['1.jpg', '2.jpg', '3.jpg', '4.jpg', '5.jpg'] filelist = mmcv.list_from_file(filename, prefix='a/') assert filelist == ['a/1.jpg', 'a/2.jpg', 'a/3.jpg', 'a/4.jpg', 'a/5.jpg'] filelist = mmcv.list_from_file(filename, offset=2) assert filelist == ['3.jpg', '4.jpg', '5.jpg'] filelist = mmcv.list_from_file(filename, max_num=2) assert filelist == ['1.jpg', '2.jpg'] filelist = mmcv.list_from_file(filename, offset=3, max_num=3) assert filelist == ['4.jpg', '5.jpg'] # get list from http with patch.object( HTTPBackend, 'get_text', return_value='1.jpg\n2.jpg\n3.jpg'): filename = 'http://path/of/your/file' filelist = mmcv.list_from_file( filename, file_client_args={'backend': 'http'}) assert filelist == ['1.jpg', '2.jpg', '3.jpg'] filelist = mmcv.list_from_file( filename, file_client_args={'prefix': 'http'}) assert filelist == ['1.jpg', '2.jpg', '3.jpg'] filelist = mmcv.list_from_file(filename) assert filelist == ['1.jpg', '2.jpg', '3.jpg'] # get list from petrel with patch.object( PetrelBackend, 'get_text', return_value='1.jpg\n2.jpg\n3.jpg'): filename = 's3://path/of/your/file' filelist = mmcv.list_from_file( filename, file_client_args={'backend': 'petrel'}) assert filelist == ['1.jpg', '2.jpg', '3.jpg'] filelist = mmcv.list_from_file( filename, file_client_args={'prefix': 's3'}) assert filelist == ['1.jpg', '2.jpg', '3.jpg'] filelist = mmcv.list_from_file(filename) assert filelist == ['1.jpg', '2.jpg', '3.jpg'] def test_dict_from_file(): # get dict from disk filename = osp.join(osp.dirname(__file__), 'data/mapping.txt') mapping = mmcv.dict_from_file(filename) assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} mapping = mmcv.dict_from_file(filename, key_type=int) assert mapping == {1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} # get dict from http with patch.object( HTTPBackend, 'get_text', return_value='1 cat\n2 dog cow\n3 panda'): filename = 'http://path/of/your/file' mapping = mmcv.dict_from_file( filename, 
file_client_args={'backend': 'http'}) assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} mapping = mmcv.dict_from_file( filename, file_client_args={'prefix': 'http'}) assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} mapping = mmcv.dict_from_file(filename) assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} # get dict from petrel with patch.object( PetrelBackend, 'get_text', return_value='1 cat\n2 dog cow\n3 panda'): filename = 's3://path/of/your/file' mapping = mmcv.dict_from_file( filename, file_client_args={'backend': 'petrel'}) assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} mapping = mmcv.dict_from_file( filename, file_client_args={'prefix': 's3'}) assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} mapping = mmcv.dict_from_file(filename) assert mapping == {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_image/test_colorspace.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
import cv2 import numpy as np import pytest from numpy.testing import assert_array_almost_equal, assert_array_equal import mmcv from mmcv.image.colorspace import (_convert_input_type_range, _convert_output_type_range) def test_bgr2gray(): in_img = np.random.rand(10, 10, 3).astype(np.float32) out_img = mmcv.bgr2gray(in_img) computed_gray = ( in_img[:, :, 0] * 0.114 + in_img[:, :, 1] * 0.587 + in_img[:, :, 2] * 0.299) assert_array_almost_equal(out_img, computed_gray, decimal=4) out_img_3d = mmcv.bgr2gray(in_img, True) assert out_img_3d.shape == (10, 10, 1) assert_array_almost_equal(out_img_3d[..., 0], out_img, decimal=4) def test_rgb2gray(): in_img = np.random.rand(10, 10, 3).astype(np.float32) out_img = mmcv.rgb2gray(in_img) computed_gray = ( in_img[:, :, 0] * 0.299 + in_img[:, :, 1] * 0.587 + in_img[:, :, 2] * 0.114) assert_array_almost_equal(out_img, computed_gray, decimal=4) out_img_3d = mmcv.rgb2gray(in_img, True) assert out_img_3d.shape == (10, 10, 1) assert_array_almost_equal(out_img_3d[..., 0], out_img, decimal=4) def test_gray2bgr(): in_img = np.random.rand(10, 10).astype(np.float32) out_img = mmcv.gray2bgr(in_img) assert out_img.shape == (10, 10, 3) for i in range(3): assert_array_almost_equal(out_img[..., i], in_img, decimal=4) def test_gray2rgb(): in_img = np.random.rand(10, 10).astype(np.float32) out_img = mmcv.gray2rgb(in_img) assert out_img.shape == (10, 10, 3) for i in range(3): assert_array_almost_equal(out_img[..., i], in_img, decimal=4) def test_bgr2rgb(): in_img = np.random.rand(10, 10, 3).astype(np.float32) out_img = mmcv.bgr2rgb(in_img) assert out_img.shape == in_img.shape assert_array_equal(out_img[..., 0], in_img[..., 2]) assert_array_equal(out_img[..., 1], in_img[..., 1]) assert_array_equal(out_img[..., 2], in_img[..., 0]) def test_rgb2bgr(): in_img = np.random.rand(10, 10, 3).astype(np.float32) out_img = mmcv.rgb2bgr(in_img) assert out_img.shape == in_img.shape assert_array_equal(out_img[..., 0], in_img[..., 2]) 
assert_array_equal(out_img[..., 1], in_img[..., 1]) assert_array_equal(out_img[..., 2], in_img[..., 0]) def test_bgr2hsv(): in_img = np.random.rand(10, 10, 3).astype(np.float32) out_img = mmcv.bgr2hsv(in_img) argmax = in_img.argmax(axis=2) computed_hsv = np.empty_like(in_img) for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): b, g, r = in_img[i, j] v = max(r, g, b) s = (v - min(r, g, b)) / v if v != 0 else 0 if argmax[i, j] == 0: h = 240 + 60 * (r - g) / (v - min(r, g, b)) elif argmax[i, j] == 1: h = 120 + 60 * (b - r) / (v - min(r, g, b)) else: h = 60 * (g - b) / (v - min(r, g, b)) if h < 0: h += 360 computed_hsv[i, j, :] = [h, s, v] assert_array_almost_equal(out_img, computed_hsv, decimal=2) def test_convert_input_type_range(): with pytest.raises(TypeError): # The img type should be np.float32 or np.uint8 in_img = np.random.rand(10, 10, 3).astype(np.uint64) _convert_input_type_range(in_img) # np.float32 in_img = np.random.rand(10, 10, 3).astype(np.float32) out_img = _convert_input_type_range(in_img) assert out_img.dtype == np.float32 assert np.absolute(out_img).mean() < 1 # np.uint8 in_img = (np.random.rand(10, 10, 3) * 255).astype(np.uint8) out_img = _convert_input_type_range(in_img) assert out_img.dtype == np.float32 assert np.absolute(out_img).mean() < 1 def test_convert_output_type_range(): with pytest.raises(TypeError): # The dst_type should be np.float32 or np.uint8 in_img = np.random.rand(10, 10, 3).astype(np.float32) _convert_output_type_range(in_img, np.uint64) # np.float32 in_img = (np.random.rand(10, 10, 3) * 255).astype(np.float32) out_img = _convert_output_type_range(in_img, np.float32) assert out_img.dtype == np.float32 assert np.absolute(out_img).mean() < 1 # np.uint8 in_img = (np.random.rand(10, 10, 3) * 255).astype(np.float32) out_img = _convert_output_type_range(in_img, np.uint8) assert out_img.dtype == np.uint8 assert np.absolute(out_img).mean() > 1 def assert_image_almost_equal(x, y, atol=1): assert x.dtype == np.uint8 assert 
y.dtype == np.uint8 assert np.all(np.abs(x.astype(np.int32) - y.astype(np.int32)) <= atol) def test_rgb2ycbcr(): with pytest.raises(TypeError): # The img type should be np.float32 or np.uint8 in_img = np.random.rand(10, 10, 3).astype(np.uint64) mmcv.rgb2ycbcr(in_img) # float32 in_img = np.random.rand(10, 10, 3).astype(np.float32) out_img = mmcv.rgb2ycbcr(in_img) computed_ycbcr = np.empty_like(in_img) for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): r, g, b = in_img[i, j] y = 16 + r * 65.481 + g * 128.553 + b * 24.966 cb = 128 - r * 37.797 - g * 74.203 + b * 112.0 cr = 128 + r * 112.0 - g * 93.786 - b * 18.214 computed_ycbcr[i, j, :] = [y, cb, cr] computed_ycbcr /= 255. assert_array_almost_equal(out_img, computed_ycbcr, decimal=2) # y_only=True out_img = mmcv.rgb2ycbcr(in_img, y_only=True) computed_y = np.empty_like(out_img, dtype=out_img.dtype) for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): r, g, b = in_img[i, j] y = 16 + r * 65.481 + g * 128.553 + b * 24.966 computed_y[i, j] = y computed_y /= 255. assert_array_almost_equal(out_img, computed_y, decimal=2) # uint8 in_img = (np.random.rand(10, 10, 3) * 255).astype(np.uint8) out_img = mmcv.rgb2ycbcr(in_img) computed_ycbcr = np.empty_like(in_img) in_img = in_img / 255. for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): r, g, b = in_img[i, j] y = 16 + r * 65.481 + g * 128.553 + b * 24.966 cb = 128 - r * 37.797 - g * 74.203 + b * 112.0 cr = 128 + r * 112.0 - g * 93.786 - b * 18.214 y, cb, cr = y.round(), cb.round(), cr.round() computed_ycbcr[i, j, :] = [y, cb, cr] assert_image_almost_equal(out_img, computed_ycbcr) # y_only=True in_img = (np.random.rand(10, 10, 3) * 255).astype(np.uint8) out_img = mmcv.rgb2ycbcr(in_img, y_only=True) computed_y = np.empty_like(out_img, dtype=out_img.dtype) in_img = in_img / 255. 
for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): r, g, b = in_img[i, j] y = 16 + r * 65.481 + g * 128.553 + b * 24.966 y = y.round() computed_y[i, j] = y assert_image_almost_equal(out_img, computed_y) def test_bgr2ycbcr(): # float32 in_img = np.random.rand(10, 10, 3).astype(np.float32) out_img = mmcv.bgr2ycbcr(in_img) computed_ycbcr = np.empty_like(in_img) for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): b, g, r = in_img[i, j] y = 16 + r * 65.481 + g * 128.553 + b * 24.966 cb = 128 - r * 37.797 - g * 74.203 + b * 112.0 cr = 128 + r * 112.0 - g * 93.786 - b * 18.214 computed_ycbcr[i, j, :] = [y, cb, cr] computed_ycbcr /= 255. assert_array_almost_equal(out_img, computed_ycbcr, decimal=2) # y_only=True in_img = np.random.rand(10, 10, 3).astype(np.float32) out_img = mmcv.bgr2ycbcr(in_img, y_only=True) computed_y = np.empty_like(out_img, dtype=out_img.dtype) for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): b, g, r = in_img[i, j] y = 16 + r * 65.481 + g * 128.553 + b * 24.966 computed_y[i, j] = y computed_y /= 255. assert_array_almost_equal(out_img, computed_y, decimal=2) # uint8 in_img = (np.random.rand(10, 10, 3) * 255).astype(np.uint8) out_img = mmcv.bgr2ycbcr(in_img) computed_ycbcr = np.empty_like(in_img) in_img = in_img / 255. for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): b, g, r = in_img[i, j] y = 16 + r * 65.481 + g * 128.553 + b * 24.966 cb = 128 - r * 37.797 - g * 74.203 + b * 112.0 cr = 128 + r * 112.0 - g * 93.786 - b * 18.214 y, cb, cr = y.round(), cb.round(), cr.round() computed_ycbcr[i, j, :] = [y, cb, cr] assert_image_almost_equal(out_img, computed_ycbcr) # y_only = True in_img = (np.random.rand(10, 10, 3) * 255).astype(np.uint8) out_img = mmcv.bgr2ycbcr(in_img, y_only=True) computed_y = np.empty_like(out_img, dtype=out_img.dtype) in_img = in_img / 255. 
for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): b, g, r = in_img[i, j] y = 16 + r * 65.481 + g * 128.553 + b * 24.966 y = y.round() computed_y[i, j] = y assert_image_almost_equal(out_img, computed_y) def test_ycbcr2rgb(): with pytest.raises(TypeError): # The img type should be np.float32 or np.uint8 in_img = np.random.rand(10, 10, 3).astype(np.uint64) mmcv.ycbcr2rgb(in_img) # float32 in_img = np.random.rand(10, 10, 3).astype(np.float32) out_img = mmcv.ycbcr2rgb(in_img) computed_rgb = np.empty_like(in_img) in_img *= 255. for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): y, cb, cr = in_img[i, j] r = -222.921 + y * 0.00456621 * 255 + cr * 0.00625893 * 255 g = 135.576 + y * 0.00456621 * 255 - cb * 0.00153632 * 255 - \ cr * 0.00318811 * 255 b = -276.836 + y * 0.00456621 * 255. + cb * 0.00791071 * 255 computed_rgb[i, j, :] = [r, g, b] computed_rgb /= 255. assert_array_almost_equal(out_img, computed_rgb, decimal=2) # uint8 in_img = (np.random.rand(10, 10, 3) * 255).astype(np.uint8) out_img = mmcv.ycbcr2rgb(in_img) computed_rgb = np.empty_like(in_img) for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): y, cb, cr = in_img[i, j] r = -222.921 + y * 0.00456621 * 255 + cr * 0.00625893 * 255 g = 135.576 + y * 0.00456621 * 255 - cb * 0.00153632 * 255 - \ cr * 0.00318811 * 255 b = -276.836 + y * 0.00456621 * 255. + cb * 0.00791071 * 255 r, g, b = r.round(), g.round(), b.round() computed_rgb[i, j, :] = [r, g, b] assert_image_almost_equal(out_img, computed_rgb) def test_ycbcr2bgr(): # float32 in_img = np.random.rand(10, 10, 3).astype(np.float32) out_img = mmcv.ycbcr2bgr(in_img) computed_bgr = np.empty_like(in_img) in_img *= 255. for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): y, cb, cr = in_img[i, j] r = -222.921 + y * 0.00456621 * 255 + cr * 0.00625893 * 255 g = 135.576 + y * 0.00456621 * 255 - cb * 0.00153632 * 255 - \ cr * 0.00318811 * 255 b = -276.836 + y * 0.00456621 * 255. 
+ cb * 0.00791071 * 255 computed_bgr[i, j, :] = [b, g, r] computed_bgr /= 255. assert_array_almost_equal(out_img, computed_bgr, decimal=2) # uint8 in_img = (np.random.rand(10, 10, 3) * 255).astype(np.uint8) out_img = mmcv.ycbcr2bgr(in_img) computed_bgr = np.empty_like(in_img) for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): y, cb, cr = in_img[i, j] r = -222.921 + y * 0.00456621 * 255 + cr * 0.00625893 * 255 g = 135.576 + y * 0.00456621 * 255 - cb * 0.00153632 * 255 - \ cr * 0.00318811 * 255 b = -276.836 + y * 0.00456621 * 255. + cb * 0.00791071 * 255 r, g, b = r.round(), g.round(), b.round() computed_bgr[i, j, :] = [b, g, r] assert_image_almost_equal(out_img, computed_bgr) def test_bgr2hls(): in_img = np.random.rand(10, 10, 3).astype(np.float32) out_img = mmcv.bgr2hls(in_img) argmax = in_img.argmax(axis=2) computed_hls = np.empty_like(in_img) for i in range(in_img.shape[0]): for j in range(in_img.shape[1]): b, g, r = in_img[i, j] maxc = max(r, g, b) minc = min(r, g, b) _l = (minc + maxc) / 2.0 if minc == maxc: h = 0.0 s = 0.0 if _l <= 0.5: s = (maxc - minc) / (maxc + minc) else: s = (maxc - minc) / (2.0 - maxc - minc) if argmax[i, j] == 2: h = 60 * (g - b) / (maxc - minc) elif argmax[i, j] == 1: h = 60 * (2.0 + (b - r) / (maxc - minc)) else: h = 60 * (4.0 + (r - g) / (maxc - minc)) if h < 0: h += 360 computed_hls[i, j, :] = [h, _l, s] assert_array_almost_equal(out_img, computed_hls, decimal=2) @pytest.mark.parametrize('src,dst,ref', [('bgr', 'gray', cv2.COLOR_BGR2GRAY), ('rgb', 'gray', cv2.COLOR_RGB2GRAY), ('bgr', 'rgb', cv2.COLOR_BGR2RGB), ('rgb', 'bgr', cv2.COLOR_RGB2BGR), ('bgr', 'hsv', cv2.COLOR_BGR2HSV), ('hsv', 'bgr', cv2.COLOR_HSV2BGR), ('bgr', 'hls', cv2.COLOR_BGR2HLS), ('hls', 'bgr', cv2.COLOR_HLS2BGR)]) def test_imconvert(src, dst, ref): img = np.random.rand(10, 10, 3).astype(np.float32) assert_array_equal(mmcv.imconvert(img, src, dst), cv2.cvtColor(img, ref)) ================================================ FILE: 
01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_image/test_geometric.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import os.path as osp import cv2 import numpy as np import pytest from numpy.testing import assert_array_equal import mmcv class TestGeometric: @classmethod def setup_class(cls): cls.data_dir = osp.join(osp.dirname(__file__), '../data') # the test img resolution is 400x300 cls.img_path = osp.join(cls.data_dir, 'color.jpg') cls.img = cv2.imread(cls.img_path) def test_imresize(self): resized_img = mmcv.imresize(self.img, (1000, 600)) assert resized_img.shape == (600, 1000, 3) resized_img, w_scale, h_scale = mmcv.imresize(self.img, (1000, 600), True) assert (resized_img.shape == (600, 1000, 3) and w_scale == 2.5 and h_scale == 2.0) resized_img_dst = np.empty((600, 1000, 3), dtype=self.img.dtype) resized_img = mmcv.imresize(self.img, (1000, 600), out=resized_img_dst) assert id(resized_img_dst) == id(resized_img) assert_array_equal(resized_img_dst, mmcv.imresize(self.img, (1000, 600))) for mode in ['nearest', 'bilinear', 'bicubic', 'area', 'lanczos']: resized_img = mmcv.imresize( self.img, (1000, 600), interpolation=mode) assert resized_img.shape == (600, 1000, 3) # test pillow resize for mode in [ 'nearest', 'bilinear', 'bicubic', 'box', 'lanczos', 'hamming' ]: resized_img = mmcv.imresize( self.img, (1000, 600), interpolation=mode, backend='pillow') assert resized_img.shape == (600, 1000, 3) # resize backend must be 'cv2' or 'pillow' with pytest.raises(ValueError): mmcv.imresize(self.img, (1000, 600), backend='not support') def test_imresize_to_multiple(self): # test size and keep_ratio = False resized_img = mmcv.imresize_to_multiple( self.img, divisor=16, size=(511, 513), keep_ratio=False) assert resized_img.shape == (528, 512, 3) resized_img = mmcv.imresize_to_multiple( self.img, divisor=(16, 32), size=(511, 513), keep_ratio=False) assert resized_img.shape == (544, 512, 3) # test size, keep_ratio = True, 
and return_scale resized_img, w_scale, h_scale = mmcv.imresize_to_multiple( self.img, divisor=16, size=(1000, 600), keep_ratio=True, return_scale=True) assert resized_img.shape == ( 608, 800, 3) and h_scale == 608 / 300 and w_scale == 800 / 400 resized_img, w_scale, h_scale = mmcv.imresize_to_multiple( self.img, divisor=(18, 16), size=(1000, 600), keep_ratio=True, return_scale=True) assert resized_img.shape == ( 608, 810, 3) and h_scale == 608 / 300 and w_scale == 810 / 400 # test scale_factor and return_scale resized_img, w_scale, h_scale = mmcv.imresize_to_multiple( self.img, divisor=16, scale_factor=2, return_scale=True) assert resized_img.shape == ( 608, 800, 3) and h_scale == 608 / 300 and w_scale == 800 / 400 resized_img, w_scale, h_scale = mmcv.imresize_to_multiple( self.img, divisor=16, scale_factor=(2, 3), return_scale=True) assert resized_img.shape == ( 912, 800, 3) and h_scale == 912 / 300 and w_scale == 800 / 400 resized_img, w_scale, h_scale = mmcv.imresize_to_multiple( self.img, divisor=(18, 16), scale_factor=(2, 3), return_scale=True) assert resized_img.shape == ( 912, 810, 3) and h_scale == 912 / 300 and w_scale == 810 / 400 # one of size and scale_factor should be given with pytest.raises(ValueError): mmcv.imresize_to_multiple( self.img, divisor=16, size=(1000, 600), scale_factor=2) with pytest.raises(ValueError): mmcv.imresize_to_multiple( self.img, divisor=16, size=None, scale_factor=None) def test_imresize_like(self): a = np.zeros((100, 200, 3)) resized_img = mmcv.imresize_like(self.img, a) assert resized_img.shape == (100, 200, 3) def test_rescale_size(self): new_size, scale_factor = mmcv.rescale_size((400, 300), 1.5, True) assert new_size == (600, 450) and scale_factor == 1.5 new_size, scale_factor = mmcv.rescale_size((400, 300), 0.934, True) assert new_size == (374, 280) and scale_factor == 0.934 new_size = mmcv.rescale_size((400, 300), 1.5) assert new_size == (600, 450) new_size = mmcv.rescale_size((400, 300), 0.934) assert new_size == (374, 
280) new_size, scale_factor = mmcv.rescale_size((400, 300), (1000, 600), True) assert new_size == (800, 600) and scale_factor == 2.0 new_size, scale_factor = mmcv.rescale_size((400, 300), (180, 200), True) assert new_size == (200, 150) and scale_factor == 0.5 new_size = mmcv.rescale_size((400, 300), (1000, 600)) assert new_size == (800, 600) new_size = mmcv.rescale_size((400, 300), (180, 200)) assert new_size == (200, 150) with pytest.raises(ValueError): mmcv.rescale_size((400, 300), -0.5) with pytest.raises(TypeError): mmcv.rescale_size()((400, 300), [100, 100]) def test_imrescale(self): # rescale by a certain factor resized_img = mmcv.imrescale(self.img, 1.5) assert resized_img.shape == (450, 600, 3) resized_img = mmcv.imrescale(self.img, 0.934) assert resized_img.shape == (280, 374, 3) # rescale by a certain max_size # resize (400, 300) to (max_1000, max_600) resized_img = mmcv.imrescale(self.img, (1000, 600)) assert resized_img.shape == (600, 800, 3) resized_img, scale = mmcv.imrescale( self.img, (1000, 600), return_scale=True) assert resized_img.shape == (600, 800, 3) and scale == 2.0 # resize (400, 300) to (max_200, max_180) resized_img = mmcv.imrescale(self.img, (180, 200)) assert resized_img.shape == (150, 200, 3) resized_img, scale = mmcv.imrescale( self.img, (180, 200), return_scale=True) assert resized_img.shape == (150, 200, 3) and scale == 0.5 # test exceptions with pytest.raises(ValueError): mmcv.imrescale(self.img, -0.5) with pytest.raises(TypeError): mmcv.imrescale(self.img, [100, 100]) def test_imflip(self): # direction must be "horizontal" or "vertical" or "diagonal" with pytest.raises(AssertionError): mmcv.imflip(np.random.rand(80, 60, 3), direction='random') # test horizontal flip (color image) img = np.random.rand(80, 60, 3) h, w, c = img.shape flipped_img = mmcv.imflip(img) assert flipped_img.shape == img.shape for i in range(h): for j in range(w): for k in range(c): assert flipped_img[i, j, k] == img[i, w - 1 - j, k] # test vertical flip 
def test_imflip_(self):
    """Check the in-place flip ``mmcv.imflip_`` for colour and grayscale input.

    For every direction the result must be the very object that was passed
    in, and its content must equal the untouched source mirrored along the
    corresponding axis/axes.
    """
    # direction must be "horizontal" or "vertical" or "diagonal"
    with pytest.raises(AssertionError):
        mmcv.imflip_(np.random.rand(80, 60, 3), direction='random')

    # (keyword arguments for the call, view of the source that is expected)
    # The horizontal case relies on the default ``direction`` on purpose.
    cases = (
        ({}, lambda a: a[:, ::-1]),
        ({'direction': 'vertical'}, lambda a: a[::-1]),
        ({'direction': 'diagonal'}, lambda a: a[::-1, ::-1]),
    )

    # colour image first, then grayscale image
    for shape in ((80, 60, 3), (80, 60)):
        source = np.random.rand(*shape)
        for call_kwargs, mirrored in cases:
            work = source.copy()
            result = mmcv.imflip_(work, **call_kwargs)
            assert result.shape == source.shape
            assert result.shape == work.shape
            # in-place contract: the returned array is the input array
            assert id(result) == id(work)
            # every element matches the mirrored source ...
            assert (result == mirrored(source)).all()
            # ... and the buffer that was passed in
            assert (result == work).all()
0, 299, 299]]) # tall # yapf: enable # crop one bbox patch = mmcv.imcrop(self.img, bboxes[0, :]) patches = mmcv.imcrop(self.img, bboxes[[0], :]) assert patch.shape == (100, 100, 3) patch_path = osp.join(self.data_dir, 'patches') ref_patch = np.load(patch_path + '/0.npy') assert_array_equal(patch, ref_patch) assert isinstance(patches, list) and len(patches) == 1 assert_array_equal(patches[0], ref_patch) # crop with no scaling and padding patches = mmcv.imcrop(self.img, bboxes) assert len(patches) == bboxes.shape[0] for i in range(len(patches)): ref_patch = np.load(patch_path + f'/{i}.npy') assert_array_equal(patches[i], ref_patch) # crop with scaling and no padding patches = mmcv.imcrop(self.img, bboxes, 1.2) for i in range(len(patches)): ref_patch = np.load(patch_path + f'/scale_{i}.npy') assert_array_equal(patches[i], ref_patch) # crop with scaling and padding patches = mmcv.imcrop(self.img, bboxes, 1.2, pad_fill=[255, 255, 0]) for i in range(len(patches)): ref_patch = np.load(patch_path + f'/pad_{i}.npy') assert_array_equal(patches[i], ref_patch) patches = mmcv.imcrop(self.img, bboxes, 1.2, pad_fill=0) for i in range(len(patches)): ref_patch = np.load(patch_path + f'/pad0_{i}.npy') assert_array_equal(patches[i], ref_patch) def test_impad(self): # grayscale image img = np.random.rand(10, 10).astype(np.float32) padded_img = mmcv.impad(img, padding=(0, 0, 2, 5), pad_val=0) assert_array_equal(img, padded_img[:10, :10]) assert_array_equal( np.zeros((5, 12), dtype='float32'), padded_img[10:, :]) assert_array_equal( np.zeros((15, 2), dtype='float32'), padded_img[:, 10:]) # RGB image img = np.random.rand(10, 10, 3).astype(np.float32) padded_img = mmcv.impad(img, padding=(0, 0, 2, 5), pad_val=0) assert_array_equal(img, padded_img[:10, :10, :]) assert_array_equal( np.zeros((5, 12, 3), dtype='float32'), padded_img[10:, :, :]) assert_array_equal( np.zeros((15, 2, 3), dtype='float32'), padded_img[:, 10:, :]) # RGB image with different values for three channels. 
img = np.random.randint(256, size=(10, 10, 3)).astype('uint8') padded_img = mmcv.impad( img, padding=(0, 0, 2, 5), pad_val=(100, 110, 120)) assert_array_equal(img, padded_img[:10, :10, :]) assert_array_equal( np.array([100, 110, 120], dtype='uint8') * np.ones( (5, 12, 3), dtype='uint8'), padded_img[10:, :, :]) assert_array_equal( np.array([100, 110, 120], dtype='uint8') * np.ones( (15, 2, 3), dtype='uint8'), padded_img[:, 10:, :]) # Pad the grayscale image to shape (15, 12) img = np.random.rand(10, 10).astype(np.float32) padded_img = mmcv.impad(img, shape=(15, 12)) assert_array_equal(img, padded_img[:10, :10]) assert_array_equal( np.zeros((5, 12), dtype='float32'), padded_img[10:, :]) assert_array_equal( np.zeros((15, 2), dtype='float32'), padded_img[:, 10:]) # Pad the RGB image to shape (15, 12) img = np.random.rand(10, 10, 3).astype(np.float32) padded_img = mmcv.impad(img, shape=(15, 12)) assert_array_equal(img, padded_img[:10, :10, :]) assert_array_equal( np.zeros((5, 12, 3), dtype='float32'), padded_img[10:, :, :]) assert_array_equal( np.zeros((15, 2, 3), dtype='float32'), padded_img[:, 10:, :]) # Pad the RGB image to shape (15, 12) with different values for # three channels. 
img = np.random.randint(256, size=(10, 10, 3)).astype('uint8') padded_img = mmcv.impad(img, shape=(15, 12), pad_val=(100, 110, 120)) assert_array_equal(img, padded_img[:10, :10, :]) assert_array_equal( np.array([100, 110, 120], dtype='uint8') * np.ones( (5, 12, 3), dtype='uint8'), padded_img[10:, :, :]) assert_array_equal( np.array([100, 110, 120], dtype='uint8') * np.ones( (15, 2, 3), dtype='uint8'), padded_img[:, 10:, :]) # RGB image with padding=[5, 2] img = np.random.rand(10, 10, 3).astype(np.float32) padded_img = mmcv.impad(img, padding=(5, 2), pad_val=0) assert padded_img.shape == (14, 20, 3) assert_array_equal(img, padded_img[2:12, 5:15, :]) assert_array_equal( np.zeros((2, 5, 3), dtype='float32'), padded_img[:2, :5, :]) assert_array_equal( np.zeros((2, 5, 3), dtype='float32'), padded_img[12:, :5, :]) assert_array_equal( np.zeros((2, 5, 3), dtype='float32'), padded_img[:2, 15:, :]) assert_array_equal( np.zeros((2, 5, 3), dtype='float32'), padded_img[12:, 15:, :]) # RGB image with type(pad_val) = tuple pad_val = (0, 1, 2) img = np.random.rand(10, 10, 3).astype(np.float32) padded_img = mmcv.impad(img, padding=(0, 0, 5, 2), pad_val=pad_val) assert padded_img.shape == (12, 15, 3) assert_array_equal(img, padded_img[:10, :10, :]) assert_array_equal(pad_val[0] * np.ones((2, 15, 1), dtype='float32'), padded_img[10:, :, 0:1]) assert_array_equal(pad_val[1] * np.ones((2, 15, 1), dtype='float32'), padded_img[10:, :, 1:2]) assert_array_equal(pad_val[2] * np.ones((2, 15, 1), dtype='float32'), padded_img[10:, :, 2:3]) assert_array_equal(pad_val[0] * np.ones((12, 5, 1), dtype='float32'), padded_img[:, 10:, 0:1]) assert_array_equal(pad_val[1] * np.ones((12, 5, 1), dtype='float32'), padded_img[:, 10:, 1:2]) assert_array_equal(pad_val[2] * np.ones((12, 5, 1), dtype='float32'), padded_img[:, 10:, 2:3]) # test different padding mode with channel number = 3 for mode in ['constant', 'edge', 'reflect', 'symmetric']: img = np.random.rand(10, 10, 3).astype(np.float32) padded_img = 
def test_impad_to_multiple(self):
    """Check the output shape of ``mmcv.impad_to_multiple``.

    Each case is (input shape, divisor, expected padded shape); the last
    case has both sides already divisible, so the shape is unchanged.
    """
    cases = (
        ((11, 14, 3), 4, (12, 16, 3)),
        ((20, 12), 5, (20, 15)),
        ((20, 12), 2, (20, 12)),
    )
    for in_shape, divisor, out_shape in cases:
        sample = np.random.rand(*in_shape).astype(np.float32)
        assert mmcv.impad_to_multiple(sample, divisor).shape == out_shape
def test_imrotate(self):
    """Check ``mmcv.imrotate`` against hand-written expected arrays."""
    square = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype(np.uint8)

    # angle 0 leaves the image untouched
    assert_array_equal(mmcv.imrotate(square, 0), square)

    # +90 / -90 on a square image: (angle, expected)
    square_cases = (
        (90, [[7, 4, 1], [8, 5, 2], [9, 6, 3]]),
        (-90, [[3, 6, 9], [2, 5, 8], [1, 4, 7]]),
    )
    for angle, expected in square_cases:
        assert_array_equal(mmcv.imrotate(square, angle), np.array(expected))

    # 2x4 image rotated by 90 with different options: (kwargs, expected)
    wide = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]).astype(np.uint8)
    wide_cases = (
        # default: output keeps the input size, uncovered cells are 0
        ({}, [[0, 6, 2, 0], [0, 7, 3, 0]]),
        # explicit rotation centre
        ({'center': (0, 0)}, [[1, 0, 0, 0], [2, 0, 0, 0]]),
        # uncovered cells take ``border_value``
        ({'border_value': 255}, [[255, 6, 2, 255], [255, 7, 3, 255]]),
        # ``auto_bound`` resizes the output so the whole image fits
        ({'auto_bound': True}, [[5, 1], [6, 2], [7, 3], [8, 4]]),
    )
    for options, expected in wide_cases:
        assert_array_equal(
            mmcv.imrotate(wide, 90, **options), np.array(expected))

    # ``center`` and ``auto_bound`` together are rejected
    with pytest.raises(ValueError):
        mmcv.imrotate(wide, 90, center=(0, 0), auto_bound=True)
-1, 'vertical'), img_sheared) # magnitude=1, vertical, borderValue=100 borderValue = 100 img_sheared = np.array( [[1, borderValue, borderValue], [4, 2, borderValue], [7, 5, 3]], dtype=np.uint8) assert_array_equal( mmcv.imshear(img, 1, 'vertical', borderValue), img_sheared) # magnitude=1, vertical, borderValue=100, img shape (h,w,3) img = np.stack([img, img, img], axis=-1) img_sheared = np.stack([img_sheared, img_sheared, img_sheared], axis=-1) assert_array_equal( mmcv.imshear(img, 1, 'vertical', borderValue), img_sheared) # test tuple format of borderValue assert_array_equal( mmcv.imshear(img, 1, 'vertical', (borderValue, borderValue, borderValue)), img_sheared) # test invalid length of borderValue with pytest.raises(AssertionError): mmcv.imshear(img, 0.5, 'horizontal', (borderValue, )) # test invalid type of borderValue with pytest.raises(ValueError): mmcv.imshear(img, 0.5, 'horizontal', [borderValue]) # test invalid value of direction with pytest.raises(AssertionError): mmcv.imshear(img, 0.5, 'diagonal') def test_imtranslate(self): img = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.uint8) assert_array_equal(mmcv.imtranslate(img, 0), img) # offset=1, horizontal img_translated = np.array([[128, 1, 2], [128, 4, 5], [128, 7, 8]], dtype=np.uint8) assert_array_equal( mmcv.imtranslate(img, 1, border_value=128), img_translated) # offset=-1, vertical img_translated = np.array([[4, 5, 6], [7, 8, 9], [0, 0, 0]], dtype=np.uint8) assert_array_equal( mmcv.imtranslate(img, -1, 'vertical'), img_translated) # offset=-2, horizontal img = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.uint8) img = np.stack([img, img, img], axis=-1) img_translated = [[3, 4, 128, 128], [7, 8, 128, 128]] img_translated = np.stack( [img_translated, img_translated, img_translated], axis=-1) assert_array_equal( mmcv.imtranslate(img, -2, border_value=128), img_translated) # offset=2, vertical border_value = (110, 120, 130) img_translated = np.stack([ np.ones((2, 4)) * border_value[0], np.ones((2, 
@pytest.mark.skipif(torch is None, reason='requires torch library')
def test_tensor2imgs():
    """Check argument validation and output of ``mmcv.tensor2imgs``.

    Every invalid call sits in its own ``pytest.raises`` block: once a
    statement inside such a block raises, the statements after it are never
    executed, so putting two raising calls in one block silently skips the
    second one.
    """
    # test tensor obj: a numpy array is not accepted
    with pytest.raises(AssertionError):
        tensor = np.random.rand(2, 3, 3)
        mmcv.tensor2imgs(tensor)

    # test tensor ndim: a 3-d tensor is not accepted
    with pytest.raises(AssertionError):
        tensor = torch.randn(2, 3, 3)
        mmcv.tensor2imgs(tensor)

    # test tensor dim-1: 4 channels are not accepted
    with pytest.raises(AssertionError):
        tensor = torch.randn(2, 4, 3, 3)
        mmcv.tensor2imgs(tensor)

    # test mean length: 3-channel tensor with a 1-element mean
    with pytest.raises(AssertionError):
        tensor = torch.randn(2, 3, 5, 5)
        mmcv.tensor2imgs(tensor, mean=(1, ))
    # test mean length: 1-channel tensor with a 3-element mean
    with pytest.raises(AssertionError):
        tensor = torch.randn(2, 1, 5, 5)
        mmcv.tensor2imgs(tensor, mean=(0, 0, 0))

    # test std length: 3-channel tensor with a 1-element std
    with pytest.raises(AssertionError):
        tensor = torch.randn(2, 3, 5, 5)
        mmcv.tensor2imgs(tensor, std=(1, ))
    # test std length: 1-channel tensor with a 3-element std
    with pytest.raises(AssertionError):
        tensor = torch.randn(2, 1, 5, 5)
        mmcv.tensor2imgs(tensor, std=(1, 1, 1))

    # test to_rgb: a 1-channel tensor cannot be converted with to_rgb=True
    with pytest.raises(AssertionError):
        tensor = torch.randn(2, 1, 5, 5)
        mmcv.tensor2imgs(tensor, mean=(0, ), std=(1, ), to_rgb=True)

    # test rgb=True: expected images are built from the channel-flipped tensor
    tensor = torch.randn(2, 3, 5, 5)
    gts = [
        t.cpu().numpy().transpose(1, 2, 0).astype(np.uint8)
        for t in tensor.flip(1)
    ]
    outputs = mmcv.tensor2imgs(tensor, to_rgb=True)
    for gt, output in zip(gts, outputs):
        assert_array_equal(gt, output)

    # test rgb=False: channel order is kept
    tensor = torch.randn(2, 3, 5, 5)
    gts = [t.cpu().numpy().transpose(1, 2, 0).astype(np.uint8) for t in tensor]
    outputs = mmcv.tensor2imgs(tensor, to_rgb=False)
    for gt, output in zip(gts, outputs):
        assert_array_equal(gt, output)

    # test tensor channel 1 and rgb=False: expected images are 2-d
    tensor = torch.randn(2, 1, 5, 5)
    gts = [t.squeeze(0).cpu().numpy().astype(np.uint8) for t in tensor]
    outputs = mmcv.tensor2imgs(tensor, to_rgb=False)
    for gt, output in zip(gts, outputs):
        assert_array_equal(gt, output)
def assert_img_equal(self, img, ref_img, ratio_thr=0.999):
    """Assert that ``img`` is almost identical to ``ref_img``.

    Both arrays must have the same shape and dtype. Elements whose absolute
    difference (computed in int32) is at most 1 are counted, and that count
    divided by ``shape[0] * shape[1]`` of ``ref_img`` must exceed
    ``ratio_thr``.

    Args:
        img: array under test.
        ref_img: reference array.
        ratio_thr: lower bound (exclusive) for the close-element ratio.

    Raises:
        AssertionError: on shape/dtype mismatch or when the ratio is not
            above ``ratio_thr``.
    """
    assert img.shape == ref_img.shape
    assert img.dtype == ref_img.dtype
    pixel_area = ref_img.shape[0] * ref_img.shape[1]
    # widen before subtracting so the difference cannot wrap around
    abs_diff = np.abs(img.astype('int32') - ref_img.astype('int32'))
    close_count = np.sum(abs_diff <= 1)
    # NOTE(review): ``close_count`` is taken over every element (all channels)
    # while the denominator is rows * cols only, so for a multi-channel image
    # the ratio can exceed 1 -- confirm whether this leniency is intended.
    assert close_count / float(pixel_area) > ratio_thr
patch.object( PetrelBackend, 'get', return_value=img_cv2_color_bgr) as mock_method: img_cv2_color_bgr_petrel = mmcv.imread(self.s3_path, backend='cv2') img_cv2_color_bgr_petrel_with_args = mmcv.imread( self.s3_path, backend='cv2', file_client_args={'backend': 'petrel'}) mock_method.assert_called() assert_array_equal(img_cv2_color_bgr_petrel, img_cv2_color_bgr_petrel_with_args) # HTTPBackend img_cv2_color_bgr = mmcv.imread(self.img_path) with patch.object( HTTPBackend, 'get', return_value=img_cv2_color_bgr) as mock_method: img_cv2_color_bgr_http = mmcv.imread(self.http_path, backend='cv2') img_cv2_color_bgr_http_with_args = mmcv.imread( self.http_path, backend='cv2', file_client_args={'backend': 'http'}) mock_method.assert_called() assert_array_equal(img_cv2_color_bgr_http, img_cv2_color_bgr_http_with_args) with pytest.raises(FileNotFoundError): mmcv.imread('/not/exists/' + self.img_path) # test arg backend pillow img_pil_gray_alpha = mmcv.imread( self.gray_alpha_img_path, 'grayscale', backend='pillow') assert img_pil_gray_alpha.shape == (400, 500) mean = img_pil_gray_alpha[300:, 400:].mean() assert_allclose(img_pil_gray_alpha[300:, 400:] - mean, 0) img_pil_gray_alpha = mmcv.imread( self.gray_alpha_img_path, backend='pillow') mean = img_pil_gray_alpha[300:, 400:].mean(axis=(0, 1)) assert_allclose(img_pil_gray_alpha[300:, 400:] - mean, 0) assert img_pil_gray_alpha.shape == (400, 500, 3) img_pil_gray_alpha = mmcv.imread( self.gray_alpha_img_path, 'unchanged', backend='pillow') assert img_pil_gray_alpha.shape == (400, 500, 2) img_pil_palette = mmcv.imread( self.palette_img_path, 'grayscale', backend='pillow') assert img_pil_palette.shape == (300, 400) img_pil_palette = mmcv.imread(self.palette_img_path, backend='pillow') assert img_pil_palette.shape == (300, 400, 3) img_pil_palette = mmcv.imread( self.palette_img_path, 'unchanged', backend='pillow') assert img_pil_palette.shape == (300, 400) # backend pillow mmcv.use_backend('pillow') img_pil_grayscale1 = 
mmcv.imread(self.img_path, 'grayscale') assert img_pil_grayscale1.shape == (300, 400) img_pil_gray_alpha = mmcv.imread(self.gray_alpha_img_path, 'grayscale') assert img_pil_gray_alpha.shape == (400, 500) mean = img_pil_gray_alpha[300:, 400:].mean() assert_allclose(img_pil_gray_alpha[300:, 400:] - mean, 0) img_pil_gray_alpha = mmcv.imread(self.gray_alpha_img_path) mean = img_pil_gray_alpha[300:, 400:].mean(axis=(0, 1)) assert_allclose(img_pil_gray_alpha[300:, 400:] - mean, 0) assert img_pil_gray_alpha.shape == (400, 500, 3) img_pil_gray_alpha = mmcv.imread(self.gray_alpha_img_path, 'unchanged') assert img_pil_gray_alpha.shape == (400, 500, 2) img_pil_palette = mmcv.imread(self.palette_img_path, 'grayscale') assert img_pil_palette.shape == (300, 400) img_pil_palette = mmcv.imread(self.palette_img_path) assert img_pil_palette.shape == (300, 400, 3) img_pil_palette = mmcv.imread(self.palette_img_path, 'unchanged') assert img_pil_palette.shape == (300, 400) img_pil_grayscale2 = mmcv.imread(self.gray_img_path) assert img_pil_grayscale2.shape == (300, 400, 3) img_pil_unchanged = mmcv.imread(self.gray_img_path, 'unchanged') assert img_pil_unchanged.shape == (300, 400) img_pil_unchanged = mmcv.imread(img_pil_unchanged) assert_array_equal(img_pil_unchanged, mmcv.imread(img_pil_unchanged)) img_pil_color_bgr = mmcv.imread(self.img_path_obj) assert img_pil_color_bgr.shape == (300, 400, 3) img_pil_color_rgb = mmcv.imread(self.img_path_obj, channel_order='rgb') assert img_pil_color_rgb.shape == (300, 400, 3) assert (img_pil_color_rgb == img_cv2_color_rgb).sum() / float( img_cv2_color_rgb.size) > 0.5 assert_array_equal(img_pil_color_rgb[:, :, ::-1], img_pil_color_bgr) img_pil_grayscale1 = mmcv.imread(self.img_path_obj, 'grayscale') assert img_pil_grayscale1.shape == (300, 400) img_pil_grayscale2 = mmcv.imread(self.gray_img_path_obj) assert img_pil_grayscale2.shape == (300, 400, 3) img_pil_unchanged = mmcv.imread(self.gray_img_path_obj, 'unchanged') assert img_pil_unchanged.shape 
== (300, 400) with pytest.raises(TypeError): mmcv.imread(1) # backend turbojpeg mmcv.use_backend('turbojpeg') img_turbojpeg_color_bgr = mmcv.imread(self.img_path) assert img_turbojpeg_color_bgr.shape == (300, 400, 3) assert_array_equal(img_turbojpeg_color_bgr, img_cv2_color_bgr) img_turbojpeg_color_rgb = mmcv.imread( self.img_path, channel_order='rgb') assert img_turbojpeg_color_rgb.shape == (300, 400, 3) assert_array_equal(img_turbojpeg_color_rgb, img_cv2_color_rgb) with pytest.raises(ValueError): mmcv.imread(self.img_path, channel_order='unsupport_order') img_turbojpeg_grayscale1 = mmcv.imread(self.img_path, flag='grayscale') assert img_turbojpeg_grayscale1.shape == (300, 400) assert_array_equal(img_turbojpeg_grayscale1, img_cv2_grayscale1) img_turbojpeg_grayscale2 = mmcv.imread(self.gray_img_path) assert img_turbojpeg_grayscale2.shape == (300, 400, 3) assert_array_equal(img_turbojpeg_grayscale2, img_cv2_grayscale2) img_turbojpeg_grayscale2 = mmcv.imread(img_turbojpeg_grayscale2) assert_array_equal(img_turbojpeg_grayscale2, mmcv.imread(img_turbojpeg_grayscale2)) with pytest.raises(ValueError): mmcv.imread(self.gray_img_path, 'unchanged') with pytest.raises(TypeError): mmcv.imread(1) with pytest.raises(AssertionError): mmcv.use_backend('unsupport_backend') with pytest.raises(ValueError): mmcv.imread(self.img_path, 'unsupported_backend') # backend tifffile, multi channel tiff file(> 4 channels). 
mmcv.use_backend('tifffile') img_tifffile = mmcv.imread(self.tiff_path) assert img_tifffile.shape == (200, 150, 5) mmcv.use_backend('cv2') # consistent exif behaviour img_cv2_exif = mmcv.imread(self.exif_img_path) img_pil_exif = mmcv.imread(self.exif_img_path, backend='pillow') assert img_cv2_exif.shape == (400, 300, 3) assert img_pil_exif.shape == (400, 300, 3) img_cv2_exif_unchanged = mmcv.imread( self.exif_img_path, flag='unchanged') img_pil_exif_unchanged = mmcv.imread( self.exif_img_path, backend='pillow', flag='unchanged') assert img_cv2_exif_unchanged.shape == (300, 400, 3) assert img_pil_exif_unchanged.shape == (300, 400, 3) img_cv2_color_ignore_exif = mmcv.imread( self.exif_img_path, flag='color_ignore_orientation') img_pil_color_ignore_exif = mmcv.imread( self.exif_img_path, backend='pillow', flag='color_ignore_orientation') assert img_cv2_color_ignore_exif.shape == (300, 400, 3) assert img_pil_color_ignore_exif.shape == (300, 400, 3) img_cv2_grayscale_ignore_exif = mmcv.imread( self.exif_img_path, flag='grayscale_ignore_orientation') img_pil_grayscale_ignore_exif = mmcv.imread( self.exif_img_path, backend='pillow', flag='grayscale_ignore_orientation') assert img_cv2_grayscale_ignore_exif.shape == (300, 400) assert img_pil_grayscale_ignore_exif.shape == (300, 400) def test_imfrombytes(self): # backend cv2, channel order: bgr mmcv.use_backend('cv2') with open(self.img_path, 'rb') as f: img_bytes = f.read() img_cv2 = mmcv.imfrombytes(img_bytes) assert img_cv2.shape == (300, 400, 3) # backend cv2, channel order: rgb mmcv.use_backend('cv2') with open(self.img_path, 'rb') as f: img_bytes = f.read() img_rgb_cv2 = mmcv.imfrombytes(img_bytes, channel_order='rgb') assert img_rgb_cv2.shape == (300, 400, 3) assert_array_equal(img_rgb_cv2, img_cv2[:, :, ::-1]) # backend cv2, grayscale, decode as 3 channels with open(self.gray_img_path, 'rb') as f: img_bytes = f.read() gray_img_rgb_cv2 = mmcv.imfrombytes(img_bytes) assert gray_img_rgb_cv2.shape == (300, 400, 3) # 
backend cv2, grayscale with open(self.gray_img_path, 'rb') as f: img_bytes = f.read() gray_img_cv2 = mmcv.imfrombytes(img_bytes, flag='grayscale') assert gray_img_cv2.shape == (300, 400) # backend cv2, grayscale dim3 with open(self.gray_img_dim3_path, 'rb') as f: img_bytes = f.read() gray_img_dim3_cv2 = mmcv.imfrombytes(img_bytes, flag='grayscale') assert gray_img_dim3_cv2.shape == (300, 400) # arg backend pillow, channel order: bgr with open(self.img_path, 'rb') as f: img_bytes = f.read() img_pillow = mmcv.imfrombytes(img_bytes, backend='pillow') assert img_pillow.shape == (300, 400, 3) # Pillow and opencv decoding may not be the same assert (img_cv2 == img_pillow).sum() / float(img_cv2.size) > 0.5 # backend pillow, channel order: bgr mmcv.use_backend('pillow') with open(self.img_path, 'rb') as f: img_bytes = f.read() img_pillow = mmcv.imfrombytes(img_bytes) assert img_pillow.shape == (300, 400, 3) # Pillow and opencv decoding may not be the same assert (img_cv2 == img_pillow).sum() / float(img_cv2.size) > 0.5 # backend turbojpeg, channel order: bgr mmcv.use_backend('turbojpeg') with open(self.img_path, 'rb') as f: img_bytes = f.read() img_turbojpeg = mmcv.imfrombytes(img_bytes) assert img_turbojpeg.shape == (300, 400, 3) assert_array_equal(img_cv2, img_turbojpeg) # backend turbojpeg, channel order: rgb with open(self.img_path, 'rb') as f: img_bytes = f.read() img_rgb_turbojpeg = mmcv.imfrombytes(img_bytes, channel_order='rgb') assert img_rgb_turbojpeg.shape == (300, 400, 3) assert_array_equal(img_rgb_turbojpeg, img_cv2[:, :, ::-1]) # backend turbojpeg, grayscale, decode as 3 channels with open(self.gray_img_path, 'rb') as f: img_bytes = f.read() gray_img_turbojpeg = mmcv.imfrombytes(img_bytes) assert gray_img_turbojpeg.shape == (300, 400, 3) assert_array_equal(gray_img_rgb_cv2, gray_img_turbojpeg) # backend turbojpeg, grayscale with open(self.gray_img_path, 'rb') as f: img_bytes = f.read() gray_img_turbojpeg = mmcv.imfrombytes(img_bytes, flag='grayscale') assert 
def test_imwrite(self):
    """Check ``mmcv.imwrite`` on local disk, through the petrel client, and
    with an unsupported file extension.
    """
    img = mmcv.imread(self.img_path)

    # Round-trip through a private temporary directory: it is removed even
    # when imwrite/imread raises, and parallel runs do not share a file name.
    with tempfile.TemporaryDirectory() as tmp_dir:
        out_file = osp.join(tmp_dir, 'mmcv_test.jpg')
        mmcv.imwrite(img, out_file)
        rewrite_img = mmcv.imread(out_file)
    self.assert_img_equal(img, rewrite_img)

    # test petrel client: ``put`` is mocked, so nothing is uploaded
    with patch.object(
            PetrelBackend, 'put', return_value=None) as mock_method:
        ret = mmcv.imwrite(img, self.s3_path)
        ret_with_args = mmcv.imwrite(
            img, self.s3_path, file_client_args={'backend': 'petrel'})
        assert ret
        assert ret_with_args
        mock_method.assert_called()

    # an extension the cv2 encoder does not know makes cv2 raise
    with pytest.raises(cv2.error):
        mmcv.imwrite(img, 'error_file.jppg')
import os.path as osp import cv2 import numpy as np import pytest from numpy.testing import assert_array_equal import mmcv class TestPhotometric: @classmethod def setup_class(cls): # the test img resolution is 400x300 cls.img_path = osp.join(osp.dirname(__file__), '../data/color.jpg') cls.img = cv2.imread(cls.img_path) cls.mean = np.array([123.675, 116.28, 103.53], dtype=np.float32) cls.std = np.array([58.395, 57.12, 57.375], dtype=np.float32) def test_imnormalize(self): rgb_img = self.img[:, :, ::-1] baseline = (rgb_img - self.mean) / self.std img = mmcv.imnormalize(self.img, self.mean, self.std) assert np.allclose(img, baseline) assert id(img) != id(self.img) img = mmcv.imnormalize(rgb_img, self.mean, self.std, to_rgb=False) assert np.allclose(img, baseline) assert id(img) != id(rgb_img) def test_imnormalize_(self): img_for_normalize = np.float32(self.img) rgb_img_for_normalize = np.float32(self.img[:, :, ::-1]) baseline = (rgb_img_for_normalize - self.mean) / self.std img = mmcv.imnormalize_(img_for_normalize, self.mean, self.std) assert np.allclose(img_for_normalize, baseline) assert id(img) == id(img_for_normalize) img = mmcv.imnormalize_( rgb_img_for_normalize, self.mean, self.std, to_rgb=False) assert np.allclose(img, baseline) assert id(img) == id(rgb_img_for_normalize) def test_imdenormalize(self): norm_img = (self.img[:, :, ::-1] - self.mean) / self.std rgb_baseline = (norm_img * self.std + self.mean) bgr_baseline = rgb_baseline[:, :, ::-1] img = mmcv.imdenormalize(norm_img, self.mean, self.std) assert np.allclose(img, bgr_baseline) img = mmcv.imdenormalize(norm_img, self.mean, self.std, to_bgr=False) assert np.allclose(img, rgb_baseline) def test_iminvert(self): img = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]], dtype=np.uint8) img_r = np.array([[255, 127, 0], [254, 128, 1], [253, 126, 2]], dtype=np.uint8) assert_array_equal(mmcv.iminvert(img), img_r) def test_solarize(self): img = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]], 
dtype=np.uint8) img_r = np.array([[0, 127, 0], [1, 127, 1], [2, 126, 2]], dtype=np.uint8) assert_array_equal(mmcv.solarize(img), img_r) img_r = np.array([[0, 127, 0], [1, 128, 1], [2, 126, 2]], dtype=np.uint8) assert_array_equal(mmcv.solarize(img, 100), img_r) def test_posterize(self): img = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]], dtype=np.uint8) img_r = np.array([[0, 128, 128], [0, 0, 128], [0, 128, 128]], dtype=np.uint8) assert_array_equal(mmcv.posterize(img, 1), img_r) img_r = np.array([[0, 128, 224], [0, 96, 224], [0, 128, 224]], dtype=np.uint8) assert_array_equal(mmcv.posterize(img, 3), img_r) def test_adjust_color(self): img = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]], dtype=np.uint8) img = np.stack([img, img, img], axis=-1) assert_array_equal(mmcv.adjust_color(img), img) img_gray = mmcv.bgr2gray(img) img_r = np.stack([img_gray, img_gray, img_gray], axis=-1) assert_array_equal(mmcv.adjust_color(img, 0), img_r) assert_array_equal(mmcv.adjust_color(img, 0, 1), img_r) assert_array_equal( mmcv.adjust_color(img, 0.5, 0.5), np.round(np.clip((img * 0.5 + img_r * 0.5), 0, 255)).astype(img.dtype)) assert_array_equal( mmcv.adjust_color(img, 1, 1.5), np.round(np.clip(img * 1 + img_r * 1.5, 0, 255)).astype(img.dtype)) assert_array_equal( mmcv.adjust_color(img, 0.8, -0.6, gamma=2), np.round(np.clip(img * 0.8 - 0.6 * img_r + 2, 0, 255)).astype(img.dtype)) assert_array_equal( mmcv.adjust_color(img, 0.8, -0.6, gamma=-0.6), np.round(np.clip(img * 0.8 - 0.6 * img_r - 0.6, 0, 255)).astype(img.dtype)) # test float type of image img = img.astype(np.float32) assert_array_equal( np.round(mmcv.adjust_color(img, 0.8, -0.6, gamma=-0.6)), np.round(np.clip(img * 0.8 - 0.6 * img_r - 0.6, 0, 255))) def test_imequalize(self, nb_rand_test=100): def _imequalize(img): # equalize the image using PIL.ImageOps.equalize from PIL import ImageOps, Image img = Image.fromarray(img) equalized_img = np.asarray(ImageOps.equalize(img)) return equalized_img img = np.array([[0, 
128, 255], [1, 127, 254], [2, 129, 253]], dtype=np.uint8) img = np.stack([img, img, img], axis=-1) equalized_img = mmcv.imequalize(img) assert_array_equal(equalized_img, _imequalize(img)) # test equalize with case step=0 img = np.array([[0, 0, 0], [120, 120, 120], [255, 255, 255]], dtype=np.uint8) img = np.stack([img, img, img], axis=-1) assert_array_equal(mmcv.imequalize(img), img) # test equalize with randomly sampled image. for _ in range(nb_rand_test): img = np.clip(np.random.normal(0, 1, (256, 256, 3)) * 260, 0, 255).astype(np.uint8) equalized_img = mmcv.imequalize(img) assert_array_equal(equalized_img, _imequalize(img)) def test_adjust_brightness(self, nb_rand_test=100): def _adjust_brightness(img, factor): # adjust the brightness of image using # PIL.ImageEnhance.Brightness from PIL.ImageEnhance import Brightness from PIL import Image img = Image.fromarray(img) brightened_img = Brightness(img).enhance(factor) return np.asarray(brightened_img) img = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]], dtype=np.uint8) img = np.stack([img, img, img], axis=-1) # test case with factor 1.0 assert_array_equal(mmcv.adjust_brightness(img, 1.), img) # test case with factor 0.0 assert_array_equal(mmcv.adjust_brightness(img, 0.), np.zeros_like(img)) # test adjust_brightness with randomly sampled images and factors. for _ in range(nb_rand_test): img = np.clip( np.random.uniform(0, 1, (1000, 1200, 3)) * 260, 0, 255).astype(np.uint8) factor = np.random.uniform() + np.random.choice([0, 1]) np.testing.assert_allclose( mmcv.adjust_brightness(img, factor).astype(np.int32), _adjust_brightness(img, factor).astype(np.int32), rtol=0, atol=1) def test_adjust_contrast(self, nb_rand_test=100): def _adjust_contrast(img, factor): from PIL.ImageEnhance import Contrast from PIL import Image # Image.fromarray defaultly supports RGB, not BGR. 
# convert from BGR to RGB img = Image.fromarray(img[..., ::-1], mode='RGB') contrasted_img = Contrast(img).enhance(factor) # convert from RGB to BGR return np.asarray(contrasted_img)[..., ::-1] img = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]], dtype=np.uint8) img = np.stack([img, img, img], axis=-1) # test case with factor 1.0 assert_array_equal(mmcv.adjust_contrast(img, 1.), img) # test case with factor 0.0 assert_array_equal( mmcv.adjust_contrast(img, 0.), _adjust_contrast(img, 0.)) # test adjust_contrast with randomly sampled images and factors. for _ in range(nb_rand_test): img = np.clip( np.random.uniform(0, 1, (1200, 1000, 3)) * 260, 0, 255).astype(np.uint8) factor = np.random.uniform() + np.random.choice([0, 1]) # Note the gap (less_equal 1) between PIL.ImageEnhance.Contrast # and mmcv.adjust_contrast comes from the gap that converts from # a color image to gray image using mmcv or PIL. np.testing.assert_allclose( mmcv.adjust_contrast(img, factor).astype(np.int32), _adjust_contrast(img, factor).astype(np.int32), rtol=0, atol=1) def test_auto_contrast(self, nb_rand_test=100): def _auto_contrast(img, cutoff=0): from PIL.ImageOps import autocontrast from PIL import Image # Image.fromarray defaultly supports RGB, not BGR. 
# convert from BGR to RGB img = Image.fromarray(img[..., ::-1], mode='RGB') contrasted_img = autocontrast(img, cutoff) # convert from RGB to BGR return np.asarray(contrasted_img)[..., ::-1] img = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]], dtype=np.uint8) img = np.stack([img, img, img], axis=-1) # test case without cut-off assert_array_equal(mmcv.auto_contrast(img), _auto_contrast(img)) # test case with cut-off as int assert_array_equal( mmcv.auto_contrast(img, 10), _auto_contrast(img, 10)) # test case with cut-off as float assert_array_equal( mmcv.auto_contrast(img, 12.5), _auto_contrast(img, 12.5)) # test case with cut-off as tuple assert_array_equal( mmcv.auto_contrast(img, (10, 10)), _auto_contrast(img, 10)) # test case with cut-off with sum over 100 assert_array_equal( mmcv.auto_contrast(img, 60), _auto_contrast(img, 60)) # test auto_contrast with randomly sampled images and factors. for _ in range(nb_rand_test): img = np.clip( np.random.uniform(0, 1, (1200, 1000, 3)) * 260, 0, 255).astype(np.uint8) # cut-offs are not set as tuple since in `build.yml`, pillow 6.2.2 # is installed, which does not support setting low cut-off and high # cut-off differently. # With pillow above 8.0.0, cutoff can be set as tuple cutoff = np.random.rand() * 100 assert_array_equal( mmcv.auto_contrast(img, cutoff), _auto_contrast(img, cutoff)) def test_adjust_sharpness(self, nb_rand_test=100): def _adjust_sharpness(img, factor): # adjust the sharpness of image using # PIL.ImageEnhance.Sharpness from PIL.ImageEnhance import Sharpness from PIL import Image img = Image.fromarray(img) sharpened_img = Sharpness(img).enhance(factor) return np.asarray(sharpened_img) img = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]], dtype=np.uint8) img = np.stack([img, img, img], axis=-1) # test case with invalid type of kernel with pytest.raises(AssertionError): mmcv.adjust_sharpness(img, 1., kernel=1.) 
# test case with invalid shape of kernel kernel = np.ones((3, 3, 3)) with pytest.raises(AssertionError): mmcv.adjust_sharpness(img, 1., kernel=kernel) # test case with all-zero kernel, factor 0.0 kernel = np.zeros((3, 3)) assert_array_equal( mmcv.adjust_sharpness(img, 0., kernel=kernel), np.zeros_like(img)) # test case with factor 1.0 assert_array_equal(mmcv.adjust_sharpness(img, 1.), img) # test adjust_sharpness with randomly sampled images and factors. for _ in range(nb_rand_test): img = np.clip( np.random.uniform(0, 1, (1000, 1200, 3)) * 260, 0, 255).astype(np.uint8) factor = np.random.uniform() # Note the gap between PIL.ImageEnhance.Sharpness and # mmcv.adjust_sharpness mainly comes from the difference ways of # handling img edges when applying filters np.testing.assert_allclose( mmcv.adjust_sharpness(img, factor).astype(np.int32)[1:-1, 1:-1], _adjust_sharpness(img, factor).astype(np.int32)[1:-1, 1:-1], rtol=0, atol=1) def test_adjust_lighting(self): img = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype(np.uint8) img = np.stack([img, img, img], axis=-1) # eigval and eigvec must be np.ndarray with pytest.raises(AssertionError): mmcv.adjust_lighting(img, 1, np.ones((3, 1))) with pytest.raises(AssertionError): mmcv.adjust_lighting(img, np.array([1]), (1, 1, 1)) # we must have the same number of eigval and eigvec with pytest.raises(AssertionError): mmcv.adjust_lighting(img, np.array([1]), np.eye(2)) with pytest.raises(AssertionError): mmcv.adjust_lighting(img, np.array([1]), np.array([1])) img_adjusted = mmcv.adjust_lighting( img, np.random.normal(0, 1, 2), np.random.normal(0, 1, (3, 2)), alphastd=0.) assert_array_equal(img_adjusted, img) def test_lut_transform(self): lut_table = np.array(list(range(256))) # test assertion image values should between 0 and 255. 
with pytest.raises(AssertionError): mmcv.lut_transform(np.array([256]), lut_table) with pytest.raises(AssertionError): mmcv.lut_transform(np.array([-1]), lut_table) # test assertion lut_table should be ndarray with shape (256, ) with pytest.raises(AssertionError): mmcv.lut_transform(np.array([0]), list(range(256))) with pytest.raises(AssertionError): mmcv.lut_transform(np.array([1]), np.array(list(range(257)))) img = mmcv.lut_transform(self.img, lut_table) baseline = cv2.LUT(self.img, lut_table) assert np.allclose(img, baseline) input_img = np.array( [[[0, 128, 255], [255, 128, 0]], [[0, 128, 255], [255, 128, 0]]], dtype=float) img = mmcv.lut_transform(input_img, lut_table) baseline = cv2.LUT(np.array(input_img, dtype=np.uint8), lut_table) assert np.allclose(img, baseline) input_img = np.random.randint(0, 256, size=(7, 8, 9, 10, 11)) img = mmcv.lut_transform(input_img, lut_table) baseline = cv2.LUT(np.array(input_img, dtype=np.uint8), lut_table) assert np.allclose(img, baseline) def test_clahe(self): def _clahe(img, clip_limit=40.0, tile_grid_size=(8, 8)): clahe = cv2.createCLAHE(clip_limit, tile_grid_size) return clahe.apply(np.array(img, dtype=np.uint8)) # test assertion image should have the right shape with pytest.raises(AssertionError): mmcv.clahe(self.img) # test assertion tile_grid_size should be a tuple with 2 integers with pytest.raises(AssertionError): mmcv.clahe(self.img[:, :, 0], tile_grid_size=(8.0, 8.0)) with pytest.raises(AssertionError): mmcv.clahe(self.img[:, :, 0], tile_grid_size=(8, 8, 8)) with pytest.raises(AssertionError): mmcv.clahe(self.img[:, :, 0], tile_grid_size=[8, 8]) # test with different channels for i in range(self.img.shape[-1]): img = mmcv.clahe(self.img[:, :, i]) img_std = _clahe(self.img[:, :, i]) assert np.allclose(img, img_std) assert id(img) != id(self.img[:, :, i]) assert id(img_std) != id(self.img[:, :, i]) # test case with clip_limit=1.2 for i in range(self.img.shape[-1]): img = mmcv.clahe(self.img[:, :, i], 1.2) img_std = 
_clahe(self.img[:, :, i], 1.2) assert np.allclose(img, img_std) assert id(img) != id(self.img[:, :, i]) assert id(img_std) != id(self.img[:, :, i]) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_load_model_zoo.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import os import os.path as osp from unittest.mock import patch import pytest import mmcv from mmcv.runner.checkpoint import (DEFAULT_CACHE_DIR, ENV_MMCV_HOME, ENV_XDG_CACHE_HOME, _get_mmcv_home, _load_checkpoint, get_deprecated_model_names, get_external_models) from mmcv.utils import TORCH_VERSION @patch('mmcv.__path__', [osp.join(osp.dirname(__file__), 'data/')]) def test_set_mmcv_home(): os.environ.pop(ENV_MMCV_HOME, None) mmcv_home = osp.join(osp.dirname(__file__), 'data/model_zoo/mmcv_home/') os.environ[ENV_MMCV_HOME] = mmcv_home assert _get_mmcv_home() == mmcv_home @patch('mmcv.__path__', [osp.join(osp.dirname(__file__), 'data/')]) def test_default_mmcv_home(): os.environ.pop(ENV_MMCV_HOME, None) os.environ.pop(ENV_XDG_CACHE_HOME, None) assert _get_mmcv_home() == os.path.expanduser( os.path.join(DEFAULT_CACHE_DIR, 'mmcv')) model_urls = get_external_models() assert model_urls == mmcv.load( osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json')) @patch('mmcv.__path__', [osp.join(osp.dirname(__file__), 'data/')]) def test_get_external_models(): os.environ.pop(ENV_MMCV_HOME, None) mmcv_home = osp.join(osp.dirname(__file__), 'data/model_zoo/mmcv_home/') os.environ[ENV_MMCV_HOME] = mmcv_home ext_urls = get_external_models() assert ext_urls == { 'train': 'https://localhost/train.pth', 'test': 'test.pth', 'val': 'val.pth', 'train_empty': 'train.pth' } @patch('mmcv.__path__', [osp.join(osp.dirname(__file__), 'data/')]) def test_get_deprecated_models(): os.environ.pop(ENV_MMCV_HOME, None) mmcv_home = osp.join(osp.dirname(__file__), 'data/model_zoo/mmcv_home/') os.environ[ENV_MMCV_HOME] = mmcv_home dep_urls = 
get_deprecated_model_names() assert dep_urls == { 'train_old': 'train', 'test_old': 'test', } def load_from_http(url, map_location=None): return 'url:' + url def load_url(url, map_location=None, model_dir=None): return load_from_http(url) def load(filepath, map_location=None): return 'local:' + filepath @patch('mmcv.__path__', [osp.join(osp.dirname(__file__), 'data/')]) @patch('mmcv.runner.checkpoint.load_from_http', load_from_http) @patch('mmcv.runner.checkpoint.load_url', load_url) @patch('torch.load', load) def test_load_external_url(): # test modelzoo:// url = _load_checkpoint('modelzoo://resnet50') if TORCH_VERSION < '1.9.0': assert url == ('url:https://download.pytorch.org/models/resnet50-19c8e' '357.pth') else: # filename of checkpoint is renamed in torch1.9.0 assert url == ('url:https://download.pytorch.org/models/resnet50-0676b' 'a61.pth') # test torchvision:// url = _load_checkpoint('torchvision://resnet50') if TORCH_VERSION < '1.9.0': assert url == ('url:https://download.pytorch.org/models/resnet50-19c8e' '357.pth') else: # filename of checkpoint is renamed in torch1.9.0 assert url == ('url:https://download.pytorch.org/models/resnet50-0676b' 'a61.pth') # test open-mmlab:// with default MMCV_HOME os.environ.pop(ENV_MMCV_HOME, None) os.environ.pop(ENV_XDG_CACHE_HOME, None) url = _load_checkpoint('open-mmlab://train') assert url == 'url:https://localhost/train.pth' # test open-mmlab:// with deprecated model name os.environ.pop(ENV_MMCV_HOME, None) os.environ.pop(ENV_XDG_CACHE_HOME, None) with pytest.warns( Warning, match='open-mmlab://train_old is deprecated in favor of ' 'open-mmlab://train'): url = _load_checkpoint('open-mmlab://train_old') assert url == 'url:https://localhost/train.pth' # test openmmlab:// with deprecated model name os.environ.pop(ENV_MMCV_HOME, None) os.environ.pop(ENV_XDG_CACHE_HOME, None) with pytest.warns( Warning, match='openmmlab://train_old is deprecated in favor of ' 'openmmlab://train'): url = 
_load_checkpoint('openmmlab://train_old') assert url == 'url:https://localhost/train.pth' # test open-mmlab:// with user-defined MMCV_HOME os.environ.pop(ENV_MMCV_HOME, None) mmcv_home = osp.join(osp.dirname(__file__), 'data/model_zoo/mmcv_home') os.environ[ENV_MMCV_HOME] = mmcv_home url = _load_checkpoint('open-mmlab://train') assert url == 'url:https://localhost/train.pth' with pytest.raises(FileNotFoundError, match='train.pth can not be found.'): _load_checkpoint('open-mmlab://train_empty') url = _load_checkpoint('open-mmlab://test') assert url == f'local:{osp.join(_get_mmcv_home(), "test.pth")}' url = _load_checkpoint('open-mmlab://val') assert url == f'local:{osp.join(_get_mmcv_home(), "val.pth")}' # test http:// https:// url = _load_checkpoint('http://localhost/train.pth') assert url == 'url:http://localhost/train.pth' # test local file with pytest.raises(FileNotFoundError, match='train.pth can not be found.'): _load_checkpoint('train.pth') url = _load_checkpoint(osp.join(_get_mmcv_home(), 'test.pth')) assert url == f'local:{osp.join(_get_mmcv_home(), "test.pth")}' ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_active_rotated_filter.py ================================================ import numpy as np import pytest import torch from mmcv.ops import active_rotated_filter np_feature = np.array([[[[[-1.4934e-01, 1.1341e+00, -1.6241e-01], [-1.0986e+00, -1.1463e+00, -1.3176e+00], [1.4808e+00, 7.6572e-01, -1.4548e+00]]]], [[[[1.9370e+00, 6.2799e-01, 2.5834e-02], [-1.4242e+00, 7.6566e-01, 1.0015e+00], [9.8669e-01, 4.1356e-01, 6.1068e-01]]]], [[[[1.4565e+00, 1.4960e+00, 2.4339e-01], [-2.2484e-01, 7.5942e-01, -8.1184e-01], [-1.7077e+00, 1.0658e+00, 3.8311e-01]]]], [[[[8.4734e-01, 1.0904e+00, 2.4356e+00], [9.5822e-01, 2.2260e-01, -2.4450e-01], [-1.5078e+00, 7.0902e-02, -1.5921e+00]]]], [[[[2.1173e+00, -7.3524e-01, 1.8888e+00], [1.0169e+00, 4.7033e-01, -1.0875e+00], [-1.0736e+00, -5.2245e-01, -2.8733e-01]]]], 
[[[[-5.6433e-01, 1.5835e+00, -1.5826e+00], [-8.8974e-01, -4.3128e-01, -2.2423e-01], [1.6552e-03, -1.7292e+00, 2.6639e-01]]]], [[[[-1.2951e-01, 1.3493e+00, -1.9329e+00], [5.6248e-01, -5.1189e-01, 1.3614e+00], [3.3680e-01, -8.7148e-01, 5.0592e-01]]]], [[[[1.6781e-02, -8.3929e-01, 1.2060e+00], [-1.0764e+00, 4.7821e-01, 1.5342e+00], [-4.4542e-01, -1.8606e+00, 3.0827e-01]]]]]) np_indices = np.array([[[[1, 2, 3, 6, 9, 8, 7, 4], [2, 3, 6, 9, 8, 7, 4, 1], [3, 6, 9, 8, 7, 4, 1, 2]], [[4, 1, 2, 3, 6, 9, 8, 7], [5, 5, 5, 5, 5, 5, 5, 5], [6, 9, 8, 7, 4, 1, 2, 3]], [[7, 4, 1, 2, 3, 6, 9, 8], [8, 7, 4, 1, 2, 3, 6, 9], [9, 8, 7, 4, 1, 2, 3, 6]]]]) expected_output = np.array([[[[-1.4934e-01, 1.1341e+00, -1.6241e-01], [-1.0986e+00, -1.1463e+00, -1.3176e+00], [1.4808e+00, 7.6572e-01, -1.4548e+00]]], [[[-1.0986e+00, -1.4934e-01, 1.1341e+00], [1.4808e+00, -1.1463e+00, -1.6241e-01], [7.6572e-01, -1.4548e+00, -1.3176e+00]]], [[[1.4808e+00, -1.0986e+00, -1.4934e-01], [7.6572e-01, -1.1463e+00, 1.1341e+00], [-1.4548e+00, -1.3176e+00, -1.6241e-01]]], [[[7.6572e-01, 1.4808e+00, -1.0986e+00], [-1.4548e+00, -1.1463e+00, -1.4934e-01], [-1.3176e+00, -1.6241e-01, 1.1341e+00]]], [[[-1.4548e+00, 7.6572e-01, 1.4808e+00], [-1.3176e+00, -1.1463e+00, -1.0986e+00], [-1.6241e-01, 1.1341e+00, -1.4934e-01]]], [[[-1.3176e+00, -1.4548e+00, 7.6572e-01], [-1.6241e-01, -1.1463e+00, 1.4808e+00], [1.1341e+00, -1.4934e-01, -1.0986e+00]]], [[[-1.6241e-01, -1.3176e+00, -1.4548e+00], [1.1341e+00, -1.1463e+00, 7.6572e-01], [-1.4934e-01, -1.0986e+00, 1.4808e+00]]], [[[1.1341e+00, -1.6241e-01, -1.3176e+00], [-1.4934e-01, -1.1463e+00, -1.4548e+00], [-1.0986e+00, 1.4808e+00, 7.6572e-01]]], [[[1.9370e+00, 6.2799e-01, 2.5834e-02], [-1.4242e+00, 7.6566e-01, 1.0015e+00], [9.8669e-01, 4.1356e-01, 6.1068e-01]]], [[[-1.4242e+00, 1.9370e+00, 6.2799e-01], [9.8669e-01, 7.6566e-01, 2.5834e-02], [4.1356e-01, 6.1068e-01, 1.0015e+00]]], [[[9.8669e-01, -1.4242e+00, 1.9370e+00], [4.1356e-01, 7.6566e-01, 6.2799e-01], [6.1068e-01, 
1.0015e+00, 2.5834e-02]]], [[[4.1356e-01, 9.8669e-01, -1.4242e+00], [6.1068e-01, 7.6566e-01, 1.9370e+00], [1.0015e+00, 2.5834e-02, 6.2799e-01]]], [[[6.1068e-01, 4.1356e-01, 9.8669e-01], [1.0015e+00, 7.6566e-01, -1.4242e+00], [2.5834e-02, 6.2799e-01, 1.9370e+00]]], [[[1.0015e+00, 6.1068e-01, 4.1356e-01], [2.5834e-02, 7.6566e-01, 9.8669e-01], [6.2799e-01, 1.9370e+00, -1.4242e+00]]], [[[2.5834e-02, 1.0015e+00, 6.1068e-01], [6.2799e-01, 7.6566e-01, 4.1356e-01], [1.9370e+00, -1.4242e+00, 9.8669e-01]]], [[[6.2799e-01, 2.5834e-02, 1.0015e+00], [1.9370e+00, 7.6566e-01, 6.1068e-01], [-1.4242e+00, 9.8669e-01, 4.1356e-01]]], [[[1.4565e+00, 1.4960e+00, 2.4339e-01], [-2.2484e-01, 7.5942e-01, -8.1184e-01], [-1.7077e+00, 1.0658e+00, 3.8311e-01]]], [[[-2.2484e-01, 1.4565e+00, 1.4960e+00], [-1.7077e+00, 7.5942e-01, 2.4339e-01], [1.0658e+00, 3.8311e-01, -8.1184e-01]]], [[[-1.7077e+00, -2.2484e-01, 1.4565e+00], [1.0658e+00, 7.5942e-01, 1.4960e+00], [3.8311e-01, -8.1184e-01, 2.4339e-01]]], [[[1.0658e+00, -1.7077e+00, -2.2484e-01], [3.8311e-01, 7.5942e-01, 1.4565e+00], [-8.1184e-01, 2.4339e-01, 1.4960e+00]]], [[[3.8311e-01, 1.0658e+00, -1.7077e+00], [-8.1184e-01, 7.5942e-01, -2.2484e-01], [2.4339e-01, 1.4960e+00, 1.4565e+00]]], [[[-8.1184e-01, 3.8311e-01, 1.0658e+00], [2.4339e-01, 7.5942e-01, -1.7077e+00], [1.4960e+00, 1.4565e+00, -2.2484e-01]]], [[[2.4339e-01, -8.1184e-01, 3.8311e-01], [1.4960e+00, 7.5942e-01, 1.0658e+00], [1.4565e+00, -2.2484e-01, -1.7077e+00]]], [[[1.4960e+00, 2.4339e-01, -8.1184e-01], [1.4565e+00, 7.5942e-01, 3.8311e-01], [-2.2484e-01, -1.7077e+00, 1.0658e+00]]], [[[8.4734e-01, 1.0904e+00, 2.4356e+00], [9.5822e-01, 2.2260e-01, -2.4450e-01], [-1.5078e+00, 7.0902e-02, -1.5921e+00]]], [[[9.5822e-01, 8.4734e-01, 1.0904e+00], [-1.5078e+00, 2.2260e-01, 2.4356e+00], [7.0902e-02, -1.5921e+00, -2.4450e-01]]], [[[-1.5078e+00, 9.5822e-01, 8.4734e-01], [7.0902e-02, 2.2260e-01, 1.0904e+00], [-1.5921e+00, -2.4450e-01, 2.4356e+00]]], [[[7.0902e-02, -1.5078e+00, 9.5822e-01], 
[-1.5921e+00, 2.2260e-01, 8.4734e-01], [-2.4450e-01, 2.4356e+00, 1.0904e+00]]], [[[-1.5921e+00, 7.0902e-02, -1.5078e+00], [-2.4450e-01, 2.2260e-01, 9.5822e-01], [2.4356e+00, 1.0904e+00, 8.4734e-01]]], [[[-2.4450e-01, -1.5921e+00, 7.0902e-02], [2.4356e+00, 2.2260e-01, -1.5078e+00], [1.0904e+00, 8.4734e-01, 9.5822e-01]]], [[[2.4356e+00, -2.4450e-01, -1.5921e+00], [1.0904e+00, 2.2260e-01, 7.0902e-02], [8.4734e-01, 9.5822e-01, -1.5078e+00]]], [[[1.0904e+00, 2.4356e+00, -2.4450e-01], [8.4734e-01, 2.2260e-01, -1.5921e+00], [9.5822e-01, -1.5078e+00, 7.0902e-02]]], [[[2.1173e+00, -7.3524e-01, 1.8888e+00], [1.0169e+00, 4.7033e-01, -1.0875e+00], [-1.0736e+00, -5.2245e-01, -2.8733e-01]]], [[[1.0169e+00, 2.1173e+00, -7.3524e-01], [-1.0736e+00, 4.7033e-01, 1.8888e+00], [-5.2245e-01, -2.8733e-01, -1.0875e+00]]], [[[-1.0736e+00, 1.0169e+00, 2.1173e+00], [-5.2245e-01, 4.7033e-01, -7.3524e-01], [-2.8733e-01, -1.0875e+00, 1.8888e+00]]], [[[-5.2245e-01, -1.0736e+00, 1.0169e+00], [-2.8733e-01, 4.7033e-01, 2.1173e+00], [-1.0875e+00, 1.8888e+00, -7.3524e-01]]], [[[-2.8733e-01, -5.2245e-01, -1.0736e+00], [-1.0875e+00, 4.7033e-01, 1.0169e+00], [1.8888e+00, -7.3524e-01, 2.1173e+00]]], [[[-1.0875e+00, -2.8733e-01, -5.2245e-01], [1.8888e+00, 4.7033e-01, -1.0736e+00], [-7.3524e-01, 2.1173e+00, 1.0169e+00]]], [[[1.8888e+00, -1.0875e+00, -2.8733e-01], [-7.3524e-01, 4.7033e-01, -5.2245e-01], [2.1173e+00, 1.0169e+00, -1.0736e+00]]], [[[-7.3524e-01, 1.8888e+00, -1.0875e+00], [2.1173e+00, 4.7033e-01, -2.8733e-01], [1.0169e+00, -1.0736e+00, -5.2245e-01]]], [[[-5.6433e-01, 1.5835e+00, -1.5826e+00], [-8.8974e-01, -4.3128e-01, -2.2423e-01], [1.6552e-03, -1.7292e+00, 2.6639e-01]]], [[[-8.8974e-01, -5.6433e-01, 1.5835e+00], [1.6552e-03, -4.3128e-01, -1.5826e+00], [-1.7292e+00, 2.6639e-01, -2.2423e-01]]], [[[1.6552e-03, -8.8974e-01, -5.6433e-01], [-1.7292e+00, -4.3128e-01, 1.5835e+00], [2.6639e-01, -2.2423e-01, -1.5826e+00]]], [[[-1.7292e+00, 1.6552e-03, -8.8974e-01], [2.6639e-01, -4.3128e-01, 
-5.6433e-01], [-2.2423e-01, -1.5826e+00, 1.5835e+00]]], [[[2.6639e-01, -1.7292e+00, 1.6552e-03], [-2.2423e-01, -4.3128e-01, -8.8974e-01], [-1.5826e+00, 1.5835e+00, -5.6433e-01]]], [[[-2.2423e-01, 2.6639e-01, -1.7292e+00], [-1.5826e+00, -4.3128e-01, 1.6552e-03], [1.5835e+00, -5.6433e-01, -8.8974e-01]]], [[[-1.5826e+00, -2.2423e-01, 2.6639e-01], [1.5835e+00, -4.3128e-01, -1.7292e+00], [-5.6433e-01, -8.8974e-01, 1.6552e-03]]], [[[1.5835e+00, -1.5826e+00, -2.2423e-01], [-5.6433e-01, -4.3128e-01, 2.6639e-01], [-8.8974e-01, 1.6552e-03, -1.7292e+00]]], [[[-1.2951e-01, 1.3493e+00, -1.9329e+00], [5.6248e-01, -5.1189e-01, 1.3614e+00], [3.3680e-01, -8.7148e-01, 5.0592e-01]]], [[[5.6248e-01, -1.2951e-01, 1.3493e+00], [3.3680e-01, -5.1189e-01, -1.9329e+00], [-8.7148e-01, 5.0592e-01, 1.3614e+00]]], [[[3.3680e-01, 5.6248e-01, -1.2951e-01], [-8.7148e-01, -5.1189e-01, 1.3493e+00], [5.0592e-01, 1.3614e+00, -1.9329e+00]]], [[[-8.7148e-01, 3.3680e-01, 5.6248e-01], [5.0592e-01, -5.1189e-01, -1.2951e-01], [1.3614e+00, -1.9329e+00, 1.3493e+00]]], [[[5.0592e-01, -8.7148e-01, 3.3680e-01], [1.3614e+00, -5.1189e-01, 5.6248e-01], [-1.9329e+00, 1.3493e+00, -1.2951e-01]]], [[[1.3614e+00, 5.0592e-01, -8.7148e-01], [-1.9329e+00, -5.1189e-01, 3.3680e-01], [1.3493e+00, -1.2951e-01, 5.6248e-01]]], [[[-1.9329e+00, 1.3614e+00, 5.0592e-01], [1.3493e+00, -5.1189e-01, -8.7148e-01], [-1.2951e-01, 5.6248e-01, 3.3680e-01]]], [[[1.3493e+00, -1.9329e+00, 1.3614e+00], [-1.2951e-01, -5.1189e-01, 5.0592e-01], [5.6248e-01, 3.3680e-01, -8.7148e-01]]], [[[1.6781e-02, -8.3929e-01, 1.2060e+00], [-1.0764e+00, 4.7821e-01, 1.5342e+00], [-4.4542e-01, -1.8606e+00, 3.0827e-01]]], [[[-1.0764e+00, 1.6781e-02, -8.3929e-01], [-4.4542e-01, 4.7821e-01, 1.2060e+00], [-1.8606e+00, 3.0827e-01, 1.5342e+00]]], [[[-4.4542e-01, -1.0764e+00, 1.6781e-02], [-1.8606e+00, 4.7821e-01, -8.3929e-01], [3.0827e-01, 1.5342e+00, 1.2060e+00]]], [[[-1.8606e+00, -4.4542e-01, -1.0764e+00], [3.0827e-01, 4.7821e-01, 1.6781e-02], [1.5342e+00, 1.2060e+00, 
-8.3929e-01]]], [[[3.0827e-01, -1.8606e+00, -4.4542e-01], [1.5342e+00, 4.7821e-01, -1.0764e+00], [1.2060e+00, -8.3929e-01, 1.6781e-02]]], [[[1.5342e+00, 3.0827e-01, -1.8606e+00], [1.2060e+00, 4.7821e-01, -4.4542e-01], [-8.3929e-01, 1.6781e-02, -1.0764e+00]]], [[[1.2060e+00, 1.5342e+00, 3.0827e-01], [-8.3929e-01, 4.7821e-01, -1.8606e+00], [1.6781e-02, -1.0764e+00, -4.4542e-01]]], [[[-8.3929e-01, 1.2060e+00, 1.5342e+00], [1.6781e-02, 4.7821e-01, 3.0827e-01], [-1.0764e+00, -4.4542e-01, -1.8606e+00]]]]) expected_grad = np.array([[[[[8., 8., 8.], [8., 8., 8.], [8., 8., 8.]]]], [[[[8., 8., 8.], [8., 8., 8.], [8., 8., 8.]]]], [[[[8., 8., 8.], [8., 8., 8.], [8., 8., 8.]]]], [[[[8., 8., 8.], [8., 8., 8.], [8., 8., 8.]]]], [[[[8., 8., 8.], [8., 8., 8.], [8., 8., 8.]]]], [[[[8., 8., 8.], [8., 8., 8.], [8., 8., 8.]]]], [[[[8., 8., 8.], [8., 8., 8.], [8., 8., 8.]]]], [[[[8., 8., 8.], [8., 8., 8.], [8., 8., 8.]]]]]) @pytest.mark.parametrize('device', [ 'cpu', pytest.param( 'cuda', marks=pytest.mark.skipif( not torch.cuda.is_available(), reason='requires CUDA support')), ]) def test_active_rotated_filter(device): feature = torch.tensor( np_feature, dtype=torch.float, device=device, requires_grad=True) indices = torch.tensor(np_indices, dtype=torch.int, device=device) output = active_rotated_filter(feature, indices) output.backward(torch.ones_like(output)) assert np.allclose(output.data.cpu().numpy(), expected_output, atol=1e-3) assert np.allclose( feature.grad.data.cpu().numpy(), expected_grad, atol=1e-3) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_assign_score_withk.py ================================================ import pytest import torch from mmcv.ops import assign_score_withk @pytest.mark.skipif( not torch.cuda.is_available(), reason='requires CUDA support') def test_paconv_assign_scores(): scores = torch.tensor([[[[0.06947571, 0.6065746], [0.28462553, 0.8378516], [0.7595994, 0.97220325], [0.519155, 
0.766185]], [[0.15348864, 0.6051019], [0.21510637, 0.31916398], [0.00236845, 0.5842595], [0.6783676, 0.5216348]]], [[[0.23089725, 0.5568468], [0.7405102, 0.06438422], [0.6887394, 0.22089851], [0.0502342, 0.79228795]], [[0.44883424, 0.15427643], [0.13817799, 0.34856772], [0.7989621, 0.33788306], [0.15699774, 0.7693662]]]]).float().cuda() scores.requires_grad_() points = torch.tensor([[[[0.06001121, 0.92963666, 0.5753327, 0.7251477], [0.53563064, 0.23129565, 0.92366195, 0.44261628]], [[0.5770022, 0.56625944, 0.23560429, 0.11178821], [0.7735967, 0.95678777, 0.25468266, 0.02895975]], [[0.0589869, 0.09017515, 0.5977862, 0.02797985], [0.603862, 0.35991007, 0.85761684, 0.3096559]], [[0.22359002, 0.13983732, 0.5544243, 0.68863827], [0.85646236, 0.75651926, 0.8638947, 0.83600986]], [[0.45424145, 0.27458847, 0.6456112, 0.47162914], [0.15773582, 0.47645122, 0.79964715, 0.3323908]], [[0.8351399, 0.84696376, 0.9431732, 0.29418713], [0.77168906, 0.6996871, 0.19354361, 0.03392768]], [[0.30976456, 0.7074133, 0.581795, 0.976677], [0.69656056, 0.07199162, 0.4708506, 0.29117996]], [[0.5829035, 0.30201727, 0.76556486, 0.0935446], [0.88030535, 0.16129416, 0.9242525, 0.49545723]]], [[[0.50899494, 0.06482804, 0.44939405, 0.37704808], [0.47028124, 0.11969638, 0.62823206, 0.28560323]], [[0.40690207, 0.689753, 0.51636654, 0.23040164], [0.06935787, 0.00488842, 0.22462702, 0.09182382]], [[0.26611632, 0.00184339, 0.7730655, 0.5228131], [0.87776035, 0.77895886, 0.2787183, 0.16620636]], [[0.502574, 0.04039001, 0.5368497, 0.98379374], [0.40973026, 0.3238272, 0.9733018, 0.13988364]], [[0.04586202, 0.20983845, 0.20662665, 0.22270602], [0.60387236, 0.5155574, 0.51237285, 0.6528438]], [[0.45735973, 0.86821306, 0.61054605, 0.8370336], [0.45193362, 0.3734138, 0.7825672, 0.5699416]], [[0.44591594, 0.12447512, 0.09282011, 0.7055254], [0.25223452, 0.46696228, 0.7051136, 0.892151]], [[0.49615085, 0.47321403, 0.93138885, 0.7652197], [0.38766378, 0.30332977, 0.23131835, 0.02863514]]]]).float().cuda() 
points.requires_grad_() centers = torch.tensor([[[[0.83878064, 0.96658987, 0.8033424, 0.9598312], [0.45035273, 0.8768925, 0.977736, 0.54547966]], [[0.01041394, 0.597893, 0.36212963, 0.4410367], [0.94879234, 0.8372817, 0.21237361, 0.67945415]], [[0.5096087, 0.26401454, 0.60034937, 0.5417416], [0.87591463, 0.546456, 0.4096033, 0.16373193]], [[0.79547447, 0.1482386, 0.12840575, 0.45384115], [0.5640288, 0.944541, 0.5745328, 0.73229736]], [[0.93011934, 0.7406011, 0.62621707, 0.8677915], [0.91563636, 0.3595413, 0.6678378, 0.6085383]], [[0.22431666, 0.65617776, 0.7483924, 0.6263364], [0.30968404, 0.78204364, 0.14899081, 0.09628749]], [[0.73675203, 0.72104895, 0.4648038, 0.6101647], [0.7817645, 0.16572917, 0.3311919, 0.43407398]], [[0.8193154, 0.09559608, 0.05978829, 0.90262103], [0.4256065, 0.8165596, 0.8206446, 0.6604721]]], [[[0.7159653, 0.18600845, 0.21433902, 0.3159626], [0.3921569, 0.33221376, 0.5061177, 0.7961841]], [[0.95338356, 0.04785997, 0.67185795, 0.6538394], [0.4729132, 0.33404195, 0.17750603, 0.8445621]], [[0.6755793, 0.16193843, 0.75943846, 0.92123103], [0.2781859, 0.03114432, 0.710638, 0.52729136]], [[0.8376105, 0.10858494, 0.13208169, 0.365772], [0.5930795, 0.27390373, 0.14036089, 0.170403]], [[0.3479789, 0.89855295, 0.04844379, 0.9871029], [0.29781651, 0.0244137, 0.9179047, 0.8081611]], [[0.12460887, 0.44991326, 0.19382608, 0.35037738], [0.2773472, 0.4362057, 0.36757517, 0.5993509]], [[0.29630446, 0.90046406, 0.5417113, 0.13510644], [0.09623539, 0.04226565, 0.32001644, 0.44358212]], [[0.5274848, 0.82096446, 0.9415489, 0.7123748], [0.7537517, 0.8086482, 0.85345286, 0.7472754]]]]).float().cuda() centers.requires_grad_() knn_idx = torch.tensor([[[6, 7, 4, 6], [2, 4, 2, 4]], [[7, 1, 3, 2], [6, 0, 2, 6]]]).long().cuda() aggregate = 'sum' expected_output = torch.tensor( [[[[-0.08134781, 0.03877336, -0.8212776, -0.2869547], [-0.23378491, -0.24112664, -0.1600166, -0.4121864]], [[-0.05780616, -0.12298299, -0.0370461, -0.07889931], [-0.13956165, -0.02006848, 
-0.10940295, -0.0293439]], [[0.09284145, 0.58250105, 0.5927749, 0.16774094], [0.27070042, 0.13422406, 0.2617501, 0.23416464]], [[-0.06121218, -0.09561322, -0.20408826, 0.08079343], [0.00944228, 0.03874819, 0.08404065, 0.04041629]]], [[[-0.2110898, -0.13335688, -0.09315082, 0.08512095], [0.09121774, 0.15976946, 0.23994486, 0.14350912]], [[-0.36167958, -0.14891288, -0.64470863, -0.0646704], [-0.28276974, -0.08847666, -0.46904767, 0.20491874]], [[-0.34877953, -0.35533834, -0.25225785, -0.4638189], [-0.1420663, 0.09467781, 0.17088932, 0.22580585]], [[-0.3879708, -0.3991068, 0.05276498, -0.46989647], [0.32522714, -0.02163534, 0.21604237, 0.4346682]]]]).float() # test forward output = assign_score_withk(scores, points, centers, knn_idx, aggregate) assert torch.allclose(output.detach().cpu(), expected_output, atol=1e-6) # test backward loss = output.sum() loss.backward() expected_scores_grad = torch.tensor([[[[0.04288036, -0.18217683], [-0.78873926, 0.7485497], [-0.6866992, 0.05346543], [0.04288036, -0.18217683]], [[-1.1407862, 0.13533896], [-0.06964391, -0.22948086], [-1.1407862, 0.13533896], [-0.06964391, -0.22948086]]], [[[-0.3363995, -2.212181], [-1.1589496, -2.7724311], [-0.9387654, -1.3163853], [-1.4385346, -1.0614843]], [[-0.5048497, 1.4143617], [-0.47332114, 0.6017133], [-0.30974793, 1.1995442], [-0.5048497, 1.4143617]]]]).float() expected_points_grad = torch.tensor( [[[[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0.15585709, 0.15585709, 0.15585709, 0.15585709], [1.1893613, 1.1893613, 1.1893613, 1.1893613]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[1.6530733, 1.6530733, 1.6530733, 1.6530733], [1.8130021, 1.8130021, 1.8130021, 1.8130021]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0.58863074, 0.58863074, 0.58863074, 0.58863074], [1.3727596, 1.3727596, 1.3727596, 1.3727596]], [[0.28462553, 0.28462553, 0.28462553, 0.28462553], [0.8378516, 0.8378516, 0.8378516, 0.8378516]]], [[[0.13817799, 0.13817799, 0.13817799, 0.13817799], [0.34856772, 
0.34856772, 0.34856772, 0.34856772]], [[0.7405102, 0.7405102, 0.7405102, 0.7405102], [0.06438422, 0.06438422, 0.06438422, 0.06438422]], [[0.8491963, 0.8491963, 0.8491963, 0.8491963], [1.1301711, 1.1301711, 1.1301711, 1.1301711]], [[0.6887394, 0.6887394, 0.6887394, 0.6887394], [0.22089851, 0.22089851, 0.22089851, 0.22089851]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0.605832, 0.605832, 0.605832, 0.605832], [0.92364264, 0.92364264, 0.92364264, 0.92364264]], [[0.23089725, 0.23089725, 0.23089725, 0.23089725], [0.5568468, 0.5568468, 0.5568468, 0.5568468]]]]).float() expected_centers_grad = torch.tensor( [[[[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[-1.0493311, -1.0493311, -1.0493311, -1.0493311], [-2.0301602, -2.0301602, -2.0301602, -2.0301602]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[-1.6328557, -1.6328557, -1.6328557, -1.6328557], [-3.1828144, -3.1828144, -3.1828144, -3.1828144]], [[0., 0., 0., 0.], [0., 0., 0., 0.]]], [[[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.]], [[-1.5429721, -1.5429721, -1.5429721, -1.5429721], [-1.6100934, -1.6100934, -1.6100934, -1.6100934]], [[-1.7103812, -1.7103812, -1.7103812, -1.7103812], [-1.6344175, -1.6344175, -1.6344175, -1.6344175]]]]).float() assert torch.allclose( scores.grad.detach().cpu(), expected_scores_grad, atol=1e-6) assert torch.allclose( points.grad.detach().cpu(), expected_points_grad, atol=1e-6) assert torch.allclose( centers.grad.detach().cpu(), expected_centers_grad, atol=1e-6) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_ball_query.py ================================================ import pytest import torch from mmcv.ops import 
import pytest
import torch

from mmcv.ops import ball_query


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_ball_query():
    """Check ``ball_query`` indices for a plain and a dilated query.

    Both queries share the same point clouds; only the first two positional
    arguments differ (presumably the min/max radius -- confirm against the
    op's signature). The third argument, 5, matches the length of the last
    axis of the expected index tensors.
    """
    # Query centres: 2 batches x 5 points x 3 coordinates.
    new_xyz = torch.tensor([[[-0.0740, 1.3147, -1.3625],
                             [-2.2769, 2.7817, -0.2334],
                             [-0.4003, 2.4666, -0.5116],
                             [-0.0740, 1.3147, -1.3625],
                             [-0.0740, 1.3147, -1.3625]],
                            [[-2.0289, 2.4952, -0.1708],
                             [-2.0668, 6.0278, -0.4875],
                             [0.4066, 1.4211, -0.2947],
                             [-2.0289, 2.4952, -0.1708],
                             [-2.0289, 2.4952, -0.1708]]]).cuda()

    # Candidate points: 2 batches x 10 points x 3 coordinates.
    xyz = torch.tensor([[[-0.0740, 1.3147, -1.3625],
                         [0.5555, 1.0399, -1.3634],
                         [-0.4003, 2.4666, -0.5116],
                         [-0.5251, 2.4379, -0.8466],
                         [-0.9691, 1.1418, -1.3733],
                         [-0.2232, 0.9561, -1.3626],
                         [-2.2769, 2.7817, -0.2334],
                         [-0.2822, 1.3192, -1.3645],
                         [0.1533, 1.5024, -1.0432],
                         [0.4917, 1.1529, -1.3496]],
                        [[-2.0289, 2.4952, -0.1708],
                         [-0.7188, 0.9956, -0.5096],
                         [-2.0668, 6.0278, -0.4875],
                         [-1.9304, 3.3092, 0.6610],
                         [0.0949, 1.4332, 0.3140],
                         [-1.2879, 2.0008, -0.7791],
                         [-0.7252, 0.9611, -0.6371],
                         [0.4066, 1.4211, -0.2947],
                         [0.3220, 1.4447, 0.3548],
                         [-0.9744, 2.3856, -1.2000]]]).cuda()

    # (first two positional args of ball_query, expected index tensor)
    scenarios = (
        # plain ball query
        ((0, 0.2), [[[0, 0, 0, 0, 0],
                     [6, 6, 6, 6, 6],
                     [2, 2, 2, 2, 2],
                     [0, 0, 0, 0, 0],
                     [0, 0, 0, 0, 0]],
                    [[0, 0, 0, 0, 0],
                     [2, 2, 2, 2, 2],
                     [7, 7, 7, 7, 7],
                     [0, 0, 0, 0, 0],
                     [0, 0, 0, 0, 0]]]),
        # test dilated ball query
        ((0.2, 0.4), [[[0, 5, 7, 0, 0],
                       [6, 6, 6, 6, 6],
                       [2, 3, 2, 2, 2],
                       [0, 5, 7, 0, 0],
                       [0, 5, 7, 0, 0]],
                      [[0, 0, 0, 0, 0],
                       [2, 2, 2, 2, 2],
                       [7, 7, 7, 7, 7],
                       [0, 0, 0, 0, 0],
                       [0, 0, 0, 0, 0]]]),
    )

    for (first_arg, second_arg), wanted in scenarios:
        found = ball_query(first_arg, second_arg, 5, xyz, new_xyz)
        wanted_idx = torch.tensor(wanted).cuda()
        assert torch.all(found == wanted_idx)
import numpy as np
import torch
import torch.nn.functional as F


class TestBilinearGridSample(object):
    """Compare mmcv's ``bilinear_grid_sample`` with ``F.grid_sample``."""

    def _test_bilinear_grid_sample(self,
                                   dtype=torch.float,
                                   align_corners=False,
                                   multiplier=1,
                                   precision=1e-3):
        """Run one comparison on a random 20x20 single-channel image.

        Args:
            dtype: dtype of the random input; the grid is cast to match it.
            align_corners: forwarded to ``affine_grid``, the op under test
                and the reference ``grid_sample``.
            multiplier: factor applied in place to the sampling grid.
            precision: passed to ``np.allclose`` as its third positional
                argument, i.e. the relative tolerance.
        """
        from mmcv.ops.point_sample import bilinear_grid_sample

        image = torch.rand(1, 1, 20, 20, dtype=dtype)

        # Identity affine transform, resampled onto a 15x15 output grid.
        theta = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
        sample_grid = F.affine_grid(
            theta, (1, 1, 15, 15), align_corners=align_corners)
        sample_grid = sample_grid.type_as(image)
        sample_grid *= multiplier

        actual = bilinear_grid_sample(
            image, sample_grid, align_corners=align_corners)
        reference = F.grid_sample(
            image, sample_grid, align_corners=align_corners)

        actual_np = actual.data.detach().cpu().numpy()
        reference_np = reference.data.detach().cpu().numpy()
        assert np.allclose(actual_np, reference_np, precision)

    def test_bilinear_grid_sample(self):
        """Sweep dtype / align_corners / grid multiplier combinations."""
        # Positional arguments for _test_bilinear_grid_sample, in the exact
        # order the checks are executed. (torch.float, False) appears twice.
        settings = (
            (torch.double, False),
            (torch.double, True),
            (torch.float, False),
            (torch.float, True),
            (torch.float, False),
            (torch.float, True, 5),
            (torch.float, False, 10),
            (torch.float, True, -6),
            (torch.float, False, -10),
            (torch.double, True, 5),
            (torch.double, False, 10),
            (torch.double, True, -6),
            (torch.double, False, -10),
        )
        for setting in settings:
            self._test_bilinear_grid_sample(*setting)
-1., -1.], [3., 12., -1., 2.], [3., 7., 1., 2.], [4., 7., 1., 1.], [6., 12., -1., -2.], [4., 12., -1., 1.], [4., 9., -1., 1.], [4., 11., -1., 1.]]]], } input_grad_dict = { # [1,4c,h,w] for each value # the grad is manually checked for its correctness # pool_size=1 1: [[[[0., 1., 4., 6.], [0., 1., 0., 0.], [0., 0., 0., 0.]], [[2., 4., 0., 0.], [0., 0., 0., 0.], [4., 1., 1., 0.]], [[0., 0., 0., 0.], [0., 0., 3., 3.], [0., 2., 1., 3.]], [[0., 1., 4., 6.], [0., 0., 0., 0.], [0., 1., 0., 0.]]]], # pool_size=2 2: [[[[0., 1., 4., 6.], [0., 1., 0., 0.], [0., 0., 0., 0.]], [[2., 4., 0., 0.], [0., 0., 0., 0.], [4., 1., 1., 0.]], [[0., 0., 0., 0.], [0., 0., 5., 1.], [0., 2., 1., 3.]], [[0., 1., 4., 6.], [0., 0., 0., 0.], [0., 1., 0., 0.]]]], } def _test_border_align_allclose(device, dtype, pool_size): if not torch.cuda.is_available() and device == 'cuda': pytest.skip('test requires GPU') try: from mmcv.ops import border_align, BorderAlign except ModuleNotFoundError: pytest.skip('BorderAlign op is not successfully compiled') np_input = np.array(input_arr) np_boxes = np.array(boxes_arr) np_output = np.array(output_dict[pool_size]) np_grad = np.array(input_grad_dict[pool_size]) input = torch.tensor( np_input, dtype=dtype, device=device, requires_grad=True) boxes = torch.tensor(np_boxes, dtype=dtype, device=device) # test for border_align input_cp = copy.deepcopy(input) output = border_align(input_cp, boxes, pool_size) output.backward(torch.ones_like(output)) assert np.allclose( output.data.type(dtype).cpu().numpy(), np_output, atol=1e-5) assert np.allclose( input_cp.grad.data.type(dtype).cpu().numpy(), np_grad, atol=1e-5) # test for BorderAlign pool_module = BorderAlign(pool_size) output = pool_module(input, boxes) output.backward(torch.ones_like(output)) assert np.allclose( output.data.type(dtype).cpu().numpy(), np_output, atol=1e-5) assert np.allclose( input.grad.data.type(dtype).cpu().numpy(), np_grad, atol=1e-5) @pytest.mark.parametrize('device', ['cuda']) 
import numpy as np
import pytest
import torch


class TestBoxIoURotated(object):
    """Tests for ``mmcv.ops.box_iou_rotated`` on CPU and CUDA.

    The four public tests used to be copy-pasted bodies that differed only
    in the device and in the ``mode`` / expected values. They now share one
    private helper, so fixtures and assertions live in a single place.
    """

    # Five values per box; the last column is an angle (its sign is flipped
    # for the counter-clockwise checks below).
    # NOTE(review): the first four values look like (cx, cy, w, h) -- confirm.
    _BOXES1 = [[1.0, 1.0, 3.0, 4.0, 0.5], [2.0, 2.0, 3.0, 4.0, 0.6],
               [7.0, 7.0, 8.0, 8.0, 0.4]]
    _BOXES2 = [[0.0, 2.0, 2.0, 5.0, 0.3], [2.0, 1.0, 3.0, 3.0, 0.5],
               [5.0, 5.0, 6.0, 7.0, 0.4]]

    # Expected results when no ``mode`` argument is passed to the op.
    _EXPECT_IOU = [[0.3708, 0.4351, 0.0000], [0.1104, 0.4487, 0.0424],
                   [0.0000, 0.0000, 0.3622]]
    _EXPECT_IOU_ALIGNED = [0.3708, 0.4487, 0.3622]

    # Expected results for ``mode='iof'``.
    _EXPECT_IOF = [[0.4959, 0.5306, 0.0000], [0.1823, 0.5420, 0.1832],
                   [0.0000, 0.0000, 0.4404]]
    _EXPECT_IOF_ALIGNED = [0.4959, 0.5420, 0.4404]

    def _check_box_iou_rotated(self,
                               expect,
                               expect_aligned,
                               use_cuda=False,
                               **op_kwargs):
        """Run the clockwise and counter-clockwise checks for one setup.

        Args:
            expect: expected pairwise result (3x3 nested list).
            expect_aligned: expected result with ``aligned=True`` (3 values).
            use_cuda: move the box tensors to the GPU before calling the op.
            **op_kwargs: extra keyword arguments forwarded unchanged to
                every ``box_iou_rotated`` call (e.g. ``mode='iof'``). When
                empty, the op is called exactly as the original tests did,
                relying on its default mode.
        """
        from mmcv.ops import box_iou_rotated

        np_expect = np.asarray(expect, dtype=np.float32)
        np_expect_aligned = np.asarray(expect_aligned, dtype=np.float32)

        # Fresh arrays per call: the tensors are negated in place below and
        # torch.from_numpy shares memory with its source array.
        boxes1 = torch.from_numpy(np.asarray(self._BOXES1, dtype=np.float32))
        boxes2 = torch.from_numpy(np.asarray(self._BOXES2, dtype=np.float32))
        if use_cuda:
            boxes1 = boxes1.cuda()
            boxes2 = boxes2.cuda()

        # test cw angle definition
        ious = box_iou_rotated(boxes1, boxes2, **op_kwargs)
        assert np.allclose(ious.cpu().numpy(), np_expect, atol=1e-4)

        ious = box_iou_rotated(boxes1, boxes2, aligned=True, **op_kwargs)
        assert np.allclose(
            ious.cpu().numpy(), np_expect_aligned, atol=1e-4)

        # test ccw angle definition
        boxes1[..., -1] *= -1
        boxes2[..., -1] *= -1

        ious = box_iou_rotated(boxes1, boxes2, clockwise=False, **op_kwargs)
        assert np.allclose(ious.cpu().numpy(), np_expect, atol=1e-4)

        ious = box_iou_rotated(
            boxes1, boxes2, aligned=True, clockwise=False, **op_kwargs)
        assert np.allclose(
            ious.cpu().numpy(), np_expect_aligned, atol=1e-4)

    def test_box_iou_rotated_cpu(self):
        self._check_box_iou_rotated(self._EXPECT_IOU,
                                    self._EXPECT_IOU_ALIGNED)

    @pytest.mark.skipif(
        not torch.cuda.is_available(), reason='requires CUDA support')
    def test_box_iou_rotated_cuda(self):
        self._check_box_iou_rotated(
            self._EXPECT_IOU, self._EXPECT_IOU_ALIGNED, use_cuda=True)

    def test_box_iou_rotated_iof_cpu(self):
        self._check_box_iou_rotated(
            self._EXPECT_IOF, self._EXPECT_IOF_ALIGNED, mode='iof')

    @pytest.mark.skipif(
        not torch.cuda.is_available(), reason='requires CUDA support')
    def test_box_iou_rotated_iof_cuda(self):
        self._check_box_iou_rotated(
            self._EXPECT_IOF,
            self._EXPECT_IOF_ALIGNED,
            use_cuda=True,
            mode='iof')
requires_grad=True, device='cuda').double() mask = torch.randn( 2, 100, 6, 6, requires_grad=True, device='cuda').sigmoid().double() gradcheck(CARAFENaive(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4) def test_carafe_gradcheck(self): if not torch.cuda.is_available(): return from mmcv.ops import CARAFE feat = torch.randn( 2, 64, 3, 3, requires_grad=True, device='cuda').double() mask = torch.randn( 2, 100, 6, 6, requires_grad=True, device='cuda').sigmoid().double() gradcheck(CARAFE(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_cc_attention.py ================================================ import numpy as np import torch import torch.nn as nn class Loss(nn.Module): def __init__(self): super().__init__() def forward(self, input, target): input = input.view(-1) target = target.view(-1) return torch.mean(input - target) class TestCrissCrossAttention(object): def test_cc_attention(self): device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') from mmcv.ops import CrissCrossAttention loss_func = Loss() input = np.fromfile( 'tests/data/for_ccattention/ccattention_input.bin', dtype=np.float32) output = np.fromfile( 'tests/data/for_ccattention/ccattention_output.bin', dtype=np.float32) input = input.reshape((1, 32, 45, 45)) output = output.reshape((1, 32, 45, 45)) label = torch.ones((1, 32, 45, 45)) input = torch.FloatTensor(input) output = torch.FloatTensor(output) input.requires_grad = True shape = input.shape channel = shape[1] cca = CrissCrossAttention(channel) cca.to(device) input = input.to(device) label = label.to(device) cca.train() test_output = cca(input) test_loss = loss_func(test_output, label) test_loss.backward() test_output = test_output.detach().cpu().numpy() output = output.numpy() assert np.allclose(test_output, output) assert test_output.shape == shape ================================================ FILE: 
import numpy as np
import torch


def test_contour_expand():
    """Check ``contour_expand`` on a 10x10 grid, for numpy and torch inputs.

    Fixture layout (rows 3..6 only, everything else is zero):
      * kernel label: columns 2-3 carry label 1, column 8 carries label 2;
      * first mask:   columns 2..8 are set;
      * expectation:  columns 2-5 become label 1, columns 6-8 label 2.
    """
    from mmcv.ops import contour_expand

    band = slice(3, 7)

    np_internal_kernel_label = np.zeros((10, 10), dtype=np.int32)
    np_internal_kernel_label[band, 2:4] = 1
    np_internal_kernel_label[band, 8] = 2

    outer_mask = np.zeros((10, 10), dtype=np.uint8)
    outer_mask[band, 2:9] = 1
    inner_mask = (np_internal_kernel_label > 0).astype(np.uint8)
    np_kernel_mask = np.stack([outer_mask, inner_mask])

    expected = np.zeros((10, 10), dtype=int)
    expected[band, 2:6] = 1
    expected[band, 6:9] = 2
    gt = expected.tolist()

    min_area = 1
    kernel_region_num = 3

    # numpy inputs
    result = contour_expand(np_kernel_mask, np_internal_kernel_label,
                            min_area, kernel_region_num)
    assert np.allclose(result, gt)

    # torch inputs built from the same arrays
    result = contour_expand(
        torch.from_numpy(np_kernel_mask),
        torch.from_numpy(np_internal_kernel_label), min_area,
        kernel_region_num)
    assert np.allclose(result, gt)
import numpy as np
import pytest
import torch

from mmcv.ops import convex_giou, convex_iou

# Two point sets, 18 values each.
np_pointsets = np.asarray([[
    1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0, 1.0, 2.0, 3.0, 3.0,
    2.0, 1.5, 1.5
], [
    1.5, 1.5, 2.5, 2.5, 1.5, 2.5, 2.5, 1.5, 1.5, 3.5, 3.5, 1.5, 2.5, 3.5, 3.5,
    2.5, 2.0, 2.0
]])

# Two polygons, 8 values each.
np_polygons = np.asarray([[1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0],
                          [1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 1.0]])

np_expected_iou = np.asarray([[0.2857, 0.8750], [0.0588, 0.4286]])

np_expected_giou = np.asarray([0.2857, 0.3831])

np_expected_grad = np.asarray([[
    0.0204, 0.0408, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0612,
    -0.0408, -0.0408, 0.0816, -0.0408, -0.0816, -0.0816, -0.0408, 0.0000,
    0.0000
], [
    -0.1848, -0.1848, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, -0.1076,
    -0.0801, -0.0801, -0.1076, -0.0367, -0.0734, -0.0734, -0.0367, 0.0000,
    0.0000
]])


def _as_cuda_float(array):
    """Copy a numpy fixture to the GPU as a float32 tensor."""
    return torch.from_numpy(array).cuda().float()


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_convex_iou():
    """``convex_iou`` must reproduce the stored 2x2 table within 1e-3."""
    pointsets = _as_cuda_float(np_pointsets)
    polygons = _as_cuda_float(np_polygons)
    wanted = _as_cuda_float(np_expected_iou)

    measured = convex_iou(pointsets, polygons)
    assert torch.allclose(measured, wanted, atol=1e-3)


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_convex_giou():
    """``convex_giou`` must reproduce both returned tensors within 1e-3."""
    pointsets = _as_cuda_float(np_pointsets)
    polygons = _as_cuda_float(np_polygons)
    wanted_giou = _as_cuda_float(np_expected_giou)
    wanted_grad = _as_cuda_float(np_expected_grad)

    giou, grad = convex_giou(pointsets, polygons)
    assert torch.allclose(giou, wanted_giou, atol=1e-3)
    assert torch.allclose(grad, wanted_grad, atol=1e-3)
import pytest
import torch

from mmcv.ops import CornerPool


def test_corner_pool_device_and_dtypes_cpu():
    """Check the four ``CornerPool`` modes on small integer tensors.

    CommandLine:
        xdoctest -m tests/test_corner_pool.py test_corner_pool_device_and_dtypes_cpu
    """
    # pool mode must in ['bottom', 'left', 'right', 'top']
    with pytest.raises(AssertionError):
        CornerPool('corner')

    # Input for the horizontal modes (left / right).
    lr_tensor = torch.tensor([[[[0, 0, 0, 0, 0], [2, 1, 3, 0, 2],
                                [5, 4, 1, 1, 6], [0, 0, 0, 0, 0],
                                [0, 0, 0, 0, 0]]]])
    # Input for the vertical modes (top / bottom).
    tb_tensor = torch.tensor([[[[0, 3, 1, 0, 0], [0, 1, 1, 0, 0],
                                [0, 3, 4, 0, 0], [0, 2, 2, 0, 0],
                                [0, 0, 2, 0, 0]]]])

    # (mode, input tensor, expected pooled values), in execution order.
    checks = (
        ('left', lr_tensor, [[[[0, 0, 0, 0, 0], [3, 3, 3, 2, 2],
                               [6, 6, 6, 6, 6], [0, 0, 0, 0, 0],
                               [0, 0, 0, 0, 0]]]]),
        ('right', lr_tensor, [[[[0, 0, 0, 0, 0], [2, 2, 3, 3, 3],
                                [5, 5, 5, 5, 6], [0, 0, 0, 0, 0],
                                [0, 0, 0, 0, 0]]]]),
        ('top', tb_tensor, [[[[0, 3, 4, 0, 0], [0, 3, 4, 0, 0],
                              [0, 3, 4, 0, 0], [0, 2, 2, 0, 0],
                              [0, 0, 2, 0, 0]]]]),
        ('bottom', tb_tensor, [[[[0, 3, 1, 0, 0], [0, 3, 1, 0, 0],
                                 [0, 3, 4, 0, 0], [0, 3, 4, 0, 0],
                                 [0, 3, 4, 0, 0]]]]),
    )

    for mode, source, answer_values in checks:
        answer = torch.tensor(answer_values)
        pooled = CornerPool(mode)(source)
        # The op must keep the tensor type and match the expected values.
        assert pooled.type() == source.type()
        assert torch.equal(pooled, answer)
import pytest
import torch

from mmcv.ops import Correlation

_input1 = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
_input2 = [[[[1., 2., 3.], [3., 1., 2.], [8., 5., 2.]]]]

gt_out_shape = (1, 1, 1, 3, 3)
_gt_out = [[[[[1., 4., 9.], [0., 1., 4.], [24., 25., 4.]]]]]
gt_input1_grad = [[[[1., 2., 3.], [3., 1., 2.], [8., 5., 2.]]]]


def assert_equal_tensor(tensor_a, tensor_b):
    """Assert that the two tensors are equal element by element."""
    assert torch.all(tensor_a.eq(tensor_b))


class TestCorrelation:
    """Forward/backward check of ``Correlation`` with zero displacement."""

    def _test_correlation(self, dtype=torch.float):
        """Run one forward/backward pass on the GPU for ``dtype``.

        With an all-ones upstream gradient the test expects each input's
        gradient to equal the other input.
        """
        layer = Correlation(max_displacement=0)

        first = torch.tensor(_input1, dtype=dtype).cuda()
        second = torch.tensor(_input2, dtype=dtype).cuda()
        first.requires_grad = True
        second.requires_grad = True

        result = layer(first, second)
        result.backward(torch.ones_like(result))

        # `eq_cpu` is not implemented for 'Half' in torch1.5.0,
        # so we need to make a comparison for cuda tensor
        # rather than cpu tensor
        wanted = torch.tensor(_gt_out, dtype=dtype).cuda()
        assert_equal_tensor(result, wanted)
        assert_equal_tensor(first.grad.detach(), second)
        assert_equal_tensor(second.grad.detach(), first)

    @pytest.mark.skipif(
        not torch.cuda.is_available(), reason='requires CUDA support')
    def test_correlation(self):
        for dtype in (torch.float, torch.double, torch.half):
            self._test_correlation(dtype)
class TestDeformconv(object):
    """Regression tests for ``DeformConv2dPack`` and ``DeformConv2d``.

    The module-level fixtures (``input``, ``offset_weight``, ``gt_out``, ...)
    describe a single sample; the helpers below repeat that sample
    ``batch_size`` times along the batch axis before running the op.
    """

    def _test_deformconv(self,
                         dtype=torch.float,
                         threshold=1e-3,
                         device='cuda',
                         batch_size=10,
                         im2col_step=2):
        """Check forward output and gradients of ``DeformConv2dPack``.

        Args:
            dtype: data type used for both the input and the model.
            threshold: passed to ``np.allclose`` as its third positional
                argument (the relative tolerance).
            device: ``'cpu'`` or ``'cuda'``. The CUDA variant is skipped when
                no GPU is available.
            batch_size: how many times the single-sample fixtures are
                repeated along the batch axis.
            im2col_step: forwarded to ``DeformConv2dPack``.
        """
        if not torch.cuda.is_available() and device == 'cuda':
            pytest.skip('test requires GPU')
        from mmcv.ops import DeformConv2dPack
        c_in = 1
        c_out = 1
        # ``batch_size`` is deliberately NOT reassigned here: a hard-coded
        # ``batch_size = 10`` used to shadow the parameter, so callers asking
        # for another batch size (e.g. ``batch_size=1`` to cover
        # ``batch_size < im2col_step``) silently ran with 10 samples.
        repeated_input = np.repeat(input, batch_size, axis=0)
        repeated_gt_out = np.repeat(gt_out, batch_size, axis=0)
        repeated_gt_x_grad = np.repeat(gt_x_grad, batch_size, axis=0)
        x = torch.tensor(repeated_input, device=device, dtype=dtype)
        x.requires_grad = True
        model = DeformConv2dPack(
            in_channels=c_in,
            out_channels=c_out,
            kernel_size=2,
            stride=1,
            padding=0,
            im2col_step=im2col_step)
        # Load the fixed weights so the results match the ground truth.
        model.conv_offset.weight.data = torch.nn.Parameter(
            torch.Tensor(offset_weight).reshape(8, 1, 2, 2))
        model.conv_offset.bias.data = torch.nn.Parameter(
            torch.Tensor(offset_bias).reshape(8))
        model.weight.data = torch.nn.Parameter(
            torch.Tensor(deform_weight).reshape(1, 1, 2, 2))
        if device == 'cuda':
            model.cuda()
        model.type(dtype)

        out = model(x)
        out.backward(torch.ones_like(out))

        assert np.allclose(out.data.detach().cpu().numpy(), repeated_gt_out,
                           threshold)
        assert np.allclose(x.grad.detach().cpu().numpy(), repeated_gt_x_grad,
                           threshold)
        # the batch size of the input is increased which results in
        # a larger gradient so we need to divide by the batch_size
        assert np.allclose(
            model.conv_offset.weight.grad.detach().cpu().numpy() / batch_size,
            gt_offset_weight_grad, threshold)
        assert np.allclose(
            model.conv_offset.bias.grad.detach().cpu().numpy() / batch_size,
            gt_offset_bias_grad, threshold)
        assert np.allclose(
            model.weight.grad.detach().cpu().numpy() / batch_size,
            gt_deform_weight_grad, threshold)

        from mmcv.ops import DeformConv2d

        # test bias
        model = DeformConv2d(1, 1, 2, stride=1, padding=0)
        assert not hasattr(model, 'bias')
        # test bias=True
        with pytest.raises(AssertionError):
            model = DeformConv2d(1, 1, 2, stride=1, padding=0, bias=True)
        # test in_channels % group != 0
        with pytest.raises(AssertionError):
            model = DeformConv2d(3, 2, 3, groups=2)
        # test out_channels % group != 0
        with pytest.raises(AssertionError):
            model = DeformConv2d(3, 4, 3, groups=3)

    def _test_amp_deformconv(self,
                             input_dtype,
                             threshold=1e-3,
                             batch_size=10,
                             im2col_step=2):
        """The function to test amp released on pytorch 1.6.0.

        The type of input data might be torch.float or torch.half,
        so we should test deform_conv in both cases. With amp, the
        data type of model will NOT be set manually.

        Returns immediately (without failing) when CUDA is unavailable.

        Args:
            input_dtype: torch.float or torch.half.
            threshold: the same as above function.
            batch_size: the same as above function.
            im2col_step: the same as above function.
        """
        if not torch.cuda.is_available():
            return
        from mmcv.ops import DeformConv2dPack
        c_in = 1
        c_out = 1
        repeated_input = np.repeat(input, batch_size, axis=0)
        repeated_gt_out = np.repeat(gt_out, batch_size, axis=0)
        repeated_gt_x_grad = np.repeat(gt_x_grad, batch_size, axis=0)
        x = torch.Tensor(repeated_input).cuda().type(input_dtype)
        x.requires_grad = True
        model = DeformConv2dPack(
            in_channels=c_in,
            out_channels=c_out,
            kernel_size=2,
            stride=1,
            padding=0,
            im2col_step=im2col_step)
        model.conv_offset.weight.data = torch.nn.Parameter(
            torch.Tensor(offset_weight).reshape(8, 1, 2, 2))
        model.conv_offset.bias.data = torch.nn.Parameter(
            torch.Tensor(offset_bias).reshape(8))
        model.weight.data = torch.nn.Parameter(
            torch.Tensor(deform_weight).reshape(1, 1, 2, 2))
        # Only the device is set; the dtype is left as constructed.
        model.cuda()

        out = model(x)
        out.backward(torch.ones_like(out))

        assert np.allclose(out.data.detach().cpu().numpy(), repeated_gt_out,
                           threshold)
        assert np.allclose(x.grad.detach().cpu().numpy(), repeated_gt_x_grad,
                           threshold)
        # Gradients of the parameters accumulate over the batch, so they are
        # normalised by batch_size before the comparison.
        assert np.allclose(
            model.conv_offset.weight.grad.detach().cpu().numpy() / batch_size,
            gt_offset_weight_grad, threshold)
        assert np.allclose(
            model.conv_offset.bias.grad.detach().cpu().numpy() / batch_size,
            gt_offset_bias_grad, threshold)
        assert np.allclose(
            model.weight.grad.detach().cpu().numpy() / batch_size,
            gt_deform_weight_grad, threshold)

        from mmcv.ops import DeformConv2d

        # test bias
        model = DeformConv2d(1, 1, 2, stride=1, padding=0)
        assert not hasattr(model, 'bias')
        # test bias=True
        with pytest.raises(AssertionError):
            model = DeformConv2d(1, 1, 2, stride=1, padding=0, bias=True)
        # test in_channels % group != 0
        with pytest.raises(AssertionError):
            model = DeformConv2d(3, 2, 3, groups=2)
        # test out_channels % group != 0
        with pytest.raises(AssertionError):
            model = DeformConv2d(3, 4, 3, groups=3)

    def test_deformconv(self):
        """Run the CPU, CUDA and (when supported) amp variants."""
        self._test_deformconv(torch.double, device='cpu')
        self._test_deformconv(torch.float, device='cpu', threshold=1e-1)
        self._test_deformconv(torch.double)
        self._test_deformconv(torch.float)
        self._test_deformconv(torch.half, threshold=1e-1)
        # test batch_size < im2col_step
        self._test_deformconv(torch.float, batch_size=1, im2col_step=2)
        # test batch_size % im2col_step != 0
        with pytest.raises(
                AssertionError,
                match='batch size must be divisible by im2col_step'):
            self._test_deformconv(torch.float, batch_size=10, im2col_step=3)

        # test amp when torch version >= '1.6.0', the type of
        # input data for deformconv might be torch.float or torch.half
        if (TORCH_VERSION != 'parrots'
                and digit_version(TORCH_VERSION) >= digit_version('1.6.0')):
            with autocast(enabled=True):
                self._test_amp_deformconv(torch.float, 1e-1)
                self._test_amp_deformconv(torch.half, 1e-1)
spatial_scale = 1.0 sampling_ratio = 2 for case in inputs: np_input = np.array(case[0]) np_rois = np.array(case[1]) x = torch.tensor( np_input, device='cuda', dtype=torch.float, requires_grad=True) rois = torch.tensor(np_rois, device='cuda', dtype=torch.float) output_c = x.size(1) droipool = DeformRoIPoolPack((pool_h, pool_w), output_c, spatial_scale=spatial_scale, sampling_ratio=sampling_ratio).cuda() if _USING_PARROTS: gradcheck(droipool, (x, rois), no_grads=[rois]) else: gradcheck(droipool, (x, rois), eps=1e-2, atol=1e-2) def test_modulated_deform_roi_pool_gradcheck(self): if not torch.cuda.is_available(): return from mmcv.ops import ModulatedDeformRoIPoolPack pool_h = 2 pool_w = 2 spatial_scale = 1.0 sampling_ratio = 2 for case in inputs: np_input = np.array(case[0]) np_rois = np.array(case[1]) x = torch.tensor( np_input, device='cuda', dtype=torch.float, requires_grad=True) rois = torch.tensor(np_rois, device='cuda', dtype=torch.float) output_c = x.size(1) droipool = ModulatedDeformRoIPoolPack( (pool_h, pool_w), output_c, spatial_scale=spatial_scale, sampling_ratio=sampling_ratio).cuda() if _USING_PARROTS: gradcheck(droipool, (x, rois), no_grads=[rois]) else: gradcheck(droipool, (x, rois), eps=1e-2, atol=1e-2) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_focal_loss.py ================================================ import numpy as np import torch _USING_PARROTS = True try: from parrots.autograd import gradcheck except ImportError: from torch.autograd import gradcheck _USING_PARROTS = False # torch.set_printoptions(precision=8, threshold=100) inputs = [ ([[1., 0], [0, 1.]], [0, 1]), ([[1., 0, -1.], [0, 1., 2.]], [2, 1]), ([[1e-6, 2e-6, 3e-6], [4e-6, 5e-5, 6e-4], [7e-3, 8e-2, 9e-1]], [1, 2, 0]), ] softmax_outputs = [(0.00566451, [[-0.00657264, 0.00657264], [0.00657264, -0.00657264]]), (0.34956908, [[0.10165970, 0.03739851, -0.13905823], [0.01227554, -0.10298023, 0.09070466]]), (0.15754992, 
class Testfocalloss(object):
    """CUDA tests for the softmax and sigmoid focal-loss ops.

    Every helper returns immediately when CUDA is not available.
    """

    def _test_softmax(self, dtype=torch.float):
        """Compare ``softmax_focal_loss`` and its input gradient with
        ``softmax_outputs`` for every case in ``inputs``."""
        if not torch.cuda.is_available():
            return
        from mmcv.ops import softmax_focal_loss

        gamma, alpha = 2.0, 0.25
        for (logits, labels), (ref_loss, ref_grad) in zip(
                inputs, softmax_outputs):
            expected_grad = np.array(ref_grad)

            x = torch.from_numpy(np.array(logits)).cuda().type(dtype)
            x.requires_grad_()
            y = torch.from_numpy(np.array(labels)).cuda().long()

            loss = softmax_focal_loss(x, y, gamma, alpha, None, 'mean')
            loss.backward()

            assert np.allclose(loss.data.cpu().numpy(), ref_loss, 1e-2)
            assert np.allclose(x.grad.data.cpu(), expected_grad, 1e-2)

    def _test_sigmoid(self, dtype=torch.float):
        """Compare ``sigmoid_focal_loss`` and its input gradient with
        ``sigmoid_outputs`` for every case in ``inputs``."""
        if not torch.cuda.is_available():
            return
        from mmcv.ops import sigmoid_focal_loss

        gamma, alpha = 2.0, 0.25
        for (logits, labels), (ref_loss, ref_grad) in zip(
                inputs, sigmoid_outputs):
            expected_grad = np.array(ref_grad)

            x = torch.from_numpy(np.array(logits)).cuda().type(dtype)
            x.requires_grad_()
            y = torch.from_numpy(np.array(labels)).cuda().long()

            loss = sigmoid_focal_loss(x, y, gamma, alpha, None, 'mean')
            loss.backward()

            assert np.allclose(loss.data.cpu().numpy(), ref_loss, 1e-2)
            assert np.allclose(x.grad.data.cpu(), expected_grad, 1e-2)

    def _test_grad_softmax(self, dtype=torch.float):
        """Run ``gradcheck`` on ``SoftmaxFocalLoss`` for every input case.

        Nothing is checked when running under parrots.
        """
        if not torch.cuda.is_available():
            return
        from mmcv.ops import SoftmaxFocalLoss

        gamma, alpha = 2.0, 0.25
        for logits, labels in inputs:
            x = torch.from_numpy(np.array(logits)).cuda().type(dtype)
            x.requires_grad_()
            y = torch.from_numpy(np.array(labels)).cuda().long()

            floss = SoftmaxFocalLoss(gamma, alpha)
            # The parrots variant
            # (gradcheck(floss, (x, y), no_grads=[y])) is disabled.
            if not _USING_PARROTS:
                gradcheck(floss, (x, y), eps=1e-2, atol=1e-2)

    def _test_grad_sigmoid(self, dtype=torch.float):
        """Run ``gradcheck`` on ``SigmoidFocalLoss`` for every input case.

        Nothing is checked when running under parrots.
        """
        if not torch.cuda.is_available():
            return
        from mmcv.ops import SigmoidFocalLoss

        gamma, alpha = 2.0, 0.25
        for logits, labels in inputs:
            x = torch.from_numpy(np.array(logits)).cuda().type(dtype)
            x.requires_grad_()
            y = torch.from_numpy(np.array(labels)).cuda().long()

            floss = SigmoidFocalLoss(gamma, alpha)
            # The parrots variant
            # (gradcheck(floss, (x, y), no_grads=[y])) is disabled.
            if not _USING_PARROTS:
                gradcheck(floss, (x, y), eps=1e-2, atol=1e-2)

    def test_softmax_float(self):
        """Softmax focal loss, float32 inputs."""
        self._test_softmax(dtype=torch.float)

    def test_softmax_half(self):
        """Softmax focal loss, float16 inputs."""
        self._test_softmax(dtype=torch.half)

    def test_sigmoid_float(self):
        """Sigmoid focal loss, float32 inputs."""
        self._test_sigmoid(dtype=torch.float)

    def test_sigmoid_half(self):
        """Sigmoid focal loss, float16 inputs."""
        self._test_sigmoid(dtype=torch.half)

    def test_grad_softmax_float(self):
        """Gradient check of ``SoftmaxFocalLoss``, float32 inputs."""
        self._test_grad_softmax(dtype=torch.float)

    def test_grad_sigmoid_float(self):
        """Gradient check of ``SigmoidFocalLoss``, float32 inputs."""
        self._test_grad_sigmoid(dtype=torch.float)
class TestFusedBiasLeakyReLU(object):
    """Gradient checks for the ``FusedBiasLeakyReLU`` CUDA op."""

    @classmethod
    def setup_class(cls):
        """Create the shared CUDA fixtures; does nothing without CUDA."""
        if torch.cuda.is_available():
            cls.input_tensor = torch.randn((2, 2, 2, 2),
                                           requires_grad=True).cuda()
            cls.bias = torch.zeros(2, requires_grad=True).cuda()

    @pytest.mark.skipif(not torch.cuda.is_available(), reason='requires cuda')
    def test_gradient(self):
        """First-order gradient check of a 2-channel module."""
        from mmcv.ops import FusedBiasLeakyReLU

        # parrots' gradcheck uses different keyword names for the step size
        # and the absolute tolerance than torch's gradcheck.
        if _USING_PARROTS:
            tolerances = dict(delta=1e-4, pt_atol=1e-3)
        else:
            tolerances = dict(eps=1e-4, atol=1e-3)

        module = FusedBiasLeakyReLU(2).cuda()
        gradcheck(module, self.input_tensor, **tolerances)

    @pytest.mark.skipif(
        _USING_PARROTS or not torch.cuda.is_available(),
        reason='requires cuda')
    def test_gradgradient(self):
        """Second-order gradient check; skipped under parrots."""
        from mmcv.ops import FusedBiasLeakyReLU

        module = FusedBiasLeakyReLU(2).cuda()
        gradgradcheck(module, self.input_tensor, eps=1e-4, atol=1e-3)
class TestInfo(object):
    """Smoke test for the build-information helpers in ``mmcv.ops``."""

    def test_info(self):
        """Both version helpers must return something other than ``None``.

        Returns immediately when CUDA is not available.
        """
        if not torch.cuda.is_available():
            return
        from mmcv.ops import get_compiler_version, get_compiling_cuda_version

        # Both helpers are called before anything is asserted.
        versions = (get_compiler_version(), get_compiling_cuda_version())
        for version in versions:
            assert version is not None
@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_knn():
    """Compare ``knn`` with a brute-force nearest-neighbour search."""

    def brute_force_knn(k, points, queries):
        """Indices of the ``k`` closest ``points`` for each query.

        Squared distances are formed by broadcasting ``queries`` against
        ``points``; the top-k indices are transposed to match the layout
        that ``knn`` is compared against.
        """
        delta = queries.unsqueeze(2) - points.unsqueeze(1)
        sq_dist = (delta * delta).sum(-1)
        return sq_dist.topk(k=k, dim=2, largest=False)[1].transpose(2, 1)

    new_xyz = torch.tensor([[[-0.0740, 1.3147, -1.3625],
                             [-2.2769, 2.7817, -0.2334],
                             [-0.4003, 2.4666, -0.5116],
                             [-0.0740, 1.3147, -1.3625],
                             [-0.0740, 1.3147, -1.3625]],
                            [[-2.0289, 2.4952, -0.1708],
                             [-2.0668, 6.0278, -0.4875],
                             [0.4066, 1.4211, -0.2947],
                             [-2.0289, 2.4952, -0.1708],
                             [-2.0289, 2.4952, -0.1708]]]).cuda()

    xyz = torch.tensor([[[-0.0740, 1.3147, -1.3625],
                         [0.5555, 1.0399, -1.3634],
                         [-0.4003, 2.4666, -0.5116],
                         [-0.5251, 2.4379, -0.8466],
                         [-0.9691, 1.1418, -1.3733],
                         [-0.2232, 0.9561, -1.3626],
                         [-2.2769, 2.7817, -0.2334],
                         [-0.2822, 1.3192, -1.3645],
                         [0.1533, 1.5024, -1.0432],
                         [0.4917, 1.1529, -1.3496]],
                        [[-2.0289, 2.4952, -0.1708],
                         [-0.7188, 0.9956, -0.5096],
                         [-2.0668, 6.0278, -0.4875],
                         [-1.9304, 3.3092, 0.6610],
                         [0.0949, 1.4332, 0.3140],
                         [-1.2879, 2.0008, -0.7791],
                         [-0.7252, 0.9611, -0.6371],
                         [0.4066, 1.4211, -0.2947],
                         [0.3220, 1.4447, 0.3548],
                         [-0.9744, 2.3856, -1.2000]]]).cuda()

    # Separate query set, default layout.
    found = knn(5, xyz, new_xyz)
    reference = brute_force_knn(5, xyz, new_xyz)
    assert torch.all(found == reference)

    # Same search with both inputs transposed and the extra flag set.
    found = knn(5,
                xyz.transpose(1, 2).contiguous(),
                new_xyz.transpose(1, 2).contiguous(), True)
    assert torch.all(found == reference)

    # Point set queried against itself.
    found = knn(5, xyz, xyz)
    reference = brute_force_knn(5, xyz, xyz)
    assert torch.all(found == reference)
def test_resize_methods():
    """Check ``BaseMergeCell._resize`` against plain torch functionals.

    Same-size and larger targets are compared with ``F.interpolate`` for
    each upsample mode; a smaller target is compared with ``F.max_pool2d``.
    """
    feature = torch.randn([2, 256, 128, 128])

    for mode in ['nearest', 'bilinear']:
        cell = BaseMergeCell(upsample_mode=mode)
        for size in [(128, 128), (256, 256)]:
            resized = cell._resize(feature, size)
            reference = F.interpolate(feature, size=size, mode=mode)
            assert resized.equal(reference)

    # resize to a smaller size
    small_size = (64, 64)
    resized = BaseMergeCell()._resize(feature, small_size)
    # Integer ratio between the input and target widths.
    pool_k = feature.shape[-1] // small_size[-1]
    reference = F.max_pool2d(feature, kernel_size=pool_k, stride=pool_k)
    assert (resized == reference).all()
class TestMdconv(object):
    """Forward/backward regression tests for ``ModulatedDeformConv2dPack``."""

    def _test_mdconv(self, dtype=torch.float, device='cuda'):
        """Compare output and gradients with the module-level fixtures.

        Args:
            dtype: data type used for both the input and the module.
            device: ``'cpu'`` or ``'cuda'``. The CUDA variant is skipped
                when no GPU is available.
        """
        if device == 'cuda' and not torch.cuda.is_available():
            pytest.skip('test requires GPU')
        from mmcv.ops import ModulatedDeformConv2dPack

        def to_numpy(tensor):
            return tensor.cpu().detach().numpy()

        x = torch.tensor(input_t, dtype=dtype, device=device)
        x.requires_grad = True

        layer = ModulatedDeformConv2dPack(
            1,
            1,
            kernel_size=(2, 2),
            stride=1,
            padding=1,
            deform_groups=1,
            bias=False)
        if device == 'cuda':
            layer.cuda()

        layer.weight.data.fill_(1.)
        layer.type(dtype)
        result = layer(x)
        result.sum().backward()

        assert numpy.allclose(to_numpy(result), output_t, 1e-2)
        assert numpy.allclose(to_numpy(x.grad), input_grad, 1e-2)
        assert numpy.allclose(to_numpy(layer.weight.grad), dcn_w_grad, 1e-2)
        assert numpy.allclose(
            to_numpy(layer.conv_offset.weight.grad), dcn_offset_w_grad, 1e-2)
        assert numpy.allclose(
            to_numpy(layer.conv_offset.bias.grad), dcn_offset_b_grad, 1e-2)

    def _test_amp_mdconv(self, input_dtype=torch.float):
        """The function to test amp released on pytorch 1.6.0.

        The type of input data might be torch.float or torch.half,
        so we should test mdconv in both cases. With amp, the data
        type of model will NOT be set manually.

        Returns immediately when CUDA is not available.

        Args:
            input_dtype: torch.float or torch.half.
        """
        if not torch.cuda.is_available():
            return
        from mmcv.ops import ModulatedDeformConv2dPack

        def to_numpy(tensor):
            return tensor.cpu().detach().numpy()

        x = torch.tensor(input_t).cuda().type(input_dtype)
        x.requires_grad = True

        layer = ModulatedDeformConv2dPack(
            1,
            1,
            kernel_size=(2, 2),
            stride=1,
            padding=1,
            deform_groups=1,
            bias=False).cuda()
        layer.weight.data.fill_(1.)
        result = layer(x)
        result.sum().backward()

        assert numpy.allclose(to_numpy(result), output_t, 1e-2)
        assert numpy.allclose(to_numpy(x.grad), input_grad, 1e-2)
        assert numpy.allclose(to_numpy(layer.weight.grad), dcn_w_grad, 1e-2)
        assert numpy.allclose(
            to_numpy(layer.conv_offset.weight.grad), dcn_offset_w_grad, 1e-2)
        assert numpy.allclose(
            to_numpy(layer.conv_offset.bias.grad), dcn_offset_b_grad, 1e-2)

    def test_mdconv(self):
        """Run the CPU, CUDA and (when supported) amp variants."""
        for cpu_dtype in (torch.double, torch.float):
            self._test_mdconv(cpu_dtype, device='cpu')
        for cuda_dtype in (torch.double, torch.float, torch.half):
            self._test_mdconv(cuda_dtype)

        # test amp when torch version >= '1.6.0', the type of
        # input data for mdconv might be torch.float or torch.half
        amp_available = (
            TORCH_VERSION != 'parrots'
            and digit_version(TORCH_VERSION) >= digit_version('1.6.0'))
        if amp_available:
            with autocast(enabled=True):
                for amp_dtype in (torch.float, torch.half):
                    self._test_amp_mdconv(amp_dtype)
embed_dims = 3 query = torch.rand(num_query, bs, embed_dims).to(device) key = torch.rand(num_query, bs, embed_dims).to(device) spatial_shapes = torch.Tensor([[2, 2], [1, 1]]).long().to(device) level_start_index = torch.Tensor([0, 4]).long().to(device) reference_points = torch.rand(bs, num_query, 2, 2).to(device) msda.to(device) msda( query, key, key, reference_points=reference_points, spatial_shapes=spatial_shapes, level_start_index=level_start_index) def test_forward_multi_scale_deformable_attn_pytorch(): N, M, D = 1, 2, 2 Lq, L, P = 2, 2, 2 shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long) S = sum([(H * W).item() for H, W in shapes]) torch.manual_seed(3) value = torch.rand(N, S, M, D) * 0.01 sampling_locations = torch.rand(N, Lq, M, L, P, 2) attention_weights = torch.rand(N, Lq, M, L, P) + 1e-5 attention_weights /= attention_weights.sum( -1, keepdim=True).sum( -2, keepdim=True) multi_scale_deformable_attn_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach() @pytest.mark.skipif( not torch.cuda.is_available(), reason='requires CUDA support') def test_forward_equal_with_pytorch_double(): N, M, D = 1, 2, 2 Lq, L, P = 2, 2, 2 shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda() level_start_index = torch.cat((shapes.new_zeros( (1, )), shapes.prod(1).cumsum(0)[:-1])) S = sum([(H * W).item() for H, W in shapes]) torch.manual_seed(3) value = torch.rand(N, S, M, D).cuda() * 0.01 sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 attention_weights /= attention_weights.sum( -1, keepdim=True).sum( -2, keepdim=True) im2col_step = 2 output_pytorch = multi_scale_deformable_attn_pytorch( value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach().cpu() output_cuda = MultiScaleDeformableAttnFunction.apply( value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), 
im2col_step).detach().cpu() assert torch.allclose(output_cuda, output_pytorch) max_abs_err = (output_cuda - output_pytorch).abs().max() max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() assert max_abs_err < 1e-18 assert max_rel_err < 1e-15 @pytest.mark.skipif( not torch.cuda.is_available(), reason='requires CUDA support') def test_forward_equal_with_pytorch_float(): N, M, D = 1, 2, 2 Lq, L, P = 2, 2, 2 shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda() level_start_index = torch.cat((shapes.new_zeros( (1, )), shapes.prod(1).cumsum(0)[:-1])) S = sum([(H * W).item() for H, W in shapes]) torch.manual_seed(3) value = torch.rand(N, S, M, D).cuda() * 0.01 sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 attention_weights /= attention_weights.sum( -1, keepdim=True).sum( -2, keepdim=True) im2col_step = 2 output_pytorch = multi_scale_deformable_attn_pytorch( value, shapes, sampling_locations, attention_weights).detach().cpu() output_cuda = MultiScaleDeformableAttnFunction.apply( value, shapes, level_start_index, sampling_locations, attention_weights, im2col_step).detach().cpu() assert torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3) max_abs_err = (output_cuda - output_pytorch).abs().max() max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() assert max_abs_err < 1e-9 assert max_rel_err < 1e-6 @pytest.mark.skipif( not torch.cuda.is_available(), reason='requires CUDA support') @pytest.mark.parametrize('channels', [ 4, 30, 32, 64, 71, 1025, ]) def test_gradient_numerical(channels, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True): N, M, _ = 1, 2, 2 Lq, L, P = 2, 2, 2 shapes = torch.as_tensor([(3, 2), (2, 1)], dtype=torch.long).cuda() level_start_index = torch.cat((shapes.new_zeros( (1, )), shapes.prod(1).cumsum(0)[:-1])) S = sum([(H * W).item() for H, W in shapes]) value = torch.rand(N, S, M, 
channels).cuda() * 0.01 sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 attention_weights /= attention_weights.sum( -1, keepdim=True).sum( -2, keepdim=True) im2col_step = 2 func = MultiScaleDeformableAttnFunction.apply value.requires_grad = grad_value sampling_locations.requires_grad = grad_sampling_loc attention_weights.requires_grad = grad_attn_weight if _USING_PARROTS: assert gradcheck( func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step), no_grads=[shapes, level_start_index]) else: assert gradcheck(func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step)) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_nms.py ================================================ import numpy as np import pytest import torch class Testnms(object): def test_nms_allclose(self): if not torch.cuda.is_available(): return from mmcv.ops import nms np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0], [3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]], dtype=np.float32) np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32) np_inds = np.array([1, 0, 3]) np_dets = np.array([[3.0, 6.0, 9.0, 11.0, 0.9], [6.0, 3.0, 8.0, 7.0, 0.6], [1.0, 4.0, 13.0, 7.0, 0.2]]) boxes = torch.from_numpy(np_boxes) scores = torch.from_numpy(np_scores) dets, inds = nms(boxes, scores, iou_threshold=0.3, offset=0) assert np.allclose(dets, np_dets) # test cpu assert np.allclose(inds, np_inds) # test cpu dets, inds = nms( boxes.cuda(), scores.cuda(), iou_threshold=0.3, offset=0) assert np.allclose(dets.cpu().numpy(), np_dets) # test gpu assert np.allclose(inds.cpu().numpy(), np_inds) # test gpu def test_softnms_allclose(self): if not torch.cuda.is_available(): return from mmcv.ops import soft_nms np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 
9.0, 11.0], [3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]], dtype=np.float32) np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32) np_output = { 'linear': { 'dets': np.array( [[3., 6., 9., 11., 0.9], [6., 3., 8., 7., 0.6], [3., 7., 10., 12., 0.29024392], [1., 4., 13., 7., 0.2]], dtype=np.float32), 'inds': np.array([1, 0, 2, 3], dtype=np.int64) }, 'gaussian': { 'dets': np.array([[3., 6., 9., 11., 0.9], [6., 3., 8., 7., 0.59630775], [3., 7., 10., 12., 0.35275510], [1., 4., 13., 7., 0.18650459]], dtype=np.float32), 'inds': np.array([1, 0, 2, 3], dtype=np.int64) }, 'naive': { 'dets': np.array([[3., 6., 9., 11., 0.9], [6., 3., 8., 7., 0.6], [1., 4., 13., 7., 0.2]], dtype=np.float32), 'inds': np.array([1, 0, 3], dtype=np.int64) } } boxes = torch.from_numpy(np_boxes) scores = torch.from_numpy(np_scores) configs = [[0.3, 0.5, 0.01, 'linear'], [0.3, 0.5, 0.01, 'gaussian'], [0.3, 0.5, 0.01, 'naive']] for iou, sig, mscore, m in configs: dets, inds = soft_nms( boxes, scores, iou_threshold=iou, sigma=sig, min_score=mscore, method=m) assert np.allclose(dets.cpu().numpy(), np_output[m]['dets']) assert np.allclose(inds.cpu().numpy(), np_output[m]['inds']) if torch.__version__ != 'parrots': boxes = boxes.cuda() scores = scores.cuda() for iou, sig, mscore, m in configs: dets, inds = soft_nms( boxes, scores, iou_threshold=iou, sigma=sig, min_score=mscore, method=m) assert np.allclose(dets.cpu().numpy(), np_output[m]['dets']) assert np.allclose(inds.cpu().numpy(), np_output[m]['inds']) def test_nms_match(self): if not torch.cuda.is_available(): return from mmcv.ops import nms, nms_match iou_thr = 0.6 # empty input empty_dets = np.array([]) assert len(nms_match(empty_dets, iou_thr)) == 0 # non empty ndarray input np_dets = np.array( [[49.1, 32.4, 51.0, 35.9, 0.9], [49.3, 32.9, 51.0, 35.3, 0.9], [35.3, 11.5, 39.9, 14.5, 0.4], [35.2, 11.7, 39.7, 15.7, 0.3]], dtype=np.float32) np_groups = nms_match(np_dets, iou_thr) assert isinstance(np_groups[0], np.ndarray) assert len(np_groups) 
== 2 tensor_dets = torch.from_numpy(np_dets) boxes = tensor_dets[:, :4] scores = tensor_dets[:, 4] nms_keep_inds = nms(boxes.contiguous(), scores.contiguous(), iou_thr)[1] assert set([g[0].item() for g in np_groups]) == set(nms_keep_inds.tolist()) # non empty tensor input tensor_dets = torch.from_numpy(np_dets) tensor_groups = nms_match(tensor_dets, iou_thr) assert isinstance(tensor_groups[0], torch.Tensor) for i in range(len(tensor_groups)): assert np.equal(tensor_groups[i].numpy(), np_groups[i]).all() # input of wrong shape wrong_dets = np.zeros((2, 3)) with pytest.raises(AssertionError): nms_match(wrong_dets, iou_thr) def test_batched_nms(self): import mmcv from mmcv.ops import batched_nms results = mmcv.load('./tests/data/batched_nms_data.pkl') nms_max_num = 100 nms_cfg = dict( type='nms', iou_threshold=0.7, score_threshold=0.5, max_num=nms_max_num) boxes, keep = batched_nms( torch.from_numpy(results['boxes']), torch.from_numpy(results['scores']), torch.from_numpy(results['idxs']), nms_cfg, class_agnostic=False) nms_cfg.update(split_thr=100) seq_boxes, seq_keep = batched_nms( torch.from_numpy(results['boxes']), torch.from_numpy(results['scores']), torch.from_numpy(results['idxs']), nms_cfg, class_agnostic=False) assert torch.equal(keep, seq_keep) assert torch.equal(boxes, seq_boxes) assert torch.equal(keep, torch.from_numpy(results['keep'][:nms_max_num])) nms_cfg = dict(type='soft_nms', iou_threshold=0.7) boxes, keep = batched_nms( torch.from_numpy(results['boxes']), torch.from_numpy(results['scores']), torch.from_numpy(results['idxs']), nms_cfg, class_agnostic=False) nms_cfg.update(split_thr=100) seq_boxes, seq_keep = batched_nms( torch.from_numpy(results['boxes']), torch.from_numpy(results['scores']), torch.from_numpy(results['idxs']), nms_cfg, class_agnostic=False) assert torch.equal(keep, seq_keep) assert torch.equal(boxes, seq_boxes) # test skip nms when `nms_cfg` is None seq_boxes, seq_keep = batched_nms( torch.from_numpy(results['boxes']), 
torch.from_numpy(results['scores']), torch.from_numpy(results['idxs']), None, class_agnostic=False) assert len(seq_keep) == len(results['boxes']) # assert score is descending order assert ((seq_boxes[:, -1][1:] - seq_boxes[:, -1][:-1]) < 0).all() ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_nms_rotated.py ================================================ import numpy as np import pytest import torch @pytest.mark.skipif( not torch.cuda.is_available(), reason='GPU is required to test NMSRotated op') class TestNmsRotated: def test_ml_nms_rotated(self): from mmcv.ops import nms_rotated np_boxes = np.array( [[6.0, 3.0, 8.0, 7.0, 0.5, 0.7], [3.0, 6.0, 9.0, 11.0, 0.6, 0.8], [3.0, 7.0, 10.0, 12.0, 0.3, 0.5], [1.0, 4.0, 13.0, 7.0, 0.6, 0.9] ], dtype=np.float32) np_labels = np.array([1, 0, 1, 0], dtype=np.float32) np_expect_dets = np.array( [[1.0, 4.0, 13.0, 7.0, 0.6], [3.0, 6.0, 9.0, 11.0, 0.6], [6.0, 3.0, 8.0, 7.0, 0.5]], dtype=np.float32) np_expect_keep_inds = np.array([3, 1, 0], dtype=np.int64) boxes = torch.from_numpy(np_boxes).cuda() labels = torch.from_numpy(np_labels).cuda() # test cw angle definition dets, keep_inds = nms_rotated(boxes[:, :5], boxes[:, -1], 0.5, labels) assert np.allclose(dets.cpu().numpy()[:, :5], np_expect_dets) assert np.allclose(keep_inds.cpu().numpy(), np_expect_keep_inds) # test ccw angle definition boxes[..., -2] *= -1 dets, keep_inds = nms_rotated( boxes[:, :5], boxes[:, -1], 0.5, labels, clockwise=False) dets[..., -2] *= -1 assert np.allclose(dets.cpu().numpy()[:, :5], np_expect_dets) assert np.allclose(keep_inds.cpu().numpy(), np_expect_keep_inds) def test_nms_rotated(self): from mmcv.ops import nms_rotated np_boxes = np.array( [[6.0, 3.0, 8.0, 7.0, 0.5, 0.7], [3.0, 6.0, 9.0, 11.0, 0.6, 0.8], [3.0, 7.0, 10.0, 12.0, 0.3, 0.5], [1.0, 4.0, 13.0, 7.0, 0.6, 0.9] ], dtype=np.float32) np_expect_dets = np.array( [[1.0, 4.0, 13.0, 7.0, 0.6], [3.0, 6.0, 9.0, 11.0, 0.6], [6.0, 3.0, 8.0, 
7.0, 0.5]], dtype=np.float32) np_expect_keep_inds = np.array([3, 1, 0], dtype=np.int64) boxes = torch.from_numpy(np_boxes).cuda() # test cw angle definition dets, keep_inds = nms_rotated(boxes[:, :5], boxes[:, -1], 0.5) assert np.allclose(dets.cpu().numpy()[:, :5], np_expect_dets) assert np.allclose(keep_inds.cpu().numpy(), np_expect_keep_inds) # test ccw angle definition boxes[..., -2] *= -1 dets, keep_inds = nms_rotated( boxes[:, :5], boxes[:, -1], 0.5, clockwise=False) dets[..., -2] *= -1 assert np.allclose(dets.cpu().numpy()[:, :5], np_expect_dets) assert np.allclose(keep_inds.cpu().numpy(), np_expect_keep_inds) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_onnx.py ================================================ import os import warnings from functools import partial import numpy as np import onnx import onnxruntime as rt import pytest import torch import torch.nn as nn import torch.nn.functional as F from packaging import version onnx_file = 'tmp.onnx' @pytest.fixture(autouse=True) def run_before_and_after_test(): # clear onnx_file before test if os.path.exists(onnx_file): os.remove(onnx_file) yield # clear onnx_file after test if os.path.exists(onnx_file): os.remove(onnx_file) class WrapFunction(nn.Module): def __init__(self, wrapped_function): super(WrapFunction, self).__init__() self.wrapped_function = wrapped_function def forward(self, *args, **kwargs): return self.wrapped_function(*args, **kwargs) def process_grid_sample(func, input, grid, ort_custom_op_path=''): wrapped_model = WrapFunction(func).eval() input_names = ['input', 'grid'] output_names = ['output'] with torch.no_grad(): torch.onnx.export( wrapped_model, (input, grid), onnx_file, export_params=True, keep_initializers_as_inputs=True, input_names=input_names, output_names=output_names, opset_version=11) onnx_model = onnx.load(onnx_file) session_options = rt.SessionOptions() if ort_custom_op_path: 
session_options.register_custom_ops_library(ort_custom_op_path) # get onnx output input_all = [node.name for node in onnx_model.graph.input] input_initializer = [node.name for node in onnx_model.graph.initializer] net_feed_input = list(set(input_all) - set(input_initializer)) assert (len(net_feed_input) == 2) sess = rt.InferenceSession(onnx_file, session_options) ort_result = sess.run(None, { 'input': input.detach().numpy(), 'grid': grid.detach().numpy() }) pytorch_results = wrapped_model(input.clone(), grid.clone()) assert np.allclose(pytorch_results, ort_result, atol=1e-3) @pytest.mark.parametrize('mode', ['bilinear', 'nearest']) @pytest.mark.parametrize('padding_mode', ['zeros', 'border', 'reflection']) @pytest.mark.parametrize('align_corners', [True, False]) def test_grid_sample(mode, padding_mode, align_corners): from mmcv.onnx.symbolic import register_extra_symbolics opset_version = 11 register_extra_symbolics(opset_version) from mmcv.ops import get_onnxruntime_op_path ort_custom_op_path = get_onnxruntime_op_path() if not os.path.exists(ort_custom_op_path): pytest.skip('custom ops for onnxruntime are not compiled.') input = torch.rand(1, 1, 10, 10) grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]]) grid = F.affine_grid( grid, (1, 1, 15, 15), align_corners=align_corners).type_as(input) def func(input, grid): return F.grid_sample( input, grid, mode=mode, padding_mode=padding_mode, align_corners=align_corners) return process_grid_sample(func, input, grid, ort_custom_op_path) @pytest.mark.parametrize('align_corners', [True, False]) def test_bilinear_grid_sample(align_corners): from mmcv.ops.point_sample import bilinear_grid_sample # only support pytorch >= 1.5.0 if version.parse(torch.__version__) < version.parse('1.5.0'): pytest.skip('Only support PyTorch >= 1.5.0') input = torch.rand(1, 1, 10, 10) grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]]) grid = F.affine_grid( grid, (1, 1, 15, 15), align_corners=align_corners).type_as(input) def func(input, grid): return 
bilinear_grid_sample(input, grid, align_corners=align_corners) return process_grid_sample(func, input, grid) def test_nms(): if torch.__version__ == 'parrots': pytest.skip('onnx is not supported in parrots directly') from mmcv.ops import get_onnxruntime_op_path, nms np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0], [3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]], dtype=np.float32) np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32) boxes = torch.from_numpy(np_boxes) scores = torch.from_numpy(np_scores) nms = partial( nms, iou_threshold=0.3, offset=0, score_threshold=0, max_num=0) pytorch_dets, _ = nms(boxes, scores) pytorch_score = pytorch_dets[:, 4] wrapped_model = WrapFunction(nms) wrapped_model.cpu().eval() with torch.no_grad(): torch.onnx.export( wrapped_model, (boxes, scores), onnx_file, export_params=True, keep_initializers_as_inputs=True, input_names=['boxes', 'scores'], opset_version=11) onnx_model = onnx.load(onnx_file) ort_custom_op_path = get_onnxruntime_op_path() session_options = rt.SessionOptions() if os.path.exists(ort_custom_op_path): session_options.register_custom_ops_library(ort_custom_op_path) # get onnx output input_all = [node.name for node in onnx_model.graph.input] input_initializer = [node.name for node in onnx_model.graph.initializer] net_feed_input = list(set(input_all) - set(input_initializer)) assert (len(net_feed_input) == 2) sess = rt.InferenceSession(onnx_file, session_options) onnx_dets, _ = sess.run(None, { 'scores': scores.detach().numpy(), 'boxes': boxes.detach().numpy() }) onnx_score = onnx_dets[:, 4] assert np.allclose(pytorch_score, onnx_score, atol=1e-3) @pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU') def test_softnms(): if torch.__version__ == 'parrots': pytest.skip('onnx is not supported in parrots directly') from mmcv.ops import get_onnxruntime_op_path, soft_nms # only support pytorch >= 1.7.0 if version.parse(torch.__version__) < version.parse('1.7.0'): 
warnings.warn('test_softnms should be ran with pytorch >= 1.7.0') return # only support onnxruntime >= 1.5.1 assert version.parse(rt.__version__) >= version.parse( '1.5.1'), 'test_softnms should be ran with onnxruntime >= 1.5.1' ort_custom_op_path = get_onnxruntime_op_path() if not os.path.exists(ort_custom_op_path): pytest.skip('softnms for onnxruntime is not compiled.') np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0], [3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]], dtype=np.float32) np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32) boxes = torch.from_numpy(np_boxes) scores = torch.from_numpy(np_scores) configs = [[0.3, 0.5, 0.01, 'linear'], [0.3, 0.5, 0.01, 'gaussian'], [0.3, 0.5, 0.01, 'naive']] session_options = rt.SessionOptions() session_options.register_custom_ops_library(ort_custom_op_path) for _iou_threshold, _sigma, _min_score, _method in configs: pytorch_dets, pytorch_inds = soft_nms( boxes, scores, iou_threshold=_iou_threshold, sigma=_sigma, min_score=_min_score, method=_method) nms = partial( soft_nms, iou_threshold=_iou_threshold, sigma=_sigma, min_score=_min_score, method=_method) wrapped_model = WrapFunction(nms) wrapped_model.cpu().eval() with torch.no_grad(): torch.onnx.export( wrapped_model, (boxes, scores), onnx_file, export_params=True, keep_initializers_as_inputs=True, input_names=['boxes', 'scores'], opset_version=11) onnx_model = onnx.load(onnx_file) # get onnx output input_all = [node.name for node in onnx_model.graph.input] input_initializer = [ node.name for node in onnx_model.graph.initializer ] net_feed_input = list(set(input_all) - set(input_initializer)) assert (len(net_feed_input) == 2) sess = rt.InferenceSession(onnx_file, session_options) onnx_dets, onnx_inds = sess.run(None, { 'scores': scores.detach().numpy(), 'boxes': boxes.detach().numpy() }) assert np.allclose(pytorch_dets, onnx_dets, atol=1e-3) assert np.allclose(onnx_inds, onnx_inds, atol=1e-3) def test_roialign(): if torch.__version__ == 
'parrots': pytest.skip('onnx is not supported in parrots directly') try: from mmcv.ops import get_onnxruntime_op_path, roi_align except (ImportError, ModuleNotFoundError): pytest.skip('roi_align op is not successfully compiled') ort_custom_op_path = get_onnxruntime_op_path() # roi align config pool_h = 2 pool_w = 2 spatial_scale = 1.0 sampling_ratio = 2 inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]), ([[[[1., 2.], [3., 4.]], [[4., 3.], [2., 1.]]]], [[0., 0., 0., 1., 1.]]), ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.], [11., 12., 15., 16.]]]], [[0., 0., 0., 3., 3.]])] def warpped_function(torch_input, torch_rois): return roi_align(torch_input, torch_rois, (pool_w, pool_h), spatial_scale, sampling_ratio, 'avg', True) for case in inputs: np_input = np.array(case[0], dtype=np.float32) np_rois = np.array(case[1], dtype=np.float32) input = torch.from_numpy(np_input) rois = torch.from_numpy(np_rois) # compute pytorch_output with torch.no_grad(): pytorch_output = roi_align(input, rois, (pool_w, pool_h), spatial_scale, sampling_ratio, 'avg', True) # export and load onnx model wrapped_model = WrapFunction(warpped_function) with torch.no_grad(): torch.onnx.export( wrapped_model, (input, rois), onnx_file, export_params=True, keep_initializers_as_inputs=True, input_names=['input', 'rois'], opset_version=11) onnx_model = onnx.load(onnx_file) session_options = rt.SessionOptions() if os.path.exists(ort_custom_op_path): session_options.register_custom_ops_library(ort_custom_op_path) # compute onnx_output input_all = [node.name for node in onnx_model.graph.input] input_initializer = [ node.name for node in onnx_model.graph.initializer ] net_feed_input = list(set(input_all) - set(input_initializer)) assert (len(net_feed_input) == 2) sess = rt.InferenceSession(onnx_file, session_options) onnx_output = sess.run(None, { 'input': input.detach().numpy(), 'rois': rois.detach().numpy() }) onnx_output = onnx_output[0] # allclose assert np.allclose(pytorch_output, 
onnx_output, atol=1e-3) def test_roialign_rotated(): if torch.__version__ == 'parrots': pytest.skip('onnx is not supported in parrots directly') try: from mmcv.ops import get_onnxruntime_op_path, roi_align_rotated except (ImportError, ModuleNotFoundError): pytest.skip('roi_align_aligned op is not successfully compiled') ort_custom_op_path = get_onnxruntime_op_path() if not os.path.exists(ort_custom_op_path): pytest.skip('custom ops for onnxruntime are not compiled.') # roi align config pool_h = 2 pool_w = 2 spatial_scale = 1.0 sampling_ratio = 2 inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0.5, 0.5, 1., 1., 0]]), ([[[[1., 2.], [3., 4.]]]], [[0., 0.5, 0.5, 1., 1., np.pi / 2]]), ([[[[1., 2.], [3., 4.]], [[4., 3.], [2., 1.]]]], [[0., 0.5, 0.5, 1., 1., 0]]), ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.], [11., 12., 15., 16.]]]], [[0., 1.5, 1.5, 3., 3., 0]]), ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.], [11., 12., 15., 16.]]]], [[0., 1.5, 1.5, 3., 3., np.pi / 2]])] def warpped_function(torch_input, torch_rois): return roi_align_rotated(torch_input, torch_rois, (pool_w, pool_h), spatial_scale, sampling_ratio, True, False) for case in inputs: np_input = np.array(case[0], dtype=np.float32) np_rois = np.array(case[1], dtype=np.float32) input = torch.from_numpy(np_input) rois = torch.from_numpy(np_rois) # compute pytorch_output with torch.no_grad(): pytorch_output = roi_align_rotated(input, rois, (pool_w, pool_h), spatial_scale, sampling_ratio, True, False) # export and load onnx model wrapped_model = WrapFunction(warpped_function) with torch.no_grad(): torch.onnx.export( wrapped_model, (input, rois), onnx_file, export_params=True, keep_initializers_as_inputs=True, input_names=['features', 'rois'], opset_version=11) onnx_model = onnx.load(onnx_file) session_options = rt.SessionOptions() if os.path.exists(ort_custom_op_path): session_options.register_custom_ops_library(ort_custom_op_path) # compute onnx_output input_all = [node.name for node in 
onnx_model.graph.input] input_initializer = [ node.name for node in onnx_model.graph.initializer ] net_feed_input = list(set(input_all) - set(input_initializer)) assert (len(net_feed_input) == 2) sess = rt.InferenceSession(onnx_file, session_options) onnx_output = sess.run(None, { 'features': input.detach().numpy(), 'rois': rois.detach().numpy() }) onnx_output = onnx_output[0] # allclose assert np.allclose(pytorch_output, onnx_output, atol=1e-3) @pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU') def test_roipool(): if torch.__version__ == 'parrots': pytest.skip('onnx is not supported in parrots directly') from mmcv.ops import roi_pool # roi pool config pool_h = 2 pool_w = 2 spatial_scale = 1.0 inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]), ([[[[1., 2.], [3., 4.]], [[4., 3.], [2., 1.]]]], [[0., 0., 0., 1., 1.]]), ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.], [11., 12., 15., 16.]]]], [[0., 0., 0., 3., 3.]])] def warpped_function(torch_input, torch_rois): return roi_pool(torch_input, torch_rois, (pool_w, pool_h), spatial_scale) for case in inputs: np_input = np.array(case[0], dtype=np.float32) np_rois = np.array(case[1], dtype=np.float32) input = torch.from_numpy(np_input).cuda() rois = torch.from_numpy(np_rois).cuda() # compute pytorch_output with torch.no_grad(): pytorch_output = roi_pool(input, rois, (pool_w, pool_h), spatial_scale) pytorch_output = pytorch_output.cpu() # export and load onnx model wrapped_model = WrapFunction(warpped_function) with torch.no_grad(): torch.onnx.export( wrapped_model, (input, rois), onnx_file, export_params=True, keep_initializers_as_inputs=True, input_names=['input', 'rois'], opset_version=11) onnx_model = onnx.load(onnx_file) # compute onnx_output input_all = [node.name for node in onnx_model.graph.input] input_initializer = [ node.name for node in onnx_model.graph.initializer ] net_feed_input = list(set(input_all) - set(input_initializer)) assert (len(net_feed_input) == 2) 
sess = rt.InferenceSession(onnx_file) onnx_output = sess.run( None, { 'input': input.detach().cpu().numpy(), 'rois': rois.detach().cpu().numpy() }) onnx_output = onnx_output[0] # allclose assert np.allclose(pytorch_output, onnx_output, atol=1e-3) def test_interpolate(): from mmcv.onnx.symbolic import register_extra_symbolics opset_version = 11 register_extra_symbolics(opset_version) def func(feat, scale_factor=2): out = F.interpolate(feat, scale_factor=scale_factor) return out net = WrapFunction(func) net = net.cpu().eval() dummy_input = torch.randn(2, 4, 8, 8).cpu() torch.onnx.export( net, dummy_input, onnx_file, input_names=['input'], opset_version=opset_version) sess = rt.InferenceSession(onnx_file) onnx_result = sess.run(None, {'input': dummy_input.detach().numpy()}) pytorch_result = func(dummy_input).detach().numpy() assert np.allclose(pytorch_result, onnx_result, atol=1e-3) @pytest.mark.parametrize('mode', ['top', 'bottom', 'left', 'right']) def test_corner_pool(mode, opset=11): if torch.__version__ == 'parrots': pytest.skip('onnx is not supported in parrots directly') from mmcv.ops import get_onnxruntime_op_path ort_custom_op_path = get_onnxruntime_op_path() if not os.path.exists(ort_custom_op_path): pytest.skip('custom ops for onnxruntime are not compiled.') from mmcv.ops.corner_pool import CornerPool def corner_pool_func(input): corner_pool_module = CornerPool(mode) return corner_pool_module.corner_pool.apply(input) wrapped_model = WrapFunction(corner_pool_func).eval() input = torch.rand((2, 3, 9, 12)) # (n,c,h,w) with torch.no_grad(): torch.onnx.export( wrapped_model, input, onnx_file, export_params=True, keep_initializers_as_inputs=True, input_names=['input'], output_names=['output'], opset_version=opset) onnx_model = onnx.load(onnx_file) input_all = [node.name for node in onnx_model.graph.input] input_initializer = [node.name for node in onnx_model.graph.initializer] net_feed_input = list(set(input_all) - set(input_initializer)) assert 
(len(net_feed_input) == 1) session_options = rt.SessionOptions() session_options.register_custom_ops_library(ort_custom_op_path) sess = rt.InferenceSession(onnx_file, session_options) ort_result = sess.run(None, {'input': input.detach().numpy()}) pytorch_results = wrapped_model(input.clone()) assert np.allclose(pytorch_results, ort_result, atol=1e-5) @pytest.mark.parametrize('key', ['cummax', 'cummin']) def test_cummax_cummin(key, opset=11): if torch.__version__ == 'parrots': pytest.skip('onnx is not supported in parrots directly') # Note generally `cummax` or `cummin` is exportable to ONNX # as long as the pytorch version >= 1.5.0, since `torch.cummax` # is only supported with torch >= 1.5.0. # But when `cummax` or `cummin` serves as an intermediate component # whose outputs is used as inputs for another modules, it's expected # that pytorch version must be >= 1.7.0. Otherwise error appears like: # `RuntimeError: tuple appears in op that does not forward tuples, # unsupported 'kind: prim::PythonOp`. if version.parse(torch.__version__) < version.parse('1.7.0'): pytest.skip('test_cummax_cummin should be ran with pytorch >= 1.7.0') # register custom op `mmcv::cummax` and `mmcv::cummin` from mmcv.onnx.symbolic import register_extra_symbolics register_extra_symbolics(opset) from mmcv.ops import get_onnxruntime_op_path ort_custom_op_path = get_onnxruntime_op_path() if not os.path.exists(ort_custom_op_path): pytest.skip('custom ops for onnxruntime are not compiled.') input_list = [ # arbitrary shape, e.g. 1-D, 2-D, 3-D, ... 
torch.rand((2, 3, 4, 1, 5)), torch.rand((1)), torch.rand((2, 0, 1)), # tensor.numel() is 0 torch.FloatTensor(), # empty tensor ] cummax_cummin_funcs = {'cummax': torch.cummax, 'cummin': torch.cummin} for input in input_list: ndims = input.dim() # valid dim range is [-ndims, ndims-1] # test for all `dim` value which is valid for dim in range(-ndims, ndims): cummax_func = partial(cummax_cummin_funcs[key], dim=dim) wrapped_model = WrapFunction(cummax_func).eval() with torch.no_grad(): torch.onnx.export( wrapped_model, input, onnx_file, export_params=True, keep_initializers_as_inputs=True, input_names=['input'], output_names=['output', 'indices'], opset_version=opset) onnx_model = onnx.load(onnx_file) input_all = [node.name for node in onnx_model.graph.input] input_initializer = [ node.name for node in onnx_model.graph.initializer ] net_feed_input = list(set(input_all) - set(input_initializer)) assert (len(net_feed_input) == 1) session_options = rt.SessionOptions() session_options.register_custom_ops_library(ort_custom_op_path) sess = rt.InferenceSession(onnx_file, session_options) ort_output, ort_inds = sess.run(None, {'input': input.detach().numpy()}) pytorch_output, pytorch_inds = wrapped_model(input.clone()) pytorch_output = pytorch_output.detach().numpy() pytorch_inds = pytorch_inds.detach().numpy() assert np.allclose(pytorch_output, ort_output, atol=1e-5) assert np.all(pytorch_inds == ort_inds) @pytest.mark.parametrize('shifts_dims_pair', [([-3, 5], [2, 0]), (5, None)]) def test_roll(shifts_dims_pair): opset = 11 from mmcv.onnx.symbolic import register_extra_symbolics register_extra_symbolics(opset) input = torch.arange(0, 4 * 5 * 6, dtype=torch.float32).view(4, 5, 6) shifts, dims = shifts_dims_pair func = partial(torch.roll, shifts=shifts, dims=dims) wrapped_model = WrapFunction(func).eval() with torch.no_grad(): torch.onnx.export( wrapped_model, input, onnx_file, export_params=True, keep_initializers_as_inputs=True, input_names=['input'], 
output_names=['output'], opset_version=opset) onnx_model = onnx.load(onnx_file) input_all = [node.name for node in onnx_model.graph.input] input_initializer = [node.name for node in onnx_model.graph.initializer] net_feed_input = list(set(input_all) - set(input_initializer)) assert (len(net_feed_input) == 1) sess = rt.InferenceSession(onnx_file) ort_output = sess.run(None, {'input': input.detach().numpy()})[0] with torch.no_grad(): pytorch_output = wrapped_model(input.clone()) torch.testing.assert_allclose(ort_output, pytorch_output) @pytest.mark.skipif( torch.__version__ == 'parrots', reason='onnx is not supported in parrots directly') @pytest.mark.skipif( not torch.cuda.is_available(), reason='modulated_deform_conv2d only supports in GPU') def test_modulated_deform_conv2d(): try: from mmcv.ops import ModulatedDeformConv2d, get_onnxruntime_op_path except (ImportError, ModuleNotFoundError): pytest.skip('modulated_deform_conv op is not successfully compiled') ort_custom_op_path = get_onnxruntime_op_path() # modulated deform conv config in_channels = 3 out_channels = 64 stride = 1 padding = 0 dilation = 1 groups = 1 deform_groups = 1 kernel_size = 3 input = torch.rand(1, in_channels, 28, 28).cuda() # (n, c, h, w) conv_offset = nn.Conv2d( in_channels=3, out_channels=deform_groups * 3 * kernel_size * kernel_size, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, bias=True).cuda() conv_offset.cuda() out = conv_offset(input) o1, o2, mask = torch.chunk(out, 3, dim=1) offset = torch.cat((o1, o2), dim=1) mask = torch.sigmoid(mask) model_with_bias = ModulatedDeformConv2d( in_channels, out_channels, kernel_size, stride, padding, dilation, groups, deform_groups, bias=True) model_without_bias = ModulatedDeformConv2d( in_channels, out_channels, kernel_size, stride, padding, dilation, groups, deform_groups, bias=False) models = [model_with_bias.cuda(), model_without_bias.cuda()] for model in models: # export and load onnx model with torch.no_grad(): 
@pytest.mark.skipif(
    torch.__version__ == 'parrots',
    reason='onnx is not supported in parrots directly')
def test_deform_conv2d(threshold=1e-3):
    """Compare ``DeformConv2d`` in PyTorch against its ONNX export.

    Args:
        threshold (float): Absolute tolerance used when comparing the
            PyTorch output with the onnxruntime output. Previously this
            argument was accepted but ignored (the tolerance was
            hard-coded); it is now honoured. The default keeps the old
            tolerance of ``1e-3``.
    """
    try:
        from mmcv.ops import DeformConv2d, get_onnxruntime_op_path
    except (ImportError, ModuleNotFoundError):
        pytest.skip('deform_conv op is not successfully compiled')

    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    # deform conv config
    in_channels = 1
    out_channels = 64
    stride = 1
    padding = 0
    dilation = 1
    groups = 1
    deform_groups = 1
    kernel_size = 2

    # Fixed input and weights so the comparison is deterministic.
    input = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
    offset_weight = [[[0.1, 0.4, 0.6, 0.1]], [[0.3, 0.2, 0.1, 0.3]],
                     [[0.5, 0.5, 0.2, 0.8]], [[0.8, 0.3, 0.9, 0.1]],
                     [[0.3, 0.1, 0.2, 0.5]], [[0.3, 0.7, 0.5, 0.3]],
                     [[0.6, 0.2, 0.5, 0.3]], [[0.4, 0.1, 0.8, 0.4]]]
    offset_bias = [0.7, 0.1, 0.8, 0.5, 0.6, 0.5, 0.4, 0.7]
    deform_weight = [[[0.4, 0.2, 0.1, 0.9]]]

    x = torch.tensor(input)
    # A plain convolution produces the offsets fed to the deformable conv.
    conv_offset = nn.Conv2d(
        in_channels=in_channels,
        out_channels=deform_groups * 2 * kernel_size * kernel_size,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        bias=True)

    conv_offset.weight.data = torch.nn.Parameter(
        torch.Tensor(offset_weight).reshape(8, 1, 2, 2))
    conv_offset.bias.data = torch.nn.Parameter(
        torch.Tensor(offset_bias).reshape(8))

    offset = conv_offset(x)

    model = DeformConv2d(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, deform_groups)

    # NOTE: this replaces the weight with a (1, 1, 2, 2) tensor, i.e. a
    # single output channel regardless of ``out_channels`` above.
    model.weight.data = torch.nn.Parameter(
        torch.Tensor(deform_weight).reshape(1, 1, 2, 2))

    with torch.no_grad():
        torch.onnx.export(
            model, (x, offset),
            onnx_file,
            export_params=True,
            keep_initializers_as_inputs=True,
            input_names=['input', 'offset'],
            opset_version=11)

    # The library path is known to exist here: the test was skipped above
    # otherwise, so no second existence check is needed.
    session_options = rt.SessionOptions()
    session_options.register_custom_ops_library(ort_custom_op_path)

    # compute onnx_output
    sess = rt.InferenceSession(onnx_file, session_options)
    onnx_output = sess.run(
        None, {
            'input': x.cpu().detach().numpy(),
            'offset': offset.cpu().detach().numpy(),
        })[0]

    # compute pytorch_output
    with torch.no_grad():
        pytorch_output = model(x, offset).cpu()

    # Compare as numpy arrays, using the caller-supplied tolerance.
    assert np.allclose(pytorch_output.numpy(), onnx_output, atol=threshold)
[0, 0, 1, 1, 1, 0, 0, 0, 2, 0], [0, 0, 1, 1, 1, 0, 0, 0, 2, 0], [0, 0, 1, 1, 1, 0, 0, 0, 2, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]).astype(np.int32) np_kernel_contour = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 1, 0, 0, 0, 1, 0], [0, 0, 1, 0, 1, 0, 0, 0, 1, 0], [0, 0, 1, 0, 1, 0, 0, 0, 1, 0], [0, 0, 1, 1, 1, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]).astype(np.uint8) kernel_region_num = 3 distance_threshold = float(0.8) result = pixel_group(np_score, np_mask, np_embedding, np_kernel_label, np_kernel_contour, kernel_region_num, distance_threshold) gt_1 = [ 0.8999997973442078, 24.0, 1.0, 3.0, 2.0, 3.0, 3.0, 3.0, 4.0, 3.0, 5.0, 3.0, 6.0, 3.0, 1.0, 4.0, 2.0, 4.0, 3.0, 4.0, 4.0, 4.0, 5.0, 4.0, 6.0, 4.0, 1.0, 5.0, 2.0, 5.0, 3.0, 5.0, 4.0, 5.0, 5.0, 5.0, 6.0, 5.0, 1.0, 6.0, 2.0, 6.0, 3.0, 6.0, 4.0, 6.0, 5.0, 6.0, 6.0, 6.0 ] gt_2 = [ 0.9000000357627869, 8.0, 7.0, 3.0, 8.0, 3.0, 7.0, 4.0, 8.0, 4.0, 7.0, 5.0, 8.0, 5.0, 7.0, 6.0, 8.0, 6.0 ] assert np.allclose(result[0], [0, 0]) assert np.allclose(result[1], gt_1) assert np.allclose(result[2], gt_2) # test torch Tensor np_score_t = torch.from_numpy(np_score) np_mask_t = torch.from_numpy(np_mask) np_embedding_t = torch.from_numpy(np_embedding) np_kernel_label_t = torch.from_numpy(np_kernel_label) np_kernel_contour_t = torch.from_numpy(np_kernel_contour) result = pixel_group(np_score_t, np_mask_t, np_embedding_t, np_kernel_label_t, np_kernel_contour_t, kernel_region_num, distance_threshold) assert np.allclose(result[0], [0, 0]) assert np.allclose(result[1], gt_1) assert np.allclose(result[2], gt_2) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_points_in_polygons.py ================================================ import numpy as np import pytest import torch from 
class Loss(nn.Module):
    """Toy loss: mean of the element-wise difference ``input - target``."""

    def forward(self, input, target):
        """Flatten both tensors and return the mean of their difference.

        Both arguments are flattened with ``view(-1)`` before subtraction.
        """
        flat_input = input.view(-1)
        flat_target = target.view(-1)
        difference = flat_input - flat_target
        return difference.mean()
input.cuda() label = label.cuda() # test collect cuda test_output = psamask_collect(input) loss = test_loss(test_output, label) loss.backward() test_output = test_output.detach().cpu().numpy() assert np.allclose(test_output, output_collect) assert test_output.shape == output_collect.shape def test_psa_mask_distribute(self): if not torch.cuda.is_available(): return from mmcv.ops import PSAMask test_loss = Loss() input = np.fromfile( 'tests/data/for_psa_mask/psa_input.bin', dtype=np.float32) output_distribute = np.fromfile( 'tests/data/for_psa_mask/psa_output_distribute.bin', dtype=np.float32) input = input.reshape((4, 16, 8, 8)) output_distribute = output_distribute.reshape((4, 64, 8, 8)) label = torch.ones((4, 64, 8, 8)) input = torch.FloatTensor(input) input.requires_grad = True psamask_distribute = PSAMask('distribute', (4, 4)) # test distribute cpu test_output = psamask_distribute(input) loss = test_loss(test_output, label) loss.backward() test_output = test_output.detach().numpy() assert np.allclose(test_output, output_distribute) assert test_output.shape == output_distribute.shape psamask_distribute.cuda() input = input.cuda() label = label.cuda() # test distribute cuda test_output = psamask_distribute(input) loss = test_loss(test_output, label) loss.backward() test_output = test_output.detach().cpu().numpy() assert np.allclose(test_output, output_distribute) assert test_output.shape == output_distribute.shape ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_riroi_align_rotated.py ================================================ import numpy as np import pytest import torch from torch.autograd import gradcheck from mmcv.ops import RiRoIAlignRotated np_feature = np.array([[[[1, 2], [3, 4]], [[1, 2], [4, 3]], [[4, 3], [2, 1]], [[1, 2], [5, 6]], [[3, 4], [7, 8]], [[9, 10], [13, 14]], [[11, 12], [15, 16]], [[1, 1], [2, 2]]]]) np_rois = np.array([[0., 0.5, 0.5, 1., 1., np.pi / 3], [0., 1., 1., 3., 3., np.pi 
@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_roialign_rotated_gradcheck():
    """Numerically check the gradient of ``RiRoIAlignRotated`` on CUDA.

    Uses the module-level ``np_feature`` / ``np_rois`` fixtures and pooling
    constants defined in this file.
    """
    cuda_float = dict(dtype=torch.float, device='cuda')
    features = torch.tensor(np_feature, requires_grad=True, **cuda_float)
    boxes = torch.tensor(np_rois, **cuda_float)

    output_size = (pool_h, pool_w)
    pooler = RiRoIAlignRotated(output_size, spatial_scale, num_samples,
                               num_orientations, clockwise)

    gradcheck(pooler, (features, boxes), eps=1e-3, atol=1e-3)
def _test_roialign_gradcheck(device, dtype):
    """Run ``gradcheck`` on ``RoIAlign`` for every case in ``inputs``.

    The test is skipped when CUDA is requested but unavailable, when the
    op cannot be imported, or when ``dtype`` is ``torch.half``.

    Args:
        device (str): Device string the tensors are created on.
        dtype (torch.dtype): Data type of the feature map and RoIs.
    """
    if not torch.cuda.is_available() and device == 'cuda':
        pytest.skip('test requires GPU')
    try:
        from mmcv.ops import RoIAlign
    except ModuleNotFoundError:
        pytest.skip('RoIAlign op is not successfully compiled')
    if dtype is torch.half:
        pytest.skip('grad check does not support fp16')

    # The module does not depend on the test case, so build it once.
    froipool = RoIAlign((pool_h, pool_w), spatial_scale, sampling_ratio)

    for case in inputs:
        np_input = np.array(case[0])
        np_rois = np.array(case[1])

        x = torch.tensor(
            np_input, dtype=dtype, device=device, requires_grad=True)
        rois = torch.tensor(np_rois, dtype=dtype, device=device)

        # The keyword arguments must match the ``gradcheck`` that was
        # actually imported at the top of this file; ``_USING_PARROTS``
        # records exactly that choice, so branch on it rather than on
        # ``torch.__version__``.
        if _USING_PARROTS:
            gradcheck(
                froipool, (x, rois),
                no_grads=[rois],
                delta=1e-5,
                pt_atol=1e-5)
        else:
            gradcheck(froipool, (x, rois), eps=1e-5, atol=1e-5)
def _test_roialign_rotated_gradcheck(device, dtype):
    """Run ``gradcheck`` on ``RoIAlignRotated`` for every case in ``inputs``.

    The test is skipped when CUDA is requested but unavailable, when the
    op cannot be imported, or when ``dtype`` is ``torch.half``.

    Args:
        device (str): Device string the tensors are created on.
        dtype (torch.dtype): Data type of the feature map and RoIs.
    """
    if not torch.cuda.is_available() and device == 'cuda':
        pytest.skip('unittest does not support GPU yet.')
    try:
        from mmcv.ops import RoIAlignRotated
    except ModuleNotFoundError:
        pytest.skip('RoIAlignRotated op is not successfully compiled')
    if dtype is torch.half:
        pytest.skip('grad check does not support fp16')

    # The module does not depend on the test case, so build it once.
    froipool = RoIAlignRotated((pool_h, pool_w), spatial_scale,
                               sampling_ratio)

    for case in inputs:
        np_input = np.array(case[0])
        np_rois = np.array(case[1])

        x = torch.tensor(
            np_input, dtype=dtype, device=device, requires_grad=True)
        rois = torch.tensor(np_rois, dtype=dtype, device=device)

        # The keyword arguments must match the ``gradcheck`` that was
        # actually imported at the top of this file; ``_USING_PARROTS``
        # records exactly that choice, so branch on it rather than on
        # ``torch.__version__``.
        if _USING_PARROTS:
            gradcheck(
                froipool, (x, rois),
                no_grads=[rois],
                delta=1e-5,
                pt_atol=1e-5)
        else:
            gradcheck(froipool, (x, rois), eps=1e-5, atol=1e-5)
@pytest.mark.parametrize('device', ['cuda', 'cpu'])
@pytest.mark.parametrize('dtype', [torch.float, torch.double, torch.half])
def test_roialign_rotated(device, dtype):
    """Entry point: value check for every dtype, gradcheck for double only."""
    params = dict(device=device, dtype=dtype)
    needs_gradcheck = dtype is torch.double
    if needs_gradcheck:
        _test_roialign_rotated_gradcheck(**params)
    _test_roialign_rotated_allclose(**params)
class TestRoiPool(object):
    """CUDA-only checks for ``RoIPool`` / ``roi_pool``.

    Every test returns immediately (and therefore passes) when CUDA is not
    available.
    """

    def test_roipool_gradcheck(self):
        """Numerically check the gradient of ``RoIPool`` on each input case."""
        if not torch.cuda.is_available():
            return
        from mmcv.ops import RoIPool
        output_size = (2, 2)
        spatial_scale = 1.0

        for feature, boxes in inputs:
            x = torch.tensor(
                np.array(feature), device='cuda', requires_grad=True)
            rois = torch.tensor(np.array(boxes), device='cuda')

            froipool = RoIPool(output_size, spatial_scale)

            # Under parrots the gradient check is intentionally not run:
            # gradcheck(froipool, (x, rois), no_grads=[rois])
            if not _USING_PARROTS:
                gradcheck(froipool, (x, rois), eps=1e-2, atol=1e-2)

    def _test_roipool_allclose(self, dtype=torch.float):
        """Compare forward output and input gradient with the references.

        Args:
            dtype (torch.dtype): Data type of the feature map and RoIs.
        """
        if not torch.cuda.is_available():
            return
        from mmcv.ops import roi_pool
        output_size = (2, 2)
        spatial_scale = 1.0

        for (feature, boxes), (ref_out, ref_grad) in zip(inputs, outputs):
            np_output = np.array(ref_out)
            np_grad = np.array(ref_grad)

            x = torch.tensor(
                np.array(feature),
                dtype=dtype,
                device='cuda',
                requires_grad=True)
            rois = torch.tensor(np.array(boxes), dtype=dtype, device='cuda')

            pooled = roi_pool(x, rois, output_size, spatial_scale)
            pooled.backward(torch.ones_like(pooled))

            # Third positional argument of np.allclose is the relative
            # tolerance.
            assert np.allclose(pooled.data.cpu().numpy(), np_output, 1e-3)
            assert np.allclose(x.grad.data.cpu().numpy(), np_grad, 1e-3)

    def test_roipool_allclose(self):
        """Run the value check for double, float and half precision."""
        for dtype in (torch.double, torch.float, torch.half):
            self._test_roipool_allclose(dtype)
@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_RoIAwarePool3d():
    """Check output shape and summed value of ``RoIAwarePool3d`` (max/avg)."""
    # Build both pooling modules up front, keyed by their mode.
    poolers = {
        mode: RoIAwarePool3d(out_size=4, max_pts_per_voxel=128, mode=mode)
        for mode in ('max', 'avg')
    }

    # boxes (m, 7) with bottom center in lidar coordinate
    rois = torch.tensor(
        [[1.0, 2.0, 3.0, 5.0, 4.0, 6.0, -0.3 - np.pi / 2],
         [-10.0, 23.0, 16.0, 20.0, 10.0, 20.0, -0.5 - np.pi / 2]],
        dtype=torch.float32).cuda()
    # points (n, 3) in lidar coordinate
    pts = torch.tensor(
        [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],
         [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],
         [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20],
         [-16, -18, 9], [-21.3, -52, -5], [0, 0, 0], [6, 7, 8],
         [-2, -3, -4]],
        dtype=torch.float32).cuda()
    # The point coordinates double as the per-point features.
    pts_feature = pts.clone()

    expected_shape = torch.Size([2, 4, 4, 4, 3])
    expected_sums = (('max', 51.100), ('avg', 49.750))
    for mode, expected_sum in expected_sums:
        pooled = poolers[mode](rois=rois, pts=pts, pts_feature=pts_feature)
        assert pooled.shape == expected_shape
        # Third positional argument of torch.allclose is the relative
        # tolerance.
        assert torch.allclose(pooled.sum(),
                              torch.tensor(expected_sum).cuda(), 1e-3)
def test_points_in_boxes_cpu():
    """Check ``points_in_boxes_cpu`` against hand-written membership masks."""
    float32 = dict(dtype=torch.float32)
    int32 = dict(dtype=torch.int32)

    # Case 1: one batch with two boxes, tensor of shape (1, 2, 7).
    boxes = torch.tensor([[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3],
                           [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]]], **float32)
    # One batch of 15 points, tensor of shape (1, 15, 3).
    pts = torch.tensor(
        [[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],
          [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],
          [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20],
          [-16, -18, 9], [-21.3, -52, -5], [0, 0, 0], [6, 7, 8],
          [-2, -3, -4]]], **float32)

    point_indices = points_in_boxes_cpu(points=pts, boxes=boxes)

    # Expected rows: first five points flagged for box 0, the sixth for
    # box 1, the remaining nine for neither.
    membership = [[1, 0]] * 5 + [[0, 1]] + [[0, 0]] * 9
    expected_point_indices = torch.tensor([membership], **int32)
    assert point_indices.shape == torch.Size([1, 15, 2])
    assert (point_indices == expected_point_indices).all()

    # Case 2: a single box whose last entry is 0.523598 (30 degrees).
    boxes = torch.tensor([[[0.0, 0.0, 0.0, 1.0, 20.0, 1.0, 0.523598]]],
                         **float32)
    pts = torch.tensor(
        [[[4, 6.928, 0], [6.928, 4, 0], [4, -6.928, 0], [6.928, -4, 0],
          [-4, 6.928, 0], [-6.928, 4, 0], [-4, -6.928, 0],
          [-6.928, -4, 0]]], **float32)

    point_indices = points_in_boxes_cpu(points=pts, boxes=boxes)

    expected_point_indices = torch.tensor(
        [[[0], [0], [1], [0], [1], [0], [0], [0]]], **int32)
    assert (point_indices == expected_point_indices).all()
torch.tensor([[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3], [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]]], dtype=torch.float32).cuda() roipoint_pool3d = RoIPointPool3d(num_sampled_points=4) roi_feat, empty_flag = roipoint_pool3d(feats, points, rois) expected_roi_feat = torch.tensor([[[[1, 2, 3.3, 1, 2, 3.3], [1.2, 2.5, 3, 1.2, 2.5, 3], [0.8, 2.1, 3.5, 0.8, 2.1, 3.5], [1.6, 2.6, 3.6, 1.6, 2.6, 3.6]], [[-9.2, 21, 18.2, -9.2, 21, 18.2], [-9.2, 21, 18.2, -9.2, 21, 18.2], [-9.2, 21, 18.2, -9.2, 21, 18.2], [-9.2, 21, 18.2, -9.2, 21, 18.2]]]]).cuda() expected_empty_flag = torch.tensor([[0, 0]]).int().cuda() assert torch.allclose(roi_feat, expected_roi_feat) assert torch.allclose(empty_flag, expected_empty_flag) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_rotated_feature_align.py ================================================ import pytest import torch from mmcv.ops import rotated_feature_align @pytest.mark.skipif( not torch.cuda.is_available(), reason='requires CUDA support') def test_rotated_feature_align(): feature = torch.tensor([[[[1.2924, -0.2172, -0.5222, 0.1172], [0.9144, 1.2248, 1.3115, -0.9690], [-0.8949, -1.1797, -0.9093, -0.3961], [-0.4586, 0.5062, -0.7947, -0.7397]], [[-1.0943, -0.7495, 1.3461, -1.1652], [0.2034, 0.6763, -1.2357, 0.5231], [-1.0062, 1.2592, 1.4225, -0.3951], [-0.1242, -1.6240, 0.1932, 2.7181]], [[-1.6271, -1.0276, 0.0578, -0.2997], [-0.9684, -1.6946, -1.3188, -1.1938], [-1.6744, -0.8917, -0.6556, 1.0073], [-0.1205, 0.3671, -0.3731, -0.5347]]], [[[0.7035, 0.2089, -0.1774, 3.4670], [-0.8505, -0.9278, 1.4714, 0.1644], [0.0898, 0.3531, -0.4007, 0.1927], [1.2569, -0.2636, -0.5223, 0.0616]], [[0.1760, -0.7639, -0.4600, -1.3260], [-0.9921, -0.2970, -0.8955, 1.0508], [1.3515, -0.1641, 1.9679, 1.1986], [-0.3616, 0.6287, 0.4933, 0.3360]], [[-0.5860, 0.2124, -0.8700, 2.4200], [-0.0551, -1.5103, -1.6779, 0.8399], [0.8431, 1.2414, -1.1243, -0.3887], [-2.1254, 0.6047, -0.3515, 0.7254]]]], device='cuda', 
requires_grad=True) bbox = torch.tensor( [[[[1.3080e+01, 1.2688e+01, 1.1214e+01, 9.3944e+01, -9.1905e-01], [3.8104e+01, 1.0134e+01, 1.4659e+02, 9.0306e+01, -9.8211e-01], [-5.3213e+01, 4.9508e+01, 5.1513e+01, 3.2055e+01, -3.1954e-01], [2.6974e+01, 2.5248e+01, 5.4495e+01, 3.1083e+00, -6.2127e-01]], [[-1.5604e+01, -5.1908e+01, 2.3998e+02, 1.5008e+01, -1.2546e+00], [3.1354e+01, -7.3635e+00, 6.7879e+01, 3.5081e+01, -3.3851e-01], [-5.3292e+00, 9.1946e+00, 1.2834e+01, 1.0485e+01, -1.3039e+00], [-2.3925e+01, 3.6623e+01, 3.9875e+01, 7.2009e+01, -6.5934e-01]], [[7.2114e+01, -2.3781e+01, 2.9106e+01, 8.4501e+01, -1.1340e+00], [2.6258e+01, -7.7034e+00, 1.7629e+02, 1.0615e+02, -1.2156e+00], [3.8057e+01, 4.6016e+01, 1.2965e+01, 6.9384e+00, -1.0855e+00], [2.4428e+01, -1.6189e+01, 2.0572e+02, 3.1622e+01, -1.5719e-01]], [[3.8226e+00, 2.9608e+01, 1.4457e+01, 6.8179e+01, -9.1997e-01], [2.5003e+01, -4.2490e+01, 9.6007e+01, 4.9086e+01, -1.4786e+00], [8.5983e+01, 5.4980e+01, 7.8080e+01, 1.0003e+02, -1.0926e+00], [9.9065e+00, 4.1457e+01, 5.9799e+00, 1.7973e+01, -5.6313e-01]]], [[[-1.8244e+01, 4.6309e+00, 5.3010e+01, 2.4310e+01, -7.0345e-01], [1.9419e+01, 3.6704e+01, 5.2390e+01, 5.4133e+01, -3.7730e-01], [5.6387e+01, 2.3752e+01, 9.0441e+00, 1.7792e+01, -1.5583e+00], [3.6303e+01, 1.6396e+01, 2.0283e+01, 1.9148e+01, -8.3419e-01]], [[3.2169e+01, 3.0521e+01, 2.6283e+01, 1.9680e+02, -3.0454e-01], [2.5788e+01, -3.2189e+01, 8.8882e+01, 1.0207e+02, -1.5328e+00], [8.4676e+00, -1.6668e+01, 2.4657e+01, 1.1275e+02, -4.0388e-01], [-1.0799e+01, 6.0422e+00, 9.5807e+00, 3.3677e+01, -3.5438e-01]], [[6.9363e+01, 1.0850e+01, 2.5968e+01, 2.2311e+01, -1.6408e-01], [2.8140e+00, 4.6843e+00, 3.1289e+00, 2.1480e+01, -6.7583e-01], [2.6661e+01, 4.5290e+01, 6.1679e+00, 3.0005e+01, -8.9806e-01], [5.0871e+00, 1.3234e+01, 9.2087e+01, 4.9622e+01, -2.8020e-01]], [[-1.2643e+01, 2.5176e+01, 5.0488e+01, 5.4246e+01, -4.4840e-01], [-3.4521e+01, 9.8435e-01, 5.2413e+01, 9.7996e+00, -8.4218e-01], [4.9829e+01, -1.0808e+01, 
2.9848e+01, 7.3579e+01, -6.2672e-01], [8.0446e+01, 2.8064e+01, 4.5273e+01, 5.3809e+01, -1.2359e+00]]]], device='cuda', requires_grad=True) expected_output = torch.tensor([[[[1.1095, -0.2172, -0.5222, -0.6225], [0.9144, 0.7662, 1.0487, -0.9690], [-0.8949, -1.6384, -0.9093, -0.3961], [-0.8604, 0.5062, -0.7947, -0.7397]], [[-0.3961, -0.7495, 1.3461, 1.5528], [0.2034, 0.5522, -1.6722, 0.5231], [-1.0062, 1.1350, 1.4225, -0.3951], [-0.4826, -1.6240, 0.1932, 2.7181]], [[-2.6436, -1.0276, 0.0578, -0.8344], [-0.9684, -1.8151, -2.1843, -1.1938], [-1.6744, -1.0121, -0.6556, 1.0073], [-0.8474, 0.3671, -0.3731, -0.5347]]], [[[0.7035, 0.2089, -0.1774, 3.4670], [-0.8505, -0.9278, 1.4714, 0.1644], [0.0898, 0.3064, -0.4007, 0.5849], [1.2569, -0.2636, -0.5223, 0.0616]], [[0.1760, -0.7639, -0.4600, -1.3260], [-0.9921, -0.2970, -0.8955, 1.0508], [1.3515, -0.6125, 1.9679, 0.5550], [-0.3616, 0.6287, 0.4933, 0.3360]], [[-0.5860, 0.2124, -0.8700, 2.4200], [-0.0551, -1.5103, -1.6779, 0.8399], [0.8431, 0.8455, -1.1243, -1.5994], [-2.1254, 0.6047, -0.3515, 0.7254]]]]).cuda() expected_grad = torch.tensor([[[[1.0000, 1.8507, 1.1493, 1.5222], [1.0000, 1.1511, 1.2139, 1.4778], [1.0000, 1.2629, 1.3721, 1.0000], [3.0000, 1.0000, 1.0000, 2.0000]], [[1.0000, 1.8507, 1.1493, 1.5222], [1.0000, 1.1511, 1.2139, 1.4778], [1.0000, 1.2629, 1.3721, 1.0000], [3.0000, 1.0000, 1.0000, 2.0000]], [[1.0000, 1.8507, 1.1493, 1.5222], [1.0000, 1.1511, 1.2139, 1.4778], [1.0000, 1.2629, 1.3721, 1.0000], [3.0000, 1.0000, 1.0000, 2.0000]]], [[[1.2687, 1.5055, 1.2382, 1.0000], [1.1458, 1.4258, 1.4160, 1.0000], [1.0000, 1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000, 1.0000]], [[1.2687, 1.5055, 1.2382, 1.0000], [1.1458, 1.4258, 1.4160, 1.0000], [1.0000, 1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000, 1.0000]], [[1.2687, 1.5055, 1.2382, 1.0000], [1.1458, 1.4258, 1.4160, 1.0000], [1.0000, 1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000, 1.0000]]]]).cuda() output = rotated_feature_align( feature, bbox, spatial_scale=1 / 
def test_sacconv():
    """``SAConv2d`` must produce the same output shape as ``nn.Conv2d``.

    Four configurations are checked: the plain case, dilation >= 2, the
    deformable variant, and groups >= 2. The deformable variant runs on
    CUDA when it is available and on CPU otherwise; the other cases run on
    CPU.
    """

    def check_same_shape(in_channels,
                         out_channels,
                         device='cpu',
                         use_deform=False,
                         **conv_kwargs):
        # Build SAConv2d and a reference nn.Conv2d with the same conv
        # arguments and compare the shapes of their outputs.
        saconv_kwargs = dict(conv_kwargs)
        if use_deform:
            # Only SAConv2d understands this flag; nn.Conv2d does not.
            saconv_kwargs['use_deform'] = True

        x = torch.rand(1, in_channels, 256, 256, device=device)
        saconv = SAConv2d(in_channels, out_channels,
                          **saconv_kwargs).to(device)
        refer_conv = nn.Conv2d(in_channels, out_channels,
                               **conv_kwargs).to(device)
        assert saconv(x).shape == refer_conv(x).shape

    # test with normal case
    check_same_shape(3, 5, kernel_size=3, padding=1)

    # test with dilation >= 2
    check_same_shape(3, 5, kernel_size=3, padding=2, dilation=2)

    # test with deform; build the module once, directly on the target device
    deform_device = 'cuda' if torch.cuda.is_available() else 'cpu'
    check_same_shape(
        3,
        5,
        device=deform_device,
        use_deform=True,
        kernel_size=3,
        padding=1)

    # test with groups >= 2
    check_same_shape(4, 4, kernel_size=3, padding=1, groups=2)
import torch from torch.autograd import gradcheck from mmcv.ops import DynamicScatter @pytest.mark.skipif( not torch.cuda.is_available(), reason='requires CUDA support') def test_dynamic_scatter(): feats = torch.rand( size=(200000, 3), dtype=torch.float32, device='cuda') * 100 - 50 coors = torch.randint( low=-1, high=20, size=(200000, 3), dtype=torch.int32, device='cuda') dsmean = DynamicScatter([0.32, 0.32, 6], [-74.88, -74.88, -2, 74.88, 74.88, 4], True) dsmax = DynamicScatter([0.32, 0.32, 6], [-74.88, -74.88, -2, 74.88, 74.88, 4], False) # test empty input empty_feats = torch.empty(size=(0, 3), dtype=torch.float32, device='cuda') empty_coors = torch.empty(size=(0, 3), dtype=torch.int32, device='cuda') empty_feats.requires_grad_() empty_feats_out_mean, empty_coors_out_mean = dsmean( empty_feats, empty_coors) empty_feats_out_mean.sum().backward() empty_feats_out_max, empty_coors_out_max = dsmax(empty_feats, empty_coors) empty_feats_out_max.sum().backward() assert empty_feats_out_mean.shape == empty_feats.shape assert empty_feats_out_max.shape == empty_feats.shape assert empty_coors_out_mean.shape == empty_coors.shape assert empty_coors_out_max.shape == empty_coors.shape # test empty reduced output empty_o_feats = torch.rand( size=(200000, 3), dtype=torch.float32, device='cuda') * 100 - 50 empty_o_coors = torch.randint( low=-1, high=0, size=(200000, 3), dtype=torch.int32, device='cuda') empty_o_feats.requires_grad_() empty_o_feats_out_mean, empty_o_coors_out_mean = dsmean( empty_o_feats, empty_o_coors) empty_o_feats_out_mean.sum().backward() assert (empty_o_feats.grad == 0).all() empty_o_feats_out_max, empty_o_coors_out_max = dsmax( empty_o_feats, empty_o_coors) empty_o_feats_out_max.sum().backward() assert (empty_o_feats.grad == 0).all() # test non-empty input ref_voxel_coors = coors.unique(dim=0, sorted=True) ref_voxel_coors = ref_voxel_coors[ref_voxel_coors.min(dim=-1).values >= 0] ref_voxel_feats_mean = [] ref_voxel_feats_max = [] for ref_voxel_coor in 
class TestSyncBN(object):
    """Compare ``mmcv.ops.SyncBatchNorm`` with ``nn.BatchNorm2d`` under SLURM.

    Every check requires ``SLURM_NTASKS == 4``. When that is not the case the
    check prints a hint and returns without asserting anything, so the test
    methods pass trivially outside such a job.
    """

    def dist_init(self):
        """Initialise the NCCL process group from SLURM environment variables.

        Reads ``SLURM_PROCID``, ``SLURM_NTASKS``, ``SLURM_LOCALID`` and
        ``SLURM_NODELIST``; writes ``MASTER_ADDR``, ``MASTER_PORT``,
        ``WORLD_SIZE`` and ``RANK``; then selects the local CUDA device.
        """
        rank = int(os.environ['SLURM_PROCID'])
        world_size = int(os.environ['SLURM_NTASKS'])
        local_rank = int(os.environ['SLURM_LOCALID'])
        node_list = str(os.environ['SLURM_NODELIST'])

        # NOTE(review): the address is rebuilt from the 2nd..5th numeric groups
        # of SLURM_NODELIST; presumably the host names embed an IPv4 address
        # after a leading number -- confirm against the target cluster.
        node_parts = re.findall('[0-9]+', node_list)
        os.environ['MASTER_ADDR'] = (f'{node_parts[1]}.{node_parts[2]}' +
                                     f'.{node_parts[3]}.{node_parts[4]}')
        os.environ['MASTER_PORT'] = '12341'
        os.environ['WORLD_SIZE'] = str(world_size)
        os.environ['RANK'] = str(rank)

        dist.init_process_group('nccl')
        torch.cuda.set_device(local_rank)

    def _run_syncbn_case(self, size, half, batch_size, **syncbn_kwargs):
        """Shared body of the SyncBN-vs-BatchNorm2d comparison.

        Args:
            size (int): Number of ranks per sync group; must be 1, 2 or 4.
            half (bool): Convert the random input and gradient to fp16.
            batch_size (int): First dimension of the random input
                (``batch_size x 3 x 2 x 3``).
            **syncbn_kwargs: Extra keyword arguments forwarded to
                ``SyncBatchNorm`` (e.g. ``stats_mode='N'``).

        Returns:
            tuple | None: ``(SyncBatchNorm, group)`` when the comparison ran,
            ``None`` when it was skipped because the process is not part of a
            4-task SLURM job.
        """
        if ('SLURM_NTASKS' not in os.environ
                or int(os.environ['SLURM_NTASKS']) != 4):
            print('must run with slurm has 4 processes!\n'
                  'srun -p test --gres=gpu:4 -n4')
            return None
        print('Running syncbn test')
        from mmcv.ops import SyncBatchNorm

        assert size in (1, 2, 4)
        if not dist.is_initialized():
            self.dist_init()
        rank = dist.get_rank()

        torch.manual_seed(9)
        torch.cuda.manual_seed(9)

        self.x = torch.rand(batch_size, 3, 2, 3).cuda()
        self.y_bp = torch.rand(batch_size, 3, 2, 3).cuda()

        if half:
            self.x = self.x.half()
            self.y_bp = self.y_bp.half()
        dist.broadcast(self.x, src=0)
        dist.broadcast(self.y_bp, src=0)

        torch.cuda.synchronize()
        # Every rank issues the same new_group() calls in the same order, also
        # for groups it does not belong to, exactly as the original code did.
        if size == 1:
            groups = [None, None, None, None]
            groups[0] = dist.new_group([0])
            groups[1] = dist.new_group([1])
            groups[2] = dist.new_group([2])
            groups[3] = dist.new_group([3])
            group = groups[rank]
        elif size == 2:
            groups = [None, None, None, None]
            groups[0] = groups[1] = dist.new_group([0, 1])
            groups[2] = groups[3] = dist.new_group([2, 3])
            group = groups[rank]
        elif size == 4:
            group = dist.group.WORLD

        syncbn = SyncBatchNorm(3, group=group, **syncbn_kwargs).cuda()
        syncbn.weight.data[0] = 0.2
        syncbn.weight.data[1] = 0.5
        syncbn.weight.data[2] = 0.7
        syncbn.train()

        bn = nn.BatchNorm2d(3).cuda()
        bn.weight.data[0] = 0.2
        bn.weight.data[1] = 0.5
        bn.weight.data[2] = 0.7
        bn.train()

        # SyncBN side: each rank feeds its own 4-sample slice.
        sx = self.x[rank * 4:rank * 4 + 4]
        sx.requires_grad_()
        sy = syncbn(sx)
        sy.backward(self.y_bp[rank * 4:rank * 4 + 4])

        smean = syncbn.running_mean
        svar = syncbn.running_var
        sx_grad = sx.grad
        sw_grad = syncbn.weight.grad
        sb_grad = syncbn.bias.grad

        # Reference side: plain BatchNorm2d over everything the sync group
        # sees (4 samples per participating rank).
        if size == 1:
            x = self.x[rank * 4:rank * 4 + 4]
            y_bp = self.y_bp[rank * 4:rank * 4 + 4]
        elif size == 2:
            x = self.x[rank // 2 * 8:rank // 2 * 8 + 8]
            y_bp = self.y_bp[rank // 2 * 8:rank // 2 * 8 + 8]
        elif size == 4:
            x = self.x
            y_bp = self.y_bp
        x.requires_grad_()
        y = bn(x)
        y.backward(y_bp)

        # Cut the reference output back down to this rank's slice.
        if size == 2:
            y = y[rank % 2 * 4:rank % 2 * 4 + 4]
        elif size == 4:
            y = y[rank * 4:rank * 4 + 4]

        mean = bn.running_mean
        var = bn.running_var
        # The reference weight/bias gradients are divided by the group size
        # before being compared with the SyncBN ones.
        if size == 1:
            x_grad = x.grad
            w_grad = bn.weight.grad
            b_grad = bn.bias.grad
        elif size == 2:
            x_grad = x.grad[rank % 2 * 4:rank % 2 * 4 + 4]
            w_grad = bn.weight.grad / 2
            b_grad = bn.bias.grad / 2
        elif size == 4:
            x_grad = x.grad[rank * 4:rank * 4 + 4]
            w_grad = bn.weight.grad / 4
            b_grad = bn.bias.grad / 4

        assert np.allclose(mean.data.cpu().numpy(),
                           smean.data.cpu().numpy(), 1e-3)
        assert np.allclose(var.data.cpu().numpy(),
                           svar.data.cpu().numpy(), 1e-3)
        assert np.allclose(y.data.cpu().numpy(), sy.data.cpu().numpy(), 1e-3)
        assert np.allclose(w_grad.data.cpu().numpy(),
                           sw_grad.data.cpu().numpy(), 1e-3)
        assert np.allclose(b_grad.data.cpu().numpy(),
                           sb_grad.data.cpu().numpy(), 1e-3)
        assert np.allclose(x_grad.data.cpu().numpy(),
                           sx_grad.data.cpu().numpy(), 1e-2)

        return SyncBatchNorm, group

    def _test_syncbn_train(self, size=1, half=False):
        """Run the comparison on a random batch of 16 samples."""
        self._run_syncbn_case(size, half, batch_size=16)

    def _test_syncbn_empty_train(self, size=1, half=False):
        """Run the comparison on an empty batch with ``stats_mode='N'``.

        Afterwards also checks that an unknown ``stats_mode`` is rejected.
        """
        outcome = self._run_syncbn_case(
            size, half, batch_size=0, stats_mode='N')
        if outcome is None:
            return
        SyncBatchNorm, group = outcome

        # 'stats_mode' only allows 'default' and 'N'
        with pytest.raises(AssertionError):
            SyncBatchNorm(3, group=group, stats_mode='X')

    def test_syncbn_1(self):
        self._test_syncbn_train(size=1)

    def test_syncbn_2(self):
        self._test_syncbn_train(size=2)

    def test_syncbn_4(self):
        self._test_syncbn_train(size=4)

    def test_syncbn_1_half(self):
        self._test_syncbn_train(size=1, half=True)

    def test_syncbn_2_half(self):
        self._test_syncbn_train(size=2, half=True)

    def test_syncbn_4_half(self):
        self._test_syncbn_train(size=4, half=True)

    def test_syncbn_empty_1(self):
        self._test_syncbn_empty_train(size=1)

    def test_syncbn_empty_2(self):
        self._test_syncbn_empty_train(size=2)

    def test_syncbn_empty_4(self):
        self._test_syncbn_empty_train(size=4)

    def test_syncbn_empty_1_half(self):
        self._test_syncbn_empty_train(size=1, half=True)

    def test_syncbn_empty_2_half(self):
        self._test_syncbn_empty_train(size=2, half=True)

    def test_syncbn_empty_4_half(self):
        self._test_syncbn_empty_train(size=4, half=True)
def test_nms():
    """Compare ``mmcv.ops.nms`` in PyTorch with its TensorRT engine.

    The op is exported to ONNX with ``ONNX_BACKEND=MMCVTensorRT``, converted
    to a TensorRT engine and run on the same boxes/scores as the PyTorch
    function. Scores must match within ``atol=1e-3`` and indices exactly.

    The environment variable and the temporary ONNX/engine files are cleaned
    up in a ``finally`` block so that a failure anywhere in the pipeline does
    not leak state into the tests that run afterwards.
    """
    try:
        import mmcv
        from mmcv.ops import nms
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    # trt config
    fp16_mode = False
    max_workspace_size = 1 << 30

    # Remember any pre-existing value so it can be restored afterwards.
    previous_backend = os.environ.get('ONNX_BACKEND')
    os.environ['ONNX_BACKEND'] = 'MMCVTensorRT'
    try:
        data = mmcv.load('./tests/data/batched_nms_data.pkl')
        boxes = torch.from_numpy(data['boxes']).cuda()
        scores = torch.from_numpy(data['scores']).cuda()
        nms_func = partial(
            nms,
            iou_threshold=0.7,
            offset=0,
            score_threshold=0.1,
            max_num=100)
        wrapped_model = WrapFunction(nms_func)
        wrapped_model.cpu().eval()
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model, (boxes.detach().cpu(), scores.detach().cpu()),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['boxes', 'scores'],
                output_names=['dets', 'inds'],
                opset_version=11)

        onnx_model = onnx.load(onnx_file)

        # create trt engine and wrapper
        opt_shape_dict = {
            'boxes':
            [list(boxes.shape),
             list(boxes.shape),
             list(boxes.shape)],
            'scores':
            [list(scores.shape),
             list(scores.shape),
             list(scores.shape)]
        }
        trt_engine = onnx2trt(
            onnx_model,
            opt_shape_dict,
            fp16_mode=fp16_mode,
            max_workspace_size=max_workspace_size)
        save_trt_engine(trt_engine, trt_file)
        trt_model = TRTWrapper(trt_file, ['boxes', 'scores'],
                               ['dets', 'inds'])

        with torch.no_grad():
            trt_outputs = trt_model({'boxes': boxes, 'scores': scores})
            trt_dets = trt_outputs['dets']
            trt_inds = trt_outputs['inds']
            trt_inds = trt_inds.long()

        # compute pytorch_output
        with torch.no_grad():
            pytorch_outputs = wrapped_model(boxes, scores)
            pytorch_dets, pytorch_inds = pytorch_outputs
    finally:
        for tmp_path in (onnx_file, trt_file):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        if previous_backend is None:
            os.environ.pop('ONNX_BACKEND', None)
        else:
            os.environ['ONNX_BACKEND'] = previous_backend

    # allclose: only the first `num_boxes` TensorRT rows are compared
    num_boxes = pytorch_dets.shape[0]
    trt_dets = trt_dets[:num_boxes, ...]
    trt_inds = trt_inds[:num_boxes]
    trt_scores = trt_dets[:, 4]
    pytorch_scores = pytorch_dets[:, 4]
    assert torch.allclose(pytorch_scores, trt_scores, atol=1e-3)
    assert torch.equal(pytorch_inds, trt_inds)
def test_deform_conv():
    """Compare ``DeformConv2dPack`` in PyTorch with its TensorRT engine.

    A 1-in/1-out layer with hand-written offset and kernel parameters is
    exported to ONNX, converted to TensorRT, and both backends are run on the
    same 3x3 input. The outputs must be ``torch.allclose``.
    """
    try:
        from mmcv.ops import DeformConv2dPack
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    # hand-written input and layer parameters
    image = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
    offset_w = [[[0.1, 0.4, 0.6, 0.1]], [[0.3, 0.2, 0.1, 0.3]],
                [[0.5, 0.5, 0.2, 0.8]], [[0.8, 0.3, 0.9, 0.1]],
                [[0.3, 0.1, 0.2, 0.5]], [[0.3, 0.7, 0.5, 0.3]],
                [[0.6, 0.2, 0.5, 0.3]], [[0.4, 0.1, 0.8, 0.4]]]
    offset_b = [0.7, 0.1, 0.8, 0.5, 0.6, 0.5, 0.4, 0.7]
    deform_w = [[[0.4, 0.2, 0.1, 0.9]]]
    in_channels, out_channels = 1, 1

    x = torch.Tensor(image).cuda()
    x.requires_grad = True

    model = DeformConv2dPack(
        in_channels, out_channels, 2, stride=1, padding=0)
    model.conv_offset.weight.data = torch.nn.Parameter(
        torch.Tensor(offset_w).reshape(8, 1, 2, 2))
    model.conv_offset.bias.data = torch.nn.Parameter(
        torch.Tensor(offset_b).reshape(8))
    model.weight.data = torch.nn.Parameter(
        torch.Tensor(deform_w).reshape(1, 1, 2, 2))
    model.cuda().eval()

    input_names = ['input']
    output_names = ['output']

    # PyTorch -> ONNX
    with torch.no_grad():
        torch.onnx.export(
            model, (x.clone(), ),
            onnx_file,
            export_params=True,
            keep_initializers_as_inputs=True,
            input_names=input_names,
            output_names=output_names,
            opset_version=11)
    onnx_model = onnx.load(onnx_file)

    # ONNX -> TensorRT; the same shape is given three times per input
    opt_shape_dict = {'input': [list(x.shape) for _ in range(3)]}
    fp16_mode = False
    max_workspace_size = 1 << 30
    engine = onnx2trt(
        onnx_model,
        opt_shape_dict,
        fp16_mode=fp16_mode,
        max_workspace_size=max_workspace_size)
    save_trt_engine(engine, trt_file)
    trt_model = TRTWrapper(trt_file, input_names, output_names)

    with torch.no_grad():
        trt_results = trt_model({'input': x.clone()})['output']

    # reference output from PyTorch
    with torch.no_grad():
        pytorch_results = model(x.clone())

    # drop the temporary artefacts before comparing
    for tmp_path in (onnx_file, trt_file):
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    assert torch.allclose(pytorch_results, trt_results)
@pytest.mark.parametrize('mode', ['bilinear', 'nearest'])
@pytest.mark.parametrize('padding_mode', ['zeros', 'border', 'reflection'])
@pytest.mark.parametrize('align_corners', [True, False])
def test_grid_sample(mode, padding_mode, align_corners):
    """Compare ``F.grid_sample`` in PyTorch with its TensorRT engine.

    Args:
        mode (str): Interpolation mode passed to ``F.grid_sample``.
        padding_mode (str): Padding mode passed to ``F.grid_sample``.
        align_corners (bool): ``align_corners`` passed to ``F.grid_sample``.
    """
    from mmcv.onnx.symbolic import register_extra_symbolics

    register_extra_symbolics(11)

    input = torch.rand(1, 1, 10, 10).cuda()
    theta = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
    grid = F.affine_grid(theta, (1, 1, 15, 15)).type_as(input).cuda()

    def sample(feat, coords):
        return F.grid_sample(
            feat,
            coords,
            mode=mode,
            padding_mode=padding_mode,
            align_corners=align_corners)

    wrapped_model = WrapFunction(sample).eval().cuda()

    input_names = ['input', 'grid']
    output_names = ['output']

    # PyTorch -> ONNX
    with torch.no_grad():
        torch.onnx.export(
            wrapped_model, (input.clone(), grid.clone()),
            onnx_file,
            export_params=True,
            keep_initializers_as_inputs=True,
            input_names=input_names,
            output_names=output_names,
            opset_version=11)
    onnx_model = onnx.load(onnx_file)

    # ONNX -> TensorRT; the same shape is given three times per input
    opt_shape_dict = {
        'input': [list(input.shape) for _ in range(3)],
        'grid': [list(grid.shape) for _ in range(3)],
    }
    fp16_mode = False
    max_workspace_size = 1 << 30
    engine = onnx2trt(
        onnx_model,
        opt_shape_dict,
        fp16_mode=fp16_mode,
        max_workspace_size=max_workspace_size)
    save_trt_engine(engine, trt_file)
    trt_model = TRTWrapper(trt_file, input_names, output_names)

    with torch.no_grad():
        feed = {'input': input.clone(), 'grid': grid.clone()}
        trt_results = trt_model(feed)['output']

    # reference output from PyTorch
    with torch.no_grad():
        pytorch_results = wrapped_model(input.clone(), grid.clone())

    # drop the temporary artefacts before comparing
    for tmp_path in (onnx_file, trt_file):
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    assert torch.allclose(pytorch_results, trt_results)
@pytest.mark.parametrize('dynamic_export', [True, False])
@pytest.mark.parametrize('fp16_mode', [True, False])
def test_instance_norm(dynamic_export, fp16_mode):
    """Compare ``nn.InstanceNorm2d`` in PyTorch with its TensorRT engine.

    Args:
        dynamic_export (bool): Export with dynamic batch/height/width axes
            and give TensorRT a maximum shape that doubles those dimensions.
        fp16_mode (bool): Forwarded to ``onnx2trt``.
    """
    batch, channels, height, width = 2, 3, 10, 10
    data = torch.randn(batch, channels, height, width).cuda()
    layer = nn.InstanceNorm2d(channels, affine=True)
    wrapped_model = WrapFunction(layer).eval().cuda()

    input_names = ['input']
    output_names = ['output']

    if dynamic_export:
        dynamic_axes = {
            'input': {0: 'n', 2: 'h', 3: 'w'},
            'output': {0: 'n', 2: 'h', 3: 'w'},
        }
    else:
        dynamic_axes = None

    # PyTorch -> ONNX
    with torch.no_grad():
        torch.onnx.export(
            wrapped_model, (data.clone(), ),
            onnx_file,
            export_params=True,
            keep_initializers_as_inputs=True,
            input_names=input_names,
            output_names=output_names,
            dynamic_axes=dynamic_axes,
            opset_version=11)
    onnx_model = onnx.load(onnx_file)

    # ONNX -> TensorRT; only the third shape differs for a dynamic export
    if dynamic_export:
        last_shape = [2 * batch, channels, 2 * height, 2 * width]
    else:
        last_shape = list(data.shape)
    opt_shape_dict = {
        'input': [list(data.shape), list(data.shape), last_shape],
    }
    max_workspace_size = 1 << 30
    engine = onnx2trt(
        onnx_model,
        opt_shape_dict,
        fp16_mode=fp16_mode,
        max_workspace_size=max_workspace_size)
    save_trt_engine(engine, trt_file)
    trt_model = TRTWrapper(trt_file, input_names, output_names)

    with torch.no_grad():
        trt_results = trt_model({'input': data.clone()})['output']

    # reference output from PyTorch
    with torch.no_grad():
        pytorch_results = wrapped_model(data.clone())

    # drop the temporary artefacts before comparing
    for tmp_path in (onnx_file, trt_file):
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    assert torch.allclose(pytorch_results, trt_results)
def remove_tmp_file(func):
    """Decorator that hands ``func`` a throw-away ONNX file path.

    The wrapped function is called with an extra keyword argument
    ``onnx_file`` naming a file ``tmp.onnx`` inside a freshly created
    temporary directory. The directory, and with it whatever ``func`` wrote
    there, is removed when ``func`` returns or raises.

    Using a private directory rather than ``'tmp.onnx'`` in the current
    working directory avoids collisions with other tests or parallel runs
    that use the same file name, and never overwrites or deletes a
    pre-existing file belonging to the user.

    Args:
        func (callable): Function accepting an ``onnx_file`` keyword.

    Returns:
        callable: Wrapper with the same metadata as ``func`` that returns
        whatever ``func`` returns.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Local import: the surrounding module does not import tempfile.
        import tempfile

        with tempfile.TemporaryDirectory() as tmp_dir:
            kwargs['onnx_file'] = os.path.join(tmp_dir, 'tmp.onnx')
            return func(*args, **kwargs)

    return wrapper
def test_can_handle_nms_with_undefined_maxnum():
    """Check ``preprocess_onnx`` on an NMS export without ``max_num``.

    Every node whose name contains ``'NonMaxSuppression'`` must carry five
    attributes, and the integer value of its third attribute must be positive.
    """

    class ModuleNMS(torch.nn.Module):

        def forward(self, boxes, scores):
            return nms(boxes, scores, iou_threshold=0.4)

    exported = export_nms_module_to_onnx(ModuleNMS)
    processed = preprocess_onnx(exported)

    for node in processed.graph.node:
        if 'NonMaxSuppression' not in node.name:
            continue
        attrs = node.attribute
        assert len(attrs) == 5, \
            'The NMS must have 5 attributes.'
        assert attrs[2].i > 0, \
            'The max_output_boxes_per_class is not defined correctly.'
4], [0, 2, 4], [0, 2, 4], [0, 1, 2]]]).int().cuda() weight = torch.tensor([[[3.3333e-01, 3.3333e-01, 3.3333e-01], [1.0000e+00, 5.8155e-08, 2.2373e-08], [1.0000e+00, 1.7737e-08, 1.7356e-08], [3.3333e-01, 3.3333e-01, 3.3333e-01], [3.3333e-01, 3.3333e-01, 3.3333e-01], [3.3333e-01, 3.3333e-01, 3.3333e-01]], [[3.3333e-01, 3.3333e-01, 3.3333e-01], [1.0000e+00, 1.3651e-08, 7.7312e-09], [1.0000e+00, 1.7148e-08, 1.4070e-08], [3.3333e-01, 3.3333e-01, 3.3333e-01], [3.3333e-01, 3.3333e-01, 3.3333e-01], [3.3333e-01, 3.3333e-01, 3.3333e-01]]]).cuda() output = three_interpolate(features, idx, weight) expected_output = torch.tensor([[[ 3.8953e+00, 4.4995e+00, 4.4995e+00, 3.8953e+00, 3.8953e+00, 3.2072e+00 ], [ 2.9320e+00, 3.0447e+00, 3.0447e+00, 2.9320e+00, 2.9320e+00, 2.9583e+00 ], [ 2.7281e+00, 2.6436e+00, 2.6436e+00, 2.7281e+00, 2.7281e+00, 2.7380e+00 ], [ 4.6824e+00, 7.0199e+00, 7.0199e+00, 4.6824e+00, 4.6824e+00, 2.3466e+00 ], [ 2.2060e-01, 3.4110e-01, 3.4110e-01, 2.2060e-01, 2.2060e-01, 2.1380e-01 ]], [[ 8.1773e-01, 9.5440e-01, 2.4532e+00, 8.1773e-01, 8.1773e-01, 1.1359e+00 ], [ 8.4689e-01, 1.9176e+00, 1.4715e+00, 8.4689e-01, 8.4689e-01, 1.3079e+00 ], [ 6.9473e-01, 2.7440e-01, 2.0842e+00, 6.9473e-01, 6.9473e-01, 7.8619e-01 ], [ 7.6789e-01, 1.5063e+00, 1.6209e+00, 7.6789e-01, 7.6789e-01, 1.1562e+00 ], [ 3.8760e-01, 1.0300e-02, 8.3569e-09, 3.8760e-01, 3.8760e-01, 1.9723e-01 ]]]).cuda() assert torch.allclose(output, expected_output, 1e-4) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_three_nn.py ================================================ import pytest import torch from mmcv.ops import three_nn @pytest.mark.skipif( not torch.cuda.is_available(), reason='requires CUDA support') def test_three_nn(): known = torch.tensor([[[-1.8373, 3.5605, -0.7867], [0.7615, 2.9420, 0.2314], [-0.6503, 3.6637, -1.0622], [-1.8373, 3.5605, -0.7867], [-1.8373, 3.5605, -0.7867]], [[-1.3399, 1.9991, -0.3698], [-0.0799, 0.9698, -0.8457], 
[0.0858, 2.4721, -0.1928], [-1.3399, 1.9991, -0.3698], [-1.3399, 1.9991, -0.3698]]]).cuda() unknown = torch.tensor([[[-1.8373, 3.5605, -0.7867], [0.7615, 2.9420, 0.2314], [-0.6503, 3.6637, -1.0622], [-1.5237, 2.3976, -0.8097], [-0.0722, 3.4017, -0.2880], [0.5198, 3.0661, -0.4605], [-2.0185, 3.5019, -0.3236], [0.5098, 3.1020, 0.5799], [-1.6137, 3.8443, -0.5269], [0.7341, 2.9626, -0.3189]], [[-1.3399, 1.9991, -0.3698], [-0.0799, 0.9698, -0.8457], [0.0858, 2.4721, -0.1928], [-0.9022, 1.6560, -1.3090], [0.1156, 1.6901, -0.4366], [-0.6477, 2.3576, -0.1563], [-0.8482, 1.1466, -1.2704], [-0.8753, 2.0845, -0.3460], [-0.5621, 1.4233, -1.2858], [-0.5883, 1.3114, -1.2899]]]).cuda() dist, idx = three_nn(unknown, known) expected_dist = torch.tensor([[[0.0000, 0.0000, 0.0000], [0.0000, 2.0463, 2.8588], [0.0000, 1.2229, 1.2229], [1.2047, 1.2047, 1.2047], [1.0011, 1.0845, 1.8411], [0.7433, 1.4451, 2.4304], [0.5007, 0.5007, 0.5007], [0.4587, 2.0875, 2.7544], [0.4450, 0.4450, 0.4450], [0.5514, 1.7206, 2.6811]], [[0.0000, 0.0000, 0.0000], [0.0000, 1.6464, 1.6952], [0.0000, 1.5125, 1.5125], [1.0915, 1.0915, 1.0915], [0.8197, 0.8511, 1.4894], [0.7433, 0.8082, 0.8082], [0.8955, 1.3340, 1.3340], [0.4730, 0.4730, 0.4730], [0.7949, 1.3325, 1.3325], [0.7566, 1.3727, 1.3727]]]).cuda() expected_idx = torch.tensor([[[0, 3, 4], [1, 2, 0], [2, 0, 3], [0, 3, 4], [2, 1, 0], [1, 2, 0], [0, 3, 4], [1, 2, 0], [0, 3, 4], [1, 2, 0]], [[0, 3, 4], [1, 2, 0], [2, 0, 3], [0, 3, 4], [2, 1, 0], [2, 0, 3], [1, 0, 3], [0, 3, 4], [1, 0, 3], [1, 0, 3]]]).cuda() assert torch.allclose(dist, expected_dist, 1e-4) assert torch.all(idx == expected_idx) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_tin_shift.py ================================================ import os import numpy as np import pytest import torch _USING_PARROTS = True try: from parrots.autograd import gradcheck except ImportError: from torch.autograd import gradcheck _USING_PARROTS = False 
cur_dir = os.path.dirname(os.path.abspath(__file__)) inputs = ([[[[0.88572276, 0.46422583], [0.97408265, 0.59547687], [0.030812204, 0.96236038], [0.75418317, 0.44058233], [0.33279222, 0.00084149837], [0.7069388, 0.23255438], [0.13547045, 0.81549376], [0.40174931, 0.36317211]], [[0.57444429, 0.15905505], [0.39897251, 0.25790238], [0.93282568, 0.18451685], [0.92526674, 0.18283755], [0.31664443, 0.59323865], [0.1957739, 0.42505842], [0.081158757, 0.81340349], [0.43456328, 0.30195212]], [[0.8198145, 0.05990988], [0.98062474, 0.34803438], [0.10412294, 0.37183142], [0.15021622, 0.038857818], [0.40985721, 0.42253625], [0.71150124, 0.59778064], [0.83851069, 0.15194464], [0.097513378, 0.74820143]], [[0.80680406, 0.49327564], [0.17821097, 0.12980539], [0.50657678, 0.14446253], [0.04178369, 0.53071898], [0.84983683, 0.3826949], [0.32193625, 0.91275406], [0.75628334, 0.52934098], [0.27994192, 0.3053292]]], [[[0.082397044, 0.4210068], [0.23563534, 0.7938987], [0.63669145, 0.69397897], [0.8844561, 0.97854084], [0.79027033, 0.60640401], [0.63528901, 0.72172403], [0.0097346902, 0.70800996], [0.87891227, 0.13674974]], [[0.74329448, 0.0243572], [0.82178867, 0.85750699], [0.7568835, 0.73146772], [0.5031184, 0.30479157], [0.28713053, 0.47414285], [0.4682079, 0.067471564], [0.48368263, 0.14590704], [0.25397325, 0.19946373]], [[0.4291026, 0.068739474], [0.7159555, 0.79903615], [0.76412082, 0.85348046], [0.081224024, 0.82264912], [0.97173303, 0.24291694], [0.48957139, 0.43488795], [0.67382395, 0.21889746], [0.36712623, 0.67127824]], [[0.12054044, 0.18096751], [0.86675781, 0.54755616], [0.68208277, 0.15164375], [0.79991871, 0.80811197], [0.85256428, 0.68253738], [0.185983, 0.95642138], [0.48102546, 0.28009653], [0.35726011, 0.58168036]]]]) shifts = [([[1, 0, 1, -2], [-2, 1, -1, 1]]), ([[2, 1, 2, -1], [-1, 2, 0, 2]])] outputs = [([[[[0.0, 0.0], [0.0, 0.0], [0.030812, 0.96236], [0.75418, 0.44058], [0.0, 0.0], [0.0, 0.0], [0.83851, 0.15194], [0.097513, 0.7482]], [[0.88572, 0.46423], 
[0.97408, 0.59548], [0.93283, 0.18452], [0.92527, 0.18284], [0.33279, 0.0008415], [0.70694, 0.23255], [0.75628, 0.52934], [0.27994, 0.30533]], [[0.57444, 0.15906], [0.39897, 0.2579], [0.10412, 0.37183], [0.15022, 0.038858], [0.31664, 0.59324], [0.19577, 0.42506], [0.0, 0.0], [0.0, 0.0]], [[0.81981, 0.05991], [0.98062, 0.34803], [0.50658, 0.14446], [0.041784, 0.53072], [0.40986, 0.42254], [0.7115, 0.59778], [0.0, 0.0], [0.0, 0.0]]], [[[0.4291, 0.068739], [0.71596, 0.79904], [0.0, 0.0], [0.0, 0.0], [0.28713, 0.47414], [0.46821, 0.067472], [0.0, 0.0], [0.0, 0.0]], [[0.12054, 0.18097], [0.86676, 0.54756], [0.63669, 0.69398], [0.88446, 0.97854], [0.97173, 0.24292], [0.48957, 0.43489], [0.0097347, 0.70801], [0.87891, 0.13675]], [[0.0, 0.0], [0.0, 0.0], [0.75688, 0.73147], [0.50312, 0.30479], [0.85256, 0.68254], [0.18598, 0.95642], [0.48368, 0.14591], [0.25397, 0.19946]], [[0.0, 0.0], [0.0, 0.0], [0.76412, 0.85348], [0.081224, 0.82265], [0.0, 0.0], [0.0, 0.0], [0.67382, 0.2189], [0.36713, 0.67128]]]]), ([[[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.081159, 0.8134], [0.43456, 0.30195]], [[0.0, 0.0], [0.0, 0.0], [0.030812, 0.96236], [0.75418, 0.44058], [0.0, 0.0], [0.0, 0.0], [0.83851, 0.15194], [0.097513, 0.7482]], [[0.88572, 0.46423], [0.97408, 0.59548], [0.93283, 0.18452], [0.92527, 0.18284], [0.33279, 0.0008415], [0.70694, 0.23255], [0.75628, 0.52934], [0.27994, 0.30533]], [[0.57444, 0.15906], [0.39897, 0.2579], [0.10412, 0.37183], [0.15022, 0.038858], [0.31664, 0.59324], [0.19577, 0.42506], [0.0, 0.0], [0.0, 0.0]]], [[[0.74329, 0.024357], [0.82179, 0.85751], [0.0, 0.0], [0.0, 0.0], [0.79027, 0.6064], [0.63529, 0.72172], [0.0, 0.0], [0.0, 0.0]], [[0.4291, 0.068739], [0.71596, 0.79904], [0.0, 0.0], [0.0, 0.0], [0.28713, 0.47414], [0.46821, 0.067472], [0.0, 0.0], [0.0, 0.0]], [[0.12054, 0.18097], [0.86676, 0.54756], [0.63669, 0.69398], [0.88446, 0.97854], [0.97173, 0.24292], [0.48957, 0.43489], [0.0097347, 0.70801], [0.87891, 0.13675]], 
[[0.0, 0.0], [0.0, 0.0], [0.75688, 0.73147], [0.50312, 0.30479], [0.85256, 0.68254], [0.18598, 0.95642], [0.48368, 0.14591], [0.25397, 0.19946]]]])] grads = [ [[[[0., 0.], [0., 0.], [1., 1.], [1., 1.], [0., 0.], [0., 0.], [1., 1.], [1., 1.]], [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.]], [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [0., 0.], [0., 0.]], [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [0., 0.], [0., 0.]]], [[[1., 1.], [1., 1.], [0., 0.], [0., 0.], [1., 1.], [1., 1.], [0., 0.], [0., 0.]], [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.]], [[0., 0.], [0., 0.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.]], [[0., 0.], [0., 0.], [1., 1.], [1., 1.], [0., 0.], [0., 0.], [1., 1.], [1., 1.]]]], [[[[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.], [1., 1.], [1., 1.]], [[0., 0.], [0., 0.], [1., 1.], [1., 1.], [0., 0.], [0., 0.], [1., 1.], [1., 1.]], [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.]], [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [0., 0.], [0., 0.]]], [[[1., 1.], [1., 1.], [0., 0.], [0., 0.], [1., 1.], [1., 1.], [0., 0.], [0., 0.]], [[1., 1.], [1., 1.], [0., 0.], [0., 0.], [1., 1.], [1., 1.], [0., 0.], [0., 0.]], [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.]], [[0., 0.], [0., 0.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.]]]] ] def _test_tinshift_gradcheck(dtype): try: from mmcv.ops import tin_shift except ModuleNotFoundError: pytest.skip('TINShift op is not successfully compiled') if dtype == torch.half: pytest.skip('"add_cpu/sub_cpu" not implemented for Half') for shift in shifts: np_input = np.array(inputs) np_shift = np.array(shift) x = torch.tensor( np_input, dtype=dtype, device='cuda', requires_grad=True) shift = torch.tensor(np_shift, device='cuda').int() if torch.__version__ == 'parrots': gradcheck(tin_shift, (x, shift)) 
def _test_tinshift_assert(dtype):
    """Check that ``tin_shift`` rejects shift tensors of the wrong shape.

    Two random ``(2, 3, 4, 2)`` inputs are paired with random shift tensors
    of shape ``(2, 3)`` and ``(2, 5)``; each call is expected to raise
    ``ValueError``. The test is skipped when the op cannot be imported.
    ``dtype`` is accepted for symmetry with the sibling helpers but is not
    used here.
    """
    try:
        from mmcv.ops import tin_shift
    except ModuleNotFoundError:
        pytest.skip('TINShift op is not successfully compiled')

    # Random tensors are drawn in the same order as before: inputs, then shifts.
    feature_maps = [torch.rand(2, 3, 4, 2) for _ in range(2)]
    bad_shifts = [torch.rand(2, width) for width in (3, 5)]

    for case_idx, feature_map in enumerate(feature_maps):
        cuda_input = feature_map.cuda()
        cuda_shift = bad_shifts[case_idx].cuda()

        # A ValueError should be raised if ops get inputs with wrong shapes.
        with pytest.raises(ValueError):
            tin_shift(cuda_input, cuda_shift)
More gerneal tests will be included in other unit test for UpFirDnUpsample and UpFirDnDownSample modules. """ @classmethod def setup_class(cls): kernel_1d = torch.tensor([1., 3., 3., 1.]) cls.kernel = kernel_1d[:, None] * kernel_1d[None, :] cls.kernel = cls.kernel / cls.kernel.sum() cls.factor = 2 pad = cls.kernel.shape[0] - cls.factor cls.pad = ((pad + 1) // 2 + cls.factor - 1, pad // 2) cls.input_tensor = torch.randn((2, 3, 4, 4), requires_grad=True) @pytest.mark.skipif(not torch.cuda.is_available(), reason='requires cuda') def test_upfirdn2d(self): from mmcv.ops import upfirdn2d if _USING_PARROTS: gradcheck( upfirdn2d, (self.input_tensor.cuda(), self.kernel.type_as( self.input_tensor).cuda(), self.factor, 1, self.pad), delta=1e-4, pt_atol=1e-3) else: gradcheck( upfirdn2d, (self.input_tensor.cuda(), self.kernel.type_as( self.input_tensor).cuda(), self.factor, 1, self.pad), eps=1e-4, atol=1e-3) gradgradcheck( upfirdn2d, (self.input_tensor.cuda(), self.kernel.type_as( self.input_tensor).cuda(), self.factor, 1, self.pad), eps=1e-4, atol=1e-3) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_voxelization.py ================================================ import numpy as np import pytest import torch from mmcv.ops import Voxelization def _get_voxel_points_indices(points, coors, voxel): result_form = np.equal(coors, voxel) return result_form[:, 0] & result_form[:, 1] & result_form[:, 2] @pytest.mark.parametrize('device_type', [ 'cpu', pytest.param( 'cuda:0', marks=pytest.mark.skipif( not torch.cuda.is_available(), reason='requires CUDA support')) ]) def test_voxelization(device_type): voxel_size = [0.5, 0.5, 0.5] point_cloud_range = [0, -40, -3, 70.4, 40, 1] voxel_dict = np.load( 'tests/data/for_3d_ops/test_voxel.npy', allow_pickle=True).item() expected_coors = voxel_dict['coors'] expected_voxels = voxel_dict['voxels'] expected_num_points_per_voxel = voxel_dict['num_points_per_voxel'] points = 
def mock(*args, **kwargs):
    """No-op stand-in: accepts any arguments and returns ``None``.

    Used in this module as the replacement object for the patched
    ``torch.distributed`` / ``DistributedDataParallel`` internals.
    """
    return None
def test_get_input_device():
    """Check ``get_input_device`` on CPU tensors, CUDA tensors and bad input.

    CPU tensors (single or in a list) must report ``-1``; when CUDA is
    available, tensors moved with ``.cuda()`` must report device ``0``; a
    non-tensor argument must raise.
    """
    shapes = ([1, 3, 3, 3], [1, 4, 4, 4])

    # CPU inputs map to -1.
    cpu_single = torch.zeros(shapes[0])
    assert get_input_device(cpu_single) == -1
    cpu_batch = [torch.zeros(shape) for shape in shapes]
    assert get_input_device(cpu_batch) == -1

    # GPU inputs map to the device index.
    if torch.cuda.is_available():
        gpu_single = torch.zeros(shapes[0]).cuda()
        assert get_input_device(gpu_single) == 0
        gpu_batch = [torch.zeros(shape).cuda() for shape in shapes]
        assert get_input_device(gpu_batch) == 0

    # Anything that is neither a tensor nor a list of tensors is rejected.
    with pytest.raises(Exception):
        get_input_device(5)
def test_Scatter():
    """Exercise ``Scatter.forward`` for CPU targets and, if present, GPU 0.

    For each target the call is made once with a single tensor and once
    with a list of two tensors; the result must be a ``tuple`` whose
    entries compare close to the (device-matched) inputs.
    """

    def make_single():
        return torch.zeros([1, 3, 3, 3])

    def make_pair():
        return [torch.zeros([1, 3, 3, 3]), torch.zeros([1, 4, 4, 4])]

    # CPU target: the input is expected back unchanged.
    single = make_single()
    scattered = Scatter.forward([-1], single)
    assert isinstance(scattered, tuple)
    assert torch.allclose(single, scattered[0])

    pair = make_pair()
    scattered = Scatter.forward([-1], pair)
    assert isinstance(scattered, tuple)
    for source, result in zip(pair, scattered):
        assert torch.allclose(source, result)

    if not torch.cuda.is_available():
        return

    # GPU target: the input is expected to be copied from CPU to GPU.
    single = make_single()
    scattered = Scatter.forward([0], single)
    assert isinstance(scattered, tuple)
    assert torch.allclose(single.cuda(), scattered[0])

    pair = make_pair()
    scattered = Scatter.forward([0], pair)
    assert isinstance(scattered, tuple)
    for source, result in zip(pair, scattered):
        assert torch.allclose(source.cuda(), result[0])
@COMPONENTS.register_module()
class FooLinearConv1d(BaseModule):
    """Test component composed of an optional linear and conv1d sub-module.

    Each sub-module is built from its config through the ``COMPONENTS``
    registry and is only attached when its config is not ``None``.
    """

    def __init__(self, linear=None, conv1d=None, init_cfg=None):
        super().__init__(init_cfg)
        # Attach in a fixed order: ``linear`` first, then ``conv1d``.
        for attr_name, sub_cfg in (('linear', linear), ('conv1d', conv1d)):
            if sub_cfg is not None:
                setattr(self, attr_name, build_from_cfg(sub_cfg, COMPONENTS))

    def forward(self, x):
        """Apply ``linear`` and feed its output to ``conv1d``."""
        return self.conv1d(self.linear(x))
self.reg = nn.Linear(3, 4) def test_initilization_info_logger(): # 'override' has higher priority import torch.nn as nn from mmcv.utils.logging import get_logger import os class OverloadInitConv(nn.Conv2d, BaseModule): def init_weights(self): for p in self.parameters(): with torch.no_grad(): p.fill_(1) class CheckLoggerModel(BaseModule): def __init__(self, init_cfg=None): super(CheckLoggerModel, self).__init__(init_cfg) self.conv1 = nn.Conv2d(1, 1, 1, 1) self.conv2 = OverloadInitConv(1, 1, 1, 1) self.conv3 = nn.Conv2d(1, 1, 1, 1) self.fc1 = nn.Linear(1, 1) init_cfg = [ dict( type='Normal', layer='Conv2d', std=0.01, override=dict( type='Normal', name='conv3', std=0.01, bias_prob=0.01)), dict(type='Constant', layer='Linear', val=0., bias=1.) ] model = CheckLoggerModel(init_cfg=init_cfg) train_log = '20210720_132454.log' workdir = tempfile.mkdtemp() log_file = os.path.join(workdir, train_log) # create a logger get_logger('init_logger', log_file=log_file) assert not hasattr(model, '_params_init_info') model.init_weights() # assert `_params_init_info` would be deleted after `init_weights` assert not hasattr(model, '_params_init_info') # assert initialization information has been dumped assert os.path.exists(log_file) lines = mmcv.list_from_file(log_file) # check initialization information is right for i, line in enumerate(lines): if 'conv1.weight' in line: assert 'NormalInit' in lines[i + 1] if 'conv2.weight' in line: assert 'OverloadInitConv' in lines[i + 1] if 'fc1.weight' in line: assert 'ConstantInit' in lines[i + 1] # test corner case class OverloadInitConvFc(nn.Conv2d, BaseModule): def __init__(self, *args, **kwargs): super(OverloadInitConvFc, self).__init__(*args, **kwargs) self.conv1 = nn.Linear(1, 1) def init_weights(self): for p in self.parameters(): with torch.no_grad(): p.fill_(1) class CheckLoggerModel(BaseModule): def __init__(self, init_cfg=None): super(CheckLoggerModel, self).__init__(init_cfg) self.conv1 = nn.Conv2d(1, 1, 1, 1) self.conv2 = 
def test_update_init_info():
    """Check ``update_init_info`` refreshes records and rejects new params.

    The model's ``_params_init_info`` is seeded by hand, all parameters are
    filled with ``1``, and ``update_init_info`` is expected to rewrite both
    the ``init_info`` string and the ``tmp_mean_value`` of every record.
    Replacing a parameter afterwards must make the next call raise
    ``AssertionError``.
    """
    from collections import defaultdict

    class DummyModel(BaseModule):

        def __init__(self, init_cfg=None):
            super().__init__(init_cfg)
            self.conv1 = nn.Conv2d(1, 1, 1, 1)
            self.conv3 = nn.Conv2d(1, 1, 1, 1)
            self.fc1 = nn.Linear(1, 1)

    model = DummyModel()

    # Seed one record per parameter, keyed by the parameter object itself.
    model._params_init_info = defaultdict(dict)
    for _, parameter in model.named_parameters():
        record = model._params_init_info[parameter]
        record['init_info'] = 'init'
        record['tmp_mean_value'] = parameter.data.mean()

    with torch.no_grad():
        for parameter in model.parameters():
            parameter.fill_(1)

    update_init_info(model, init_info='fill_1')

    for record in model._params_init_info.values():
        assert record['init_info'] == 'fill_1'
        assert record['tmp_mean_value'] == 1

    # test assert for new parameters
    replacement_bias = nn.Parameter(torch.ones_like(model.conv1.bias))
    model.conv1.bias = replacement_bias
    with pytest.raises(AssertionError):
        update_init_info(model, init_info=' ')
torch.full(model.component3.linear.bias.shape, 2.0)) assert torch.equal( model.component4.linear.linear.weight, torch.full(model.component4.linear.linear.weight.shape, 1.0)) assert torch.equal( model.component4.linear.linear.bias, torch.full(model.component4.linear.linear.bias.shape, 2.0)) assert torch.equal( model.component4.conv1d.conv1d.weight, torch.full(model.component4.conv1d.conv1d.weight.shape, 3.0)) assert torch.equal( model.component4.conv1d.conv1d.bias, torch.full(model.component4.conv1d.conv1d.bias.shape, 4.0)) assert torch.equal(model.reg.weight, torch.full(model.reg.weight.shape, 1.0)) assert torch.equal(model.reg.bias, torch.full(model.reg.bias.shape, 2.0)) def test_nest_components_weight_init(): """ Config model (FooModel, Linear: weight=1, bias=2, Conv1d: weight=3, bias=4, Conv2d: weight=5, bias=6) ├──component1 (FooConv1d, Conv1d: weight=7, bias=8) ├──component2 (FooConv2d, Conv2d: weight=9, bias=10) ├──component3 (FooLinear) ├──component4 (FooLinearConv1d, Linear: weight=11, bias=12) ├──linear (FooLinear, Linear: weight=11, bias=12) ├──conv1d (FooConv1d) ├──reg (nn.Linear, weight=13, bias=14) Parameters after initialization model (FooModel) ├──component1 (FooConv1d, weight=7, bias=8) ├──component2 (FooConv2d, weight=9, bias=10) ├──component3 (FooLinear, weight=1, bias=2) ├──component4 (FooLinearConv1d) ├──linear (FooLinear, weight=1, bias=2) ├──conv1d (FooConv1d, weight=3, bias=4) ├──reg (nn.Linear, weight=13, bias=14) """ model_cfg = dict( type='FooModel', init_cfg=[ dict( type='Constant', val=1, bias=2, layer='Linear', override=dict(type='Constant', name='reg', val=13, bias=14)), dict(type='Constant', val=3, bias=4, layer='Conv1d'), dict(type='Constant', val=5, bias=6, layer='Conv2d'), ], component1=dict( type='FooConv1d', init_cfg=dict(type='Constant', layer='Conv1d', val=7, bias=8)), component2=dict( type='FooConv2d', init_cfg=dict(type='Constant', layer='Conv2d', val=9, bias=10)), component3=dict(type='FooLinear'), component4=dict( 
def test_without_layer_weight_init():
    """Check that a nested ``init_cfg`` lacking a ``layer`` key has no effect.

    ``component1`` carries ``init_cfg=dict(type='Constant', val=7, bias=8)``
    without ``layer``; the assertions expect it to end up with the values of
    the top-level Conv1d rule (3 / 4) instead.
    """
    model_cfg = {
        'type': 'FooModel',
        'init_cfg': [
            {'type': 'Constant', 'val': 1, 'bias': 2, 'layer': 'Linear'},
            {'type': 'Constant', 'val': 3, 'bias': 4, 'layer': 'Conv1d'},
            {'type': 'Constant', 'val': 5, 'bias': 6, 'layer': 'Conv2d'},
        ],
        'component1': {
            'type': 'FooConv1d',
            'init_cfg': {'type': 'Constant', 'val': 7, 'bias': 8},
        },
        'component2': {'type': 'FooConv2d'},
        'component3': {'type': 'FooLinear'},
    }
    model = build_from_cfg(model_cfg, FOOMODELS)
    model.init_weights()

    # (layer, expected constant weight, expected constant bias)
    expectations = (
        # init_cfg in component1 does not have layer key, so it does nothing
        (model.component1.conv1d, 3.0, 4.0),
        (model.component2.conv2d, 5.0, 6.0),
        (model.component3.linear, 1.0, 2.0),
        (model.reg, 1.0, 2.0),
    )
    for layer, weight_value, bias_value in expectations:
        assert torch.equal(layer.weight,
                           torch.full(layer.weight.shape, weight_value))
        assert torch.equal(layer.bias,
                           torch.full(layer.bias.shape, bias_value))
model.init_weights() assert torch.equal(model.reg.weight, torch.full(model.reg.weight.shape, 30.0)) assert torch.equal(model.reg.bias, torch.full(model.reg.bias.shape, 40.0)) def test_sequential_model_weight_init(): seq_model_cfg = [ dict( type='FooConv1d', init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)), dict( type='FooConv2d', init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)), ] layers = [build_from_cfg(cfg, COMPONENTS) for cfg in seq_model_cfg] seq_model = Sequential(*layers) seq_model.init_weights() assert torch.equal(seq_model[0].conv1d.weight, torch.full(seq_model[0].conv1d.weight.shape, 0.)) assert torch.equal(seq_model[0].conv1d.bias, torch.full(seq_model[0].conv1d.bias.shape, 1.)) assert torch.equal(seq_model[1].conv2d.weight, torch.full(seq_model[1].conv2d.weight.shape, 2.)) assert torch.equal(seq_model[1].conv2d.bias, torch.full(seq_model[1].conv2d.bias.shape, 3.)) # inner init_cfg has higher priority layers = [build_from_cfg(cfg, COMPONENTS) for cfg in seq_model_cfg] seq_model = Sequential( *layers, init_cfg=dict( type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.)) seq_model.init_weights() assert torch.equal(seq_model[0].conv1d.weight, torch.full(seq_model[0].conv1d.weight.shape, 0.)) assert torch.equal(seq_model[0].conv1d.bias, torch.full(seq_model[0].conv1d.bias.shape, 1.)) assert torch.equal(seq_model[1].conv2d.weight, torch.full(seq_model[1].conv2d.weight.shape, 2.)) assert torch.equal(seq_model[1].conv2d.bias, torch.full(seq_model[1].conv2d.bias.shape, 3.)) def test_modulelist_weight_init(): models_cfg = [ dict( type='FooConv1d', init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)), dict( type='FooConv2d', init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)), ] layers = [build_from_cfg(cfg, COMPONENTS) for cfg in models_cfg] modellist = ModuleList(layers) modellist.init_weights() assert torch.equal(modellist[0].conv1d.weight, torch.full(modellist[0].conv1d.weight.shape, 0.)) assert 
torch.equal(modellist[0].conv1d.bias, torch.full(modellist[0].conv1d.bias.shape, 1.)) assert torch.equal(modellist[1].conv2d.weight, torch.full(modellist[1].conv2d.weight.shape, 2.)) assert torch.equal(modellist[1].conv2d.bias, torch.full(modellist[1].conv2d.bias.shape, 3.)) # inner init_cfg has higher priority layers = [build_from_cfg(cfg, COMPONENTS) for cfg in models_cfg] modellist = ModuleList( layers, init_cfg=dict( type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.)) modellist.init_weights() assert torch.equal(modellist[0].conv1d.weight, torch.full(modellist[0].conv1d.weight.shape, 0.)) assert torch.equal(modellist[0].conv1d.bias, torch.full(modellist[0].conv1d.bias.shape, 1.)) assert torch.equal(modellist[1].conv2d.weight, torch.full(modellist[1].conv2d.weight.shape, 2.)) assert torch.equal(modellist[1].conv2d.bias, torch.full(modellist[1].conv2d.bias.shape, 3.)) def test_moduledict_weight_init(): models_cfg = dict( foo_conv_1d=dict( type='FooConv1d', init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)), foo_conv_2d=dict( type='FooConv2d', init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)), ) layers = { name: build_from_cfg(cfg, COMPONENTS) for name, cfg in models_cfg.items() } modeldict = ModuleDict(layers) modeldict.init_weights() assert torch.equal( modeldict['foo_conv_1d'].conv1d.weight, torch.full(modeldict['foo_conv_1d'].conv1d.weight.shape, 0.)) assert torch.equal( modeldict['foo_conv_1d'].conv1d.bias, torch.full(modeldict['foo_conv_1d'].conv1d.bias.shape, 1.)) assert torch.equal( modeldict['foo_conv_2d'].conv2d.weight, torch.full(modeldict['foo_conv_2d'].conv2d.weight.shape, 2.)) assert torch.equal( modeldict['foo_conv_2d'].conv2d.bias, torch.full(modeldict['foo_conv_2d'].conv2d.bias.shape, 3.)) # inner init_cfg has higher priority layers = { name: build_from_cfg(cfg, COMPONENTS) for name, cfg in models_cfg.items() } modeldict = ModuleDict( layers, init_cfg=dict( type='Constant', layer=['Conv1d', 'Conv2d'], val=4., 
bias=5.)) modeldict.init_weights() assert torch.equal( modeldict['foo_conv_1d'].conv1d.weight, torch.full(modeldict['foo_conv_1d'].conv1d.weight.shape, 0.)) assert torch.equal( modeldict['foo_conv_1d'].conv1d.bias, torch.full(modeldict['foo_conv_1d'].conv1d.bias.shape, 1.)) assert torch.equal( modeldict['foo_conv_2d'].conv2d.weight, torch.full(modeldict['foo_conv_2d'].conv2d.weight.shape, 2.)) assert torch.equal( modeldict['foo_conv_2d'].conv2d.bias, torch.full(modeldict['foo_conv_2d'].conv2d.bias.shape, 3.)) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_checkpoint.py ================================================ import sys from collections import OrderedDict from tempfile import TemporaryDirectory from unittest.mock import MagicMock, patch import pytest import torch import torch.nn as nn import torch.optim as optim from torch.nn.parallel import DataParallel from mmcv.fileio.file_client import PetrelBackend from mmcv.parallel.registry import MODULE_WRAPPERS from mmcv.runner.checkpoint import (_load_checkpoint_with_prefix, get_state_dict, load_checkpoint, load_from_local, load_from_pavi, save_checkpoint) sys.modules['petrel_client'] = MagicMock() sys.modules['petrel_client.client'] = MagicMock() @MODULE_WRAPPERS.register_module() class DDPWrapper(object): def __init__(self, module): self.module = module class Block(nn.Module): def __init__(self): super().__init__() self.conv = nn.Conv2d(3, 3, 1) self.norm = nn.BatchNorm2d(3) class Model(nn.Module): def __init__(self): super().__init__() self.block = Block() self.conv = nn.Conv2d(3, 3, 1) class Mockpavimodel(object): def __init__(self, name='fakename'): self.name = name def download(self, file): pass def assert_tensor_equal(tensor_a, tensor_b): assert tensor_a.eq(tensor_b).all() def test_get_state_dict(): if torch.__version__ == 'parrots': state_dict_keys = set([ 'block.conv.weight', 'block.conv.bias', 'block.norm.weight', 'block.norm.bias', 
'block.norm.running_mean', 'block.norm.running_var', 'conv.weight', 'conv.bias' ]) else: state_dict_keys = set([ 'block.conv.weight', 'block.conv.bias', 'block.norm.weight', 'block.norm.bias', 'block.norm.running_mean', 'block.norm.running_var', 'block.norm.num_batches_tracked', 'conv.weight', 'conv.bias' ]) model = Model() state_dict = get_state_dict(model) assert isinstance(state_dict, OrderedDict) assert set(state_dict.keys()) == state_dict_keys assert_tensor_equal(state_dict['block.conv.weight'], model.block.conv.weight) assert_tensor_equal(state_dict['block.conv.bias'], model.block.conv.bias) assert_tensor_equal(state_dict['block.norm.weight'], model.block.norm.weight) assert_tensor_equal(state_dict['block.norm.bias'], model.block.norm.bias) assert_tensor_equal(state_dict['block.norm.running_mean'], model.block.norm.running_mean) assert_tensor_equal(state_dict['block.norm.running_var'], model.block.norm.running_var) if torch.__version__ != 'parrots': assert_tensor_equal(state_dict['block.norm.num_batches_tracked'], model.block.norm.num_batches_tracked) assert_tensor_equal(state_dict['conv.weight'], model.conv.weight) assert_tensor_equal(state_dict['conv.bias'], model.conv.bias) wrapped_model = DDPWrapper(model) state_dict = get_state_dict(wrapped_model) assert isinstance(state_dict, OrderedDict) assert set(state_dict.keys()) == state_dict_keys assert_tensor_equal(state_dict['block.conv.weight'], wrapped_model.module.block.conv.weight) assert_tensor_equal(state_dict['block.conv.bias'], wrapped_model.module.block.conv.bias) assert_tensor_equal(state_dict['block.norm.weight'], wrapped_model.module.block.norm.weight) assert_tensor_equal(state_dict['block.norm.bias'], wrapped_model.module.block.norm.bias) assert_tensor_equal(state_dict['block.norm.running_mean'], wrapped_model.module.block.norm.running_mean) assert_tensor_equal(state_dict['block.norm.running_var'], wrapped_model.module.block.norm.running_var) if torch.__version__ != 'parrots': assert_tensor_equal( 
state_dict['block.norm.num_batches_tracked'], wrapped_model.module.block.norm.num_batches_tracked) assert_tensor_equal(state_dict['conv.weight'], wrapped_model.module.conv.weight) assert_tensor_equal(state_dict['conv.bias'], wrapped_model.module.conv.bias) # wrapped inner module for name, module in wrapped_model.module._modules.items(): module = DataParallel(module) wrapped_model.module._modules[name] = module state_dict = get_state_dict(wrapped_model) assert isinstance(state_dict, OrderedDict) assert set(state_dict.keys()) == state_dict_keys assert_tensor_equal(state_dict['block.conv.weight'], wrapped_model.module.block.module.conv.weight) assert_tensor_equal(state_dict['block.conv.bias'], wrapped_model.module.block.module.conv.bias) assert_tensor_equal(state_dict['block.norm.weight'], wrapped_model.module.block.module.norm.weight) assert_tensor_equal(state_dict['block.norm.bias'], wrapped_model.module.block.module.norm.bias) assert_tensor_equal(state_dict['block.norm.running_mean'], wrapped_model.module.block.module.norm.running_mean) assert_tensor_equal(state_dict['block.norm.running_var'], wrapped_model.module.block.module.norm.running_var) if torch.__version__ != 'parrots': assert_tensor_equal( state_dict['block.norm.num_batches_tracked'], wrapped_model.module.block.module.norm.num_batches_tracked) assert_tensor_equal(state_dict['conv.weight'], wrapped_model.module.conv.module.weight) assert_tensor_equal(state_dict['conv.bias'], wrapped_model.module.conv.module.bias) def test_load_pavimodel_dist(): sys.modules['pavi'] = MagicMock() sys.modules['pavi.modelcloud'] = MagicMock() pavimodel = Mockpavimodel() import pavi pavi.modelcloud.get = MagicMock(return_value=pavimodel) with pytest.raises(AssertionError): # test pavi prefix _ = load_from_pavi('MyPaviFolder/checkpoint.pth') with pytest.raises(FileNotFoundError): # there is not such checkpoint for us to load _ = load_from_pavi('pavi://checkpoint.pth') def test_load_checkpoint_with_prefix(): class 
FooModule(nn.Module): def __init__(self): super().__init__() self.linear = nn.Linear(1, 2) self.conv2d = nn.Conv2d(3, 1, 3) self.conv2d_2 = nn.Conv2d(3, 2, 3) model = FooModule() nn.init.constant_(model.linear.weight, 1) nn.init.constant_(model.linear.bias, 2) nn.init.constant_(model.conv2d.weight, 3) nn.init.constant_(model.conv2d.bias, 4) nn.init.constant_(model.conv2d_2.weight, 5) nn.init.constant_(model.conv2d_2.bias, 6) with TemporaryDirectory(): torch.save(model.state_dict(), 'model.pth') prefix = 'conv2d' state_dict = _load_checkpoint_with_prefix(prefix, 'model.pth') assert torch.equal(model.conv2d.state_dict()['weight'], state_dict['weight']) assert torch.equal(model.conv2d.state_dict()['bias'], state_dict['bias']) # test whether prefix is in pretrained model with pytest.raises(AssertionError): prefix = 'back' _load_checkpoint_with_prefix(prefix, 'model.pth') def test_load_checkpoint(): import os import re import tempfile class PrefixModel(nn.Module): def __init__(self): super().__init__() self.backbone = Model() pmodel = PrefixModel() model = Model() checkpoint_path = os.path.join(tempfile.gettempdir(), 'checkpoint.pth') # add prefix torch.save(model.state_dict(), checkpoint_path) state_dict = load_checkpoint( pmodel, checkpoint_path, revise_keys=[(r'^', 'backbone.')]) for key in pmodel.backbone.state_dict().keys(): assert torch.equal(pmodel.backbone.state_dict()[key], state_dict[key]) # strip prefix torch.save(pmodel.state_dict(), checkpoint_path) state_dict = load_checkpoint( model, checkpoint_path, revise_keys=[(r'^backbone\.', '')]) for key in state_dict.keys(): key_stripped = re.sub(r'^backbone\.', '', key) assert torch.equal(model.state_dict()[key_stripped], state_dict[key]) os.remove(checkpoint_path) def test_load_checkpoint_metadata(): import os import tempfile from mmcv.runner import load_checkpoint, save_checkpoint class ModelV1(nn.Module): def __init__(self): super().__init__() self.block = Block() self.conv1 = nn.Conv2d(3, 3, 1) self.conv2 = 
nn.Conv2d(3, 3, 1) nn.init.normal_(self.conv1.weight) nn.init.normal_(self.conv2.weight) class ModelV2(nn.Module): _version = 2 def __init__(self): super().__init__() self.block = Block() self.conv0 = nn.Conv2d(3, 3, 1) self.conv1 = nn.Conv2d(3, 3, 1) nn.init.normal_(self.conv0.weight) nn.init.normal_(self.conv1.weight) def _load_from_state_dict(self, state_dict, prefix, local_metadata, *args, **kwargs): """load checkpoints.""" # Names of some parameters in has been changed. version = local_metadata.get('version', None) if version is None or version < 2: state_dict_keys = list(state_dict.keys()) convert_map = {'conv1': 'conv0', 'conv2': 'conv1'} for k in state_dict_keys: for ori_str, new_str in convert_map.items(): if k.startswith(prefix + ori_str): new_key = k.replace(ori_str, new_str) state_dict[new_key] = state_dict[k] del state_dict[k] super()._load_from_state_dict(state_dict, prefix, local_metadata, *args, **kwargs) model_v1 = ModelV1() model_v1_conv0_weight = model_v1.conv1.weight.detach() model_v1_conv1_weight = model_v1.conv2.weight.detach() model_v2 = ModelV2() model_v2_conv0_weight = model_v2.conv0.weight.detach() model_v2_conv1_weight = model_v2.conv1.weight.detach() ckpt_v1_path = os.path.join(tempfile.gettempdir(), 'checkpoint_v1.pth') ckpt_v2_path = os.path.join(tempfile.gettempdir(), 'checkpoint_v2.pth') # Save checkpoint save_checkpoint(model_v1, ckpt_v1_path) save_checkpoint(model_v2, ckpt_v2_path) # test load v1 model load_checkpoint(model_v2, ckpt_v1_path) assert torch.allclose(model_v2.conv0.weight, model_v1_conv0_weight) assert torch.allclose(model_v2.conv1.weight, model_v1_conv1_weight) # test load v2 model load_checkpoint(model_v2, ckpt_v2_path) assert torch.allclose(model_v2.conv0.weight, model_v2_conv0_weight) assert torch.allclose(model_v2.conv1.weight, model_v2_conv1_weight) def test_load_classes_name(): import os import tempfile from mmcv.runner import load_checkpoint, save_checkpoint checkpoint_path = os.path.join(tempfile.gettempdir(), 
'checkpoint.pth') model = Model() save_checkpoint(model, checkpoint_path) checkpoint = load_checkpoint(model, checkpoint_path) assert 'meta' in checkpoint and 'CLASSES' not in checkpoint['meta'] model.CLASSES = ('class1', 'class2') save_checkpoint(model, checkpoint_path) checkpoint = load_checkpoint(model, checkpoint_path) assert 'meta' in checkpoint and 'CLASSES' in checkpoint['meta'] assert checkpoint['meta']['CLASSES'] == ('class1', 'class2') model = Model() wrapped_model = DDPWrapper(model) save_checkpoint(wrapped_model, checkpoint_path) checkpoint = load_checkpoint(wrapped_model, checkpoint_path) assert 'meta' in checkpoint and 'CLASSES' not in checkpoint['meta'] wrapped_model.module.CLASSES = ('class1', 'class2') save_checkpoint(wrapped_model, checkpoint_path) checkpoint = load_checkpoint(wrapped_model, checkpoint_path) assert 'meta' in checkpoint and 'CLASSES' in checkpoint['meta'] assert checkpoint['meta']['CLASSES'] == ('class1', 'class2') # remove the temp file os.remove(checkpoint_path) def test_checkpoint_loader(): import os import tempfile from mmcv.runner import CheckpointLoader, _load_checkpoint, save_checkpoint checkpoint_path = os.path.join(tempfile.gettempdir(), 'checkpoint.pth') model = Model() save_checkpoint(model, checkpoint_path) checkpoint = _load_checkpoint(checkpoint_path) assert 'meta' in checkpoint and 'CLASSES' not in checkpoint['meta'] # remove the temp file os.remove(checkpoint_path) filenames = [ 'http://xx.xx/xx.pth', 'https://xx.xx/xx.pth', 'modelzoo://xx.xx/xx.pth', 'torchvision://xx.xx/xx.pth', 'open-mmlab://xx.xx/xx.pth', 'openmmlab://xx.xx/xx.pth', 'mmcls://xx.xx/xx.pth', 'pavi://xx.xx/xx.pth', 's3://xx.xx/xx.pth', 'ss3://xx.xx/xx.pth', ' s3://xx.xx/xx.pth', 'open-mmlab:s3://xx.xx/xx.pth', 'openmmlab:s3://xx.xx/xx.pth', 'openmmlabs3://xx.xx/xx.pth', ':s3://xx.xx/xx.path' ] fn_names = [ 'load_from_http', 'load_from_http', 'load_from_torchvision', 'load_from_torchvision', 'load_from_openmmlab', 'load_from_openmmlab', 
'load_from_mmcls', 'load_from_pavi', 'load_from_ceph', 'load_from_local', 'load_from_local', 'load_from_ceph', 'load_from_ceph', 'load_from_local', 'load_from_local' ] for filename, fn_name in zip(filenames, fn_names): loader = CheckpointLoader._get_checkpoint_loader(filename) assert loader.__name__ == fn_name @CheckpointLoader.register_scheme(prefixes='ftp://') def load_from_ftp(filename, map_location): return dict(filename=filename) # test register_loader filename = 'ftp://xx.xx/xx.pth' loader = CheckpointLoader._get_checkpoint_loader(filename) assert loader.__name__ == 'load_from_ftp' def load_from_ftp1(filename, map_location): return dict(filename=filename) # test duplicate registered error with pytest.raises(KeyError): CheckpointLoader.register_scheme('ftp://', load_from_ftp1) # test force param CheckpointLoader.register_scheme('ftp://', load_from_ftp1, force=True) checkpoint = CheckpointLoader.load_checkpoint(filename) assert checkpoint['filename'] == filename # test print function name loader = CheckpointLoader._get_checkpoint_loader(filename) assert loader.__name__ == 'load_from_ftp1' # test sort @CheckpointLoader.register_scheme(prefixes='a/b') def load_from_ab(filename, map_location): return dict(filename=filename) @CheckpointLoader.register_scheme(prefixes='a/b/c') def load_from_abc(filename, map_location): return dict(filename=filename) filename = 'a/b/c/d' loader = CheckpointLoader._get_checkpoint_loader(filename) assert loader.__name__ == 'load_from_abc' def test_save_checkpoint(tmp_path): model = Model() optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9) # meta is not a dict with pytest.raises(TypeError): save_checkpoint(model, '/path/of/your/filename', meta='invalid type') # 1. 
save to disk filename = str(tmp_path / 'checkpoint1.pth') save_checkpoint(model, filename) filename = str(tmp_path / 'checkpoint2.pth') save_checkpoint(model, filename, optimizer) filename = str(tmp_path / 'checkpoint3.pth') save_checkpoint(model, filename, meta={'test': 'test'}) filename = str(tmp_path / 'checkpoint4.pth') save_checkpoint(model, filename, file_client_args={'backend': 'disk'}) # 2. save to petrel oss with patch.object(PetrelBackend, 'put') as mock_method: filename = 's3://path/of/your/checkpoint1.pth' save_checkpoint(model, filename) mock_method.assert_called() with patch.object(PetrelBackend, 'put') as mock_method: filename = 's3://path//of/your/checkpoint2.pth' save_checkpoint( model, filename, file_client_args={'backend': 'petrel'}) mock_method.assert_called() def test_load_from_local(): import os home_path = os.path.expanduser('~') checkpoint_path = os.path.join( home_path, 'dummy_checkpoint_used_to_test_load_from_local.pth') model = Model() save_checkpoint(model, checkpoint_path) checkpoint = load_from_local( '~/dummy_checkpoint_used_to_test_load_from_local.pth', map_location=None) assert_tensor_equal(checkpoint['state_dict']['block.conv.weight'], model.block.conv.weight) os.remove(checkpoint_path) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_dist_utils.py ================================================ import os from unittest.mock import patch import pytest from mmcv.runner import init_dist @patch('torch.cuda.device_count', return_value=1) @patch('torch.cuda.set_device') @patch('torch.distributed.init_process_group') @patch('subprocess.getoutput', return_value='127.0.0.1') def test_init_dist(mock_getoutput, mock_dist_init, mock_set_device, mock_device_count): with pytest.raises(ValueError): # launcher must be one of {'pytorch', 'mpi', 'slurm'} init_dist('invaliad_launcher') # test initialize with slurm launcher os.environ['SLURM_PROCID'] = '0' os.environ['SLURM_NTASKS'] = '1' 
os.environ['SLURM_NODELIST'] = '[0]' # haven't check the correct form init_dist('slurm') # no port is specified, use default port 29500 assert os.environ['MASTER_PORT'] == '29500' assert os.environ['MASTER_ADDR'] == '127.0.0.1' assert os.environ['WORLD_SIZE'] == '1' assert os.environ['RANK'] == '0' mock_set_device.assert_called_with(0) mock_getoutput.assert_called_with('scontrol show hostname [0] | head -n1') mock_dist_init.assert_called_with(backend='nccl') init_dist('slurm', port=29505) # port is specified with argument 'port' assert os.environ['MASTER_PORT'] == '29505' assert os.environ['MASTER_ADDR'] == '127.0.0.1' assert os.environ['WORLD_SIZE'] == '1' assert os.environ['RANK'] == '0' mock_set_device.assert_called_with(0) mock_getoutput.assert_called_with('scontrol show hostname [0] | head -n1') mock_dist_init.assert_called_with(backend='nccl') init_dist('slurm') # port is specified by environment variable 'MASTER_PORT' assert os.environ['MASTER_PORT'] == '29505' assert os.environ['MASTER_ADDR'] == '127.0.0.1' assert os.environ['WORLD_SIZE'] == '1' assert os.environ['RANK'] == '0' mock_set_device.assert_called_with(0) mock_getoutput.assert_called_with('scontrol show hostname [0] | head -n1') mock_dist_init.assert_called_with(backend='nccl') ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_eval_hook.py ================================================ import json import os.path as osp import sys import tempfile import unittest.mock as mock from collections import OrderedDict from unittest.mock import MagicMock, patch import pytest import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import DataLoader, Dataset from mmcv.fileio.file_client import PetrelBackend from mmcv.runner import DistEvalHook as BaseDistEvalHook from mmcv.runner import EpochBasedRunner from mmcv.runner import EvalHook as BaseEvalHook from mmcv.runner import IterBasedRunner from mmcv.utils import 
get_logger, scandir sys.modules['petrel_client'] = MagicMock() sys.modules['petrel_client.client'] = MagicMock() class ExampleDataset(Dataset): def __init__(self): self.index = 0 self.eval_result = [1, 4, 3, 7, 2, -3, 4, 6] def __getitem__(self, idx): results = dict(x=torch.tensor([1])) return results def __len__(self): return 1 @mock.create_autospec def evaluate(self, results, logger=None): pass class EvalDataset(ExampleDataset): def evaluate(self, results, logger=None): acc = self.eval_result[self.index] output = OrderedDict( acc=acc, index=self.index, score=acc, loss_top=acc) self.index += 1 return output class Model(nn.Module): def __init__(self): super().__init__() self.param = nn.Parameter(torch.tensor([1.0])) def forward(self, x, **kwargs): return self.param * x def train_step(self, data_batch, optimizer, **kwargs): return {'loss': torch.sum(self(data_batch['x']))} def val_step(self, data_batch, optimizer, **kwargs): return {'loss': torch.sum(self(data_batch['x']))} def _build_epoch_runner(): model = Model() tmp_dir = tempfile.mkdtemp() runner = EpochBasedRunner( model=model, work_dir=tmp_dir, logger=get_logger('demo')) return runner def _build_iter_runner(): model = Model() tmp_dir = tempfile.mkdtemp() runner = IterBasedRunner( model=model, work_dir=tmp_dir, logger=get_logger('demo')) return runner class EvalHook(BaseEvalHook): _default_greater_keys = ['acc', 'top'] _default_less_keys = ['loss', 'loss_top'] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) class DistEvalHook(BaseDistEvalHook): greater_keys = ['acc', 'top'] less_keys = ['loss', 'loss_top'] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def test_eval_hook(): with pytest.raises(AssertionError): # `save_best` should be a str test_dataset = Model() data_loader = DataLoader(test_dataset) EvalHook(data_loader, save_best=True) with pytest.raises(TypeError): # dataloader must be a pytorch DataLoader test_dataset = Model() data_loader = 
[DataLoader(test_dataset)] EvalHook(data_loader) with pytest.raises(ValueError): # key_indicator must be valid when rule_map is None test_dataset = ExampleDataset() data_loader = DataLoader(test_dataset) EvalHook(data_loader, save_best='unsupport') with pytest.raises(KeyError): # rule must be in keys of rule_map test_dataset = ExampleDataset() data_loader = DataLoader(test_dataset) EvalHook(data_loader, save_best='auto', rule='unsupport') # if eval_res is an empty dict, print a warning information with pytest.warns(UserWarning) as record_warnings: class _EvalDataset(ExampleDataset): def evaluate(self, results, logger=None): return {} test_dataset = _EvalDataset() data_loader = DataLoader(test_dataset) eval_hook = EvalHook(data_loader, save_best='auto') runner = _build_epoch_runner() runner.register_hook(eval_hook) runner.run([data_loader], [('train', 1)], 1) # Since there will be many warnings thrown, we just need to check if the # expected exceptions are thrown expected_message = ('Since `eval_res` is an empty dict, the behavior to ' 'save the best checkpoint will be skipped in this ' 'evaluation.') for warning in record_warnings: if str(warning.message) == expected_message: break else: assert False test_dataset = ExampleDataset() loader = DataLoader(test_dataset) model = Model() data_loader = DataLoader(test_dataset) eval_hook = EvalHook(data_loader, save_best=None) with tempfile.TemporaryDirectory() as tmpdir: # total_epochs = 1 logger = get_logger('test_eval') runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger) runner.register_hook(eval_hook) runner.run([loader], [('train', 1)], 1) test_dataset.evaluate.assert_called_with( test_dataset, [torch.tensor([1])], logger=runner.logger) assert runner.meta is None or 'best_score' not in runner.meta[ 'hook_msgs'] assert runner.meta is None or 'best_ckpt' not in runner.meta[ 'hook_msgs'] # when `save_best` is set to 'auto', first metric will be used. 
loader = DataLoader(EvalDataset()) model = Model() data_loader = DataLoader(EvalDataset()) eval_hook = EvalHook(data_loader, interval=1, save_best='auto') with tempfile.TemporaryDirectory() as tmpdir: logger = get_logger('test_eval') runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger) runner.register_checkpoint_hook(dict(interval=1)) runner.register_hook(eval_hook) runner.run([loader], [('train', 1)], 8) ckpt_path = osp.join(tmpdir, 'best_acc_epoch_4.pth') assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path assert osp.exists(ckpt_path) assert runner.meta['hook_msgs']['best_score'] == 7 # total_epochs = 8, return the best acc and corresponding epoch loader = DataLoader(EvalDataset()) model = Model() data_loader = DataLoader(EvalDataset()) eval_hook = EvalHook(data_loader, interval=1, save_best='acc') with tempfile.TemporaryDirectory() as tmpdir: logger = get_logger('test_eval') runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger) runner.register_checkpoint_hook(dict(interval=1)) runner.register_hook(eval_hook) runner.run([loader], [('train', 1)], 8) ckpt_path = osp.join(tmpdir, 'best_acc_epoch_4.pth') assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path assert osp.exists(ckpt_path) assert runner.meta['hook_msgs']['best_score'] == 7 # total_epochs = 8, return the best loss_top and corresponding epoch loader = DataLoader(EvalDataset()) model = Model() data_loader = DataLoader(EvalDataset()) eval_hook = EvalHook(data_loader, interval=1, save_best='loss_top') with tempfile.TemporaryDirectory() as tmpdir: logger = get_logger('test_eval') runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger) runner.register_checkpoint_hook(dict(interval=1)) runner.register_hook(eval_hook) runner.run([loader], [('train', 1)], 8) ckpt_path = osp.join(tmpdir, 'best_loss_top_epoch_6.pth') assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path assert osp.exists(ckpt_path) assert runner.meta['hook_msgs']['best_score'] == -3 # 
total_epochs = 8, return the best score and corresponding epoch data_loader = DataLoader(EvalDataset()) eval_hook = EvalHook( data_loader, interval=1, save_best='score', rule='greater') with tempfile.TemporaryDirectory() as tmpdir: logger = get_logger('test_eval') runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger) runner.register_checkpoint_hook(dict(interval=1)) runner.register_hook(eval_hook) runner.run([loader], [('train', 1)], 8) ckpt_path = osp.join(tmpdir, 'best_score_epoch_4.pth') assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path assert osp.exists(ckpt_path) assert runner.meta['hook_msgs']['best_score'] == 7 # total_epochs = 8, return the best score using less compare func # and indicate corresponding epoch data_loader = DataLoader(EvalDataset()) eval_hook = EvalHook(data_loader, save_best='acc', rule='less') with tempfile.TemporaryDirectory() as tmpdir: logger = get_logger('test_eval') runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger) runner.register_checkpoint_hook(dict(interval=1)) runner.register_hook(eval_hook) runner.run([loader], [('train', 1)], 8) ckpt_path = osp.join(tmpdir, 'best_acc_epoch_6.pth') assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path assert osp.exists(ckpt_path) assert runner.meta['hook_msgs']['best_score'] == -3 # Test the EvalHook when resume happened data_loader = DataLoader(EvalDataset()) eval_hook = EvalHook(data_loader, save_best='acc') with tempfile.TemporaryDirectory() as tmpdir: logger = get_logger('test_eval') runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger) runner.register_checkpoint_hook(dict(interval=1)) runner.register_hook(eval_hook) runner.run([loader], [('train', 1)], 2) old_ckpt_path = osp.join(tmpdir, 'best_acc_epoch_2.pth') assert runner.meta['hook_msgs']['best_ckpt'] == old_ckpt_path assert osp.exists(old_ckpt_path) assert runner.meta['hook_msgs']['best_score'] == 4 resume_from = old_ckpt_path loader = DataLoader(ExampleDataset()) 
eval_hook = EvalHook(data_loader, save_best='acc') runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger) runner.register_checkpoint_hook(dict(interval=1)) runner.register_hook(eval_hook) runner.resume(resume_from) assert runner.meta['hook_msgs']['best_ckpt'] == old_ckpt_path assert osp.exists(old_ckpt_path) assert runner.meta['hook_msgs']['best_score'] == 4 runner.run([loader], [('train', 1)], 8) ckpt_path = osp.join(tmpdir, 'best_acc_epoch_4.pth') assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path assert osp.exists(ckpt_path) assert runner.meta['hook_msgs']['best_score'] == 7 assert not osp.exists(old_ckpt_path) # test EvalHook with customer test_fn and greater/less keys loader = DataLoader(EvalDataset()) model = Model() data_loader = DataLoader(EvalDataset()) eval_hook = EvalHook( data_loader, save_best='acc', test_fn=mock.MagicMock(return_value={}), greater_keys=[], less_keys=['acc']) with tempfile.TemporaryDirectory() as tmpdir: logger = get_logger('test_eval') runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger) runner.register_checkpoint_hook(dict(interval=1)) runner.register_hook(eval_hook) runner.run([loader], [('train', 1)], 8) ckpt_path = osp.join(tmpdir, 'best_acc_epoch_6.pth') assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path assert osp.exists(ckpt_path) assert runner.meta['hook_msgs']['best_score'] == -3 # test EvalHook with specified `out_dir` loader = DataLoader(EvalDataset()) model = Model() data_loader = DataLoader(EvalDataset()) out_dir = 's3://user/data' eval_hook = EvalHook( data_loader, interval=1, save_best='auto', out_dir=out_dir) with patch.object(PetrelBackend, 'put') as mock_put, \ patch.object(PetrelBackend, 'remove') as mock_remove, \ patch.object(PetrelBackend, 'isfile') as mock_isfile, \ tempfile.TemporaryDirectory() as tmpdir: logger = get_logger('test_eval') runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger) runner.register_checkpoint_hook(dict(interval=1)) 
runner.register_hook(eval_hook) runner.run([loader], [('train', 1)], 8) basename = osp.basename(runner.work_dir.rstrip(osp.sep)) ckpt_path = f'{out_dir}/{basename}/best_acc_epoch_4.pth' assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path assert runner.meta['hook_msgs']['best_score'] == 7 assert mock_put.call_count == 3 assert mock_remove.call_count == 2 assert mock_isfile.call_count == 2 @patch('mmcv.engine.single_gpu_test', MagicMock) @patch('mmcv.engine.multi_gpu_test', MagicMock) @pytest.mark.parametrize('EvalHookParam', [EvalHook, DistEvalHook]) @pytest.mark.parametrize('_build_demo_runner,by_epoch', [(_build_epoch_runner, True), (_build_iter_runner, False)]) def test_start_param(EvalHookParam, _build_demo_runner, by_epoch): # create dummy data dataloader = DataLoader(EvalDataset()) # 0.1. dataloader is not a DataLoader object with pytest.raises(TypeError): EvalHookParam(dataloader=MagicMock(), interval=-1) # 0.2. negative interval with pytest.raises(ValueError): EvalHookParam(dataloader, interval=-1) # 0.3. negative start with pytest.raises(ValueError): EvalHookParam(dataloader, start=-1) # 1. start=None, interval=1: perform evaluation after each epoch. runner = _build_demo_runner() evalhook = EvalHookParam(dataloader, interval=1, by_epoch=by_epoch) evalhook.evaluate = MagicMock() runner.register_hook(evalhook) runner.run([dataloader], [('train', 1)], 2) assert evalhook.evaluate.call_count == 2 # after epoch 1 & 2 # 2. start=1, interval=1: perform evaluation after each epoch. runner = _build_demo_runner() evalhook = EvalHookParam( dataloader, start=1, interval=1, by_epoch=by_epoch) evalhook.evaluate = MagicMock() runner.register_hook(evalhook) runner.run([dataloader], [('train', 1)], 2) assert evalhook.evaluate.call_count == 2 # after epoch 1 & 2 # 3. 
start=None, interval=2: perform evaluation after epoch 2, 4, 6, etc runner = _build_demo_runner() evalhook = EvalHookParam(dataloader, interval=2, by_epoch=by_epoch) evalhook.evaluate = MagicMock() runner.register_hook(evalhook) runner.run([dataloader], [('train', 1)], 2) assert evalhook.evaluate.call_count == 1 # after epoch 2 # 4. start=1, interval=2: perform evaluation after epoch 1, 3, 5, etc runner = _build_demo_runner() evalhook = EvalHookParam( dataloader, start=1, interval=2, by_epoch=by_epoch) evalhook.evaluate = MagicMock() runner.register_hook(evalhook) runner.run([dataloader], [('train', 1)], 3) assert evalhook.evaluate.call_count == 2 # after epoch 1 & 3 # 5. start=0, interval=1: perform evaluation after each epoch and # before epoch 1. runner = _build_demo_runner() evalhook = EvalHookParam(dataloader, start=0, by_epoch=by_epoch) evalhook.evaluate = MagicMock() runner.register_hook(evalhook) runner.run([dataloader], [('train', 1)], 2) assert evalhook.evaluate.call_count == 3 # before epoch1 and after e1 & e2 # 6. resuming from epoch i, start = x (x<=i), interval =1: perform # evaluation after each epoch and before the first epoch. runner = _build_demo_runner() evalhook = EvalHookParam(dataloader, start=1, by_epoch=by_epoch) evalhook.evaluate = MagicMock() runner.register_hook(evalhook) if by_epoch: runner._epoch = 2 else: runner._iter = 2 runner.run([dataloader], [('train', 1)], 3) assert evalhook.evaluate.call_count == 2 # before & after epoch 3 # 7. resuming from epoch i, start = i+1/None, interval =1: perform # evaluation after each epoch. 
runner = _build_demo_runner() evalhook = EvalHookParam(dataloader, start=2, by_epoch=by_epoch) evalhook.evaluate = MagicMock() runner.register_hook(evalhook) if by_epoch: runner._epoch = 1 else: runner._iter = 1 runner.run([dataloader], [('train', 1)], 3) assert evalhook.evaluate.call_count == 2 # after epoch 2 & 3 @pytest.mark.parametrize('runner,by_epoch,eval_hook_priority', [(EpochBasedRunner, True, 'NORMAL'), (EpochBasedRunner, True, 'LOW'), (IterBasedRunner, False, 'LOW')]) def test_logger(runner, by_epoch, eval_hook_priority): loader = DataLoader(EvalDataset()) model = Model() data_loader = DataLoader(EvalDataset()) eval_hook = EvalHook( data_loader, interval=1, by_epoch=by_epoch, save_best='acc') with tempfile.TemporaryDirectory() as tmpdir: logger = get_logger('test_logger') optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9) runner = EpochBasedRunner( model=model, optimizer=optimizer, work_dir=tmpdir, logger=logger) runner.register_logger_hooks( dict( interval=1, hooks=[dict(type='TextLoggerHook', by_epoch=by_epoch)])) runner.register_timer_hook(dict(type='IterTimerHook')) runner.register_hook(eval_hook, priority=eval_hook_priority) runner.run([loader], [('train', 1)], 1) path = osp.join(tmpdir, next(scandir(tmpdir, '.json'))) with open(path) as fr: fr.readline() # skip the first line which is `hook_msg` train_log = json.loads(fr.readline()) assert train_log['mode'] == 'train' and 'time' in train_log val_log = json.loads(fr.readline()) assert val_log['mode'] == 'val' and 'time' not in val_log ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_fp16.py ================================================ import numpy as np import pytest import torch import torch.nn as nn from mmcv.runner.fp16_utils import auto_fp16, cast_tensor_type, force_fp32 def test_cast_tensor_type(): inputs = torch.FloatTensor([5.]) src_type = torch.float32 dst_type = torch.int32 outputs = cast_tensor_type(inputs, 
src_type, dst_type) assert isinstance(outputs, torch.Tensor) assert outputs.dtype == dst_type # convert torch.float to torch.half inputs = torch.FloatTensor([5.]) src_type = torch.float dst_type = torch.half outputs = cast_tensor_type(inputs, src_type, dst_type) assert isinstance(outputs, torch.Tensor) assert outputs.dtype == dst_type # skip the conversion when the type of input is not the same as src_type inputs = torch.IntTensor([5]) src_type = torch.float dst_type = torch.half outputs = cast_tensor_type(inputs, src_type, dst_type) assert isinstance(outputs, torch.Tensor) assert outputs.dtype == inputs.dtype inputs = 'tensor' src_type = str dst_type = str outputs = cast_tensor_type(inputs, src_type, dst_type) assert isinstance(outputs, str) inputs = np.array([5.]) src_type = np.ndarray dst_type = np.ndarray outputs = cast_tensor_type(inputs, src_type, dst_type) assert isinstance(outputs, np.ndarray) inputs = dict( tensor_a=torch.FloatTensor([1.]), tensor_b=torch.FloatTensor([2.])) src_type = torch.float32 dst_type = torch.int32 outputs = cast_tensor_type(inputs, src_type, dst_type) assert isinstance(outputs, dict) assert outputs['tensor_a'].dtype == dst_type assert outputs['tensor_b'].dtype == dst_type inputs = [torch.FloatTensor([1.]), torch.FloatTensor([2.])] src_type = torch.float32 dst_type = torch.int32 outputs = cast_tensor_type(inputs, src_type, dst_type) assert isinstance(outputs, list) assert outputs[0].dtype == dst_type assert outputs[1].dtype == dst_type inputs = 5 outputs = cast_tensor_type(inputs, None, None) assert isinstance(outputs, int) def test_auto_fp16(): with pytest.raises(TypeError): # ExampleObject is not a subclass of nn.Module class ExampleObject(object): @auto_fp16() def __call__(self, x): return x model = ExampleObject() input_x = torch.ones(1, dtype=torch.float32) model(input_x) # apply to all input args class ExampleModule(nn.Module): @auto_fp16() def forward(self, x, y): return x, y model = ExampleModule() input_x = torch.ones(1, 
dtype=torch.float32) input_y = torch.ones(1, dtype=torch.float32) output_x, output_y = model(input_x, input_y) assert output_x.dtype == torch.float32 assert output_y.dtype == torch.float32 model.fp16_enabled = True output_x, output_y = model(input_x, input_y) assert output_x.dtype == torch.half assert output_y.dtype == torch.half if torch.cuda.is_available(): model.cuda() output_x, output_y = model(input_x.cuda(), input_y.cuda()) assert output_x.dtype == torch.half assert output_y.dtype == torch.half # apply to specified input args class ExampleModule(nn.Module): @auto_fp16(apply_to=('x', )) def forward(self, x, y): return x, y model = ExampleModule() input_x = torch.ones(1, dtype=torch.float32) input_y = torch.ones(1, dtype=torch.float32) output_x, output_y = model(input_x, input_y) assert output_x.dtype == torch.float32 assert output_y.dtype == torch.float32 model.fp16_enabled = True output_x, output_y = model(input_x, input_y) assert output_x.dtype == torch.half assert output_y.dtype == torch.float32 if torch.cuda.is_available(): model.cuda() output_x, output_y = model(input_x.cuda(), input_y.cuda()) assert output_x.dtype == torch.half assert output_y.dtype == torch.float32 # apply to optional input args class ExampleModule(nn.Module): @auto_fp16(apply_to=('x', 'y')) def forward(self, x, y=None, z=None): return x, y, z model = ExampleModule() input_x = torch.ones(1, dtype=torch.float32) input_y = torch.ones(1, dtype=torch.float32) input_z = torch.ones(1, dtype=torch.float32) output_x, output_y, output_z = model(input_x, y=input_y, z=input_z) assert output_x.dtype == torch.float32 assert output_y.dtype == torch.float32 assert output_z.dtype == torch.float32 model.fp16_enabled = True output_x, output_y, output_z = model(input_x, y=input_y, z=input_z) assert output_x.dtype == torch.half assert output_y.dtype == torch.half assert output_z.dtype == torch.float32 if torch.cuda.is_available(): model.cuda() output_x, output_y, output_z = model( input_x.cuda(), 
y=input_y.cuda(), z=input_z.cuda()) assert output_x.dtype == torch.half assert output_y.dtype == torch.half assert output_z.dtype == torch.float32 # out_fp32=True class ExampleModule(nn.Module): @auto_fp16(apply_to=('x', 'y'), out_fp32=True) def forward(self, x, y=None, z=None): return x, y, z model = ExampleModule() input_x = torch.ones(1, dtype=torch.half) input_y = torch.ones(1, dtype=torch.float32) input_z = torch.ones(1, dtype=torch.float32) output_x, output_y, output_z = model(input_x, y=input_y, z=input_z) assert output_x.dtype == torch.half assert output_y.dtype == torch.float32 assert output_z.dtype == torch.float32 model.fp16_enabled = True output_x, output_y, output_z = model(input_x, y=input_y, z=input_z) assert output_x.dtype == torch.float32 assert output_y.dtype == torch.float32 assert output_z.dtype == torch.float32 if torch.cuda.is_available(): model.cuda() output_x, output_y, output_z = model( input_x.cuda(), y=input_y.cuda(), z=input_z.cuda()) assert output_x.dtype == torch.float32 assert output_y.dtype == torch.float32 assert output_z.dtype == torch.float32 def test_force_fp32(): with pytest.raises(TypeError): # ExampleObject is not a subclass of nn.Module class ExampleObject(object): @force_fp32() def __call__(self, x): return x model = ExampleObject() input_x = torch.ones(1, dtype=torch.float32) model(input_x) # apply to all input args class ExampleModule(nn.Module): @force_fp32() def forward(self, x, y): return x, y model = ExampleModule() input_x = torch.ones(1, dtype=torch.half) input_y = torch.ones(1, dtype=torch.half) output_x, output_y = model(input_x, input_y) assert output_x.dtype == torch.half assert output_y.dtype == torch.half model.fp16_enabled = True output_x, output_y = model(input_x, input_y) assert output_x.dtype == torch.float32 assert output_y.dtype == torch.float32 if torch.cuda.is_available(): model.cuda() output_x, output_y = model(input_x.cuda(), input_y.cuda()) assert output_x.dtype == torch.float32 assert output_y.dtype 
== torch.float32 # apply to specified input args class ExampleModule(nn.Module): @force_fp32(apply_to=('x', )) def forward(self, x, y): return x, y model = ExampleModule() input_x = torch.ones(1, dtype=torch.half) input_y = torch.ones(1, dtype=torch.half) output_x, output_y = model(input_x, input_y) assert output_x.dtype == torch.half assert output_y.dtype == torch.half model.fp16_enabled = True output_x, output_y = model(input_x, input_y) assert output_x.dtype == torch.float32 assert output_y.dtype == torch.half if torch.cuda.is_available(): model.cuda() output_x, output_y = model(input_x.cuda(), input_y.cuda()) assert output_x.dtype == torch.float32 assert output_y.dtype == torch.half # apply to optional input args class ExampleModule(nn.Module): @force_fp32(apply_to=('x', 'y')) def forward(self, x, y=None, z=None): return x, y, z model = ExampleModule() input_x = torch.ones(1, dtype=torch.half) input_y = torch.ones(1, dtype=torch.half) input_z = torch.ones(1, dtype=torch.half) output_x, output_y, output_z = model(input_x, y=input_y, z=input_z) assert output_x.dtype == torch.half assert output_y.dtype == torch.half assert output_z.dtype == torch.half model.fp16_enabled = True output_x, output_y, output_z = model(input_x, y=input_y, z=input_z) assert output_x.dtype == torch.float32 assert output_y.dtype == torch.float32 assert output_z.dtype == torch.half if torch.cuda.is_available(): model.cuda() output_x, output_y, output_z = model( input_x.cuda(), y=input_y.cuda(), z=input_z.cuda()) assert output_x.dtype == torch.float32 assert output_y.dtype == torch.float32 assert output_z.dtype == torch.half # out_fp16=True class ExampleModule(nn.Module): @force_fp32(apply_to=('x', 'y'), out_fp16=True) def forward(self, x, y=None, z=None): return x, y, z model = ExampleModule() input_x = torch.ones(1, dtype=torch.float32) input_y = torch.ones(1, dtype=torch.half) input_z = torch.ones(1, dtype=torch.half) output_x, output_y, output_z = model(input_x, y=input_y, z=input_z) 
assert output_x.dtype == torch.float32 assert output_y.dtype == torch.half assert output_z.dtype == torch.half model.fp16_enabled = True output_x, output_y, output_z = model(input_x, y=input_y, z=input_z) assert output_x.dtype == torch.half assert output_y.dtype == torch.half assert output_z.dtype == torch.half if torch.cuda.is_available(): model.cuda() output_x, output_y, output_z = model( input_x.cuda(), y=input_y.cuda(), z=input_z.cuda()) assert output_x.dtype == torch.half assert output_y.dtype == torch.half assert output_z.dtype == torch.half ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_hooks.py ================================================ """Tests the hooks with runners. CommandLine: pytest tests/test_runner/test_hooks.py xdoctest tests/test_hooks.py zero """ import logging import os.path as osp import platform import random import re import shutil import sys import tempfile from unittest.mock import MagicMock, Mock, call, patch import pytest import torch import torch.nn as nn from torch.nn.init import constant_ from torch.utils.data import DataLoader from mmcv.fileio.file_client import PetrelBackend from mmcv.runner import (CheckpointHook, DvcliveLoggerHook, EMAHook, Fp16OptimizerHook, GradientCumulativeFp16OptimizerHook, GradientCumulativeOptimizerHook, IterTimerHook, MlflowLoggerHook, NeptuneLoggerHook, OptimizerHook, PaviLoggerHook, WandbLoggerHook, build_runner) from mmcv.runner.fp16_utils import auto_fp16 from mmcv.runner.hooks.hook import HOOKS, Hook from mmcv.runner.hooks.lr_updater import (CosineRestartLrUpdaterHook, CyclicLrUpdaterHook, FlatCosineAnnealingLrUpdaterHook, OneCycleLrUpdaterHook, StepLrUpdaterHook) from mmcv.utils import TORCH_VERSION sys.modules['petrel_client'] = MagicMock() sys.modules['petrel_client.client'] = MagicMock() def test_optimizerhook(): class Model(nn.Module): def __init__(self): super().__init__() self.conv1 = nn.Conv2d( in_channels=1, out_channels=2, 
kernel_size=3, stride=1, padding=1, dilation=1) self.conv2 = nn.Conv2d( in_channels=2, out_channels=2, kernel_size=3, stride=1, padding=1, dilation=1) self.conv3 = nn.Conv2d( in_channels=1, out_channels=2, kernel_size=3, stride=1, padding=1, dilation=1) def forward(self, x): x1 = self.conv1(x) x2 = self.conv2(x1) return x1, x2 model = Model() x = torch.rand(1, 1, 3, 3) dummy_runner = Mock() dummy_runner.optimizer.zero_grad = Mock(return_value=None) dummy_runner.optimizer.step = Mock(return_value=None) dummy_runner.model = model dummy_runner.outputs = dict() dummy_runner.outputs['num_samples'] = 0 class DummyLogger(): def __init__(self): self.msg = '' def log(self, msg=None, **kwargs): self.msg += msg dummy_runner.logger = DummyLogger() optimizer_hook = OptimizerHook( dict(max_norm=2), detect_anomalous_params=True) dummy_runner.outputs['loss'] = model(x)[0].sum() optimizer_hook.after_train_iter(dummy_runner) # assert the parameters of conv2 and conv3 are not in the # computational graph which is with x1.sum() as root. 
assert 'conv2.weight' in dummy_runner.logger.msg assert 'conv2.bias' in dummy_runner.logger.msg assert 'conv3.weight' in dummy_runner.logger.msg assert 'conv3.bias' in dummy_runner.logger.msg assert 'conv1.weight' not in dummy_runner.logger.msg assert 'conv1.bias' not in dummy_runner.logger.msg dummy_runner.outputs['loss'] = model(x)[1].sum() dummy_runner.logger.msg = '' optimizer_hook.after_train_iter(dummy_runner) # assert the parameters of conv3 are not in the computational graph assert 'conv3.weight' in dummy_runner.logger.msg assert 'conv3.bias' in dummy_runner.logger.msg assert 'conv2.weight' not in dummy_runner.logger.msg assert 'conv2.bias' not in dummy_runner.logger.msg assert 'conv1.weight' not in dummy_runner.logger.msg assert 'conv1.bias' not in dummy_runner.logger.msg def test_checkpoint_hook(tmp_path): """xdoctest -m tests/test_runner/test_hooks.py test_checkpoint_hook.""" # test epoch based runner loader = DataLoader(torch.ones((5, 2))) runner = _build_demo_runner('EpochBasedRunner', max_epochs=1) runner.meta = dict() checkpointhook = CheckpointHook(interval=1, by_epoch=True) runner.register_hook(checkpointhook) runner.run([loader], [('train', 1)]) assert runner.meta['hook_msgs']['last_ckpt'] == osp.join( runner.work_dir, 'epoch_1.pth') shutil.rmtree(runner.work_dir) # test petrel oss when the type of runner is `EpochBasedRunner` runner = _build_demo_runner('EpochBasedRunner', max_epochs=4) runner.meta = dict() out_dir = 's3://user/data' with patch.object(PetrelBackend, 'put') as mock_put, \ patch.object(PetrelBackend, 'remove') as mock_remove, \ patch.object(PetrelBackend, 'isfile') as mock_isfile: checkpointhook = CheckpointHook( interval=1, out_dir=out_dir, by_epoch=True, max_keep_ckpts=2) runner.register_hook(checkpointhook) runner.run([loader], [('train', 1)]) basename = osp.basename(runner.work_dir.rstrip(osp.sep)) assert runner.meta['hook_msgs']['last_ckpt'] == \ '/'.join([out_dir, basename, 'epoch_4.pth']) mock_put.assert_called() 
mock_remove.assert_called() mock_isfile.assert_called() shutil.rmtree(runner.work_dir) # test iter based runner runner = _build_demo_runner( 'IterBasedRunner', max_iters=1, max_epochs=None) runner.meta = dict() checkpointhook = CheckpointHook(interval=1, by_epoch=False) runner.register_hook(checkpointhook) runner.run([loader], [('train', 1)]) assert runner.meta['hook_msgs']['last_ckpt'] == osp.join( runner.work_dir, 'iter_1.pth') shutil.rmtree(runner.work_dir) # test petrel oss when the type of runner is `IterBasedRunner` runner = _build_demo_runner( 'IterBasedRunner', max_iters=4, max_epochs=None) runner.meta = dict() out_dir = 's3://user/data' with patch.object(PetrelBackend, 'put') as mock_put, \ patch.object(PetrelBackend, 'remove') as mock_remove, \ patch.object(PetrelBackend, 'isfile') as mock_isfile: checkpointhook = CheckpointHook( interval=1, out_dir=out_dir, by_epoch=False, max_keep_ckpts=2) runner.register_hook(checkpointhook) runner.run([loader], [('train', 1)]) basename = osp.basename(runner.work_dir.rstrip(osp.sep)) assert runner.meta['hook_msgs']['last_ckpt'] == \ '/'.join([out_dir, basename, 'iter_4.pth']) mock_put.assert_called() mock_remove.assert_called() mock_isfile.assert_called() shutil.rmtree(runner.work_dir) def test_ema_hook(): """xdoctest -m tests/test_hooks.py test_ema_hook.""" class DemoModel(nn.Module): def __init__(self): super().__init__() self.conv = nn.Conv2d( in_channels=1, out_channels=2, kernel_size=1, padding=1, bias=True) self._init_weight() def _init_weight(self): constant_(self.conv.weight, 0) constant_(self.conv.bias, 0) def forward(self, x): return self.conv(x).sum() def train_step(self, x, optimizer, **kwargs): return dict(loss=self(x)) def val_step(self, x, optimizer, **kwargs): return dict(loss=self(x)) loader = DataLoader(torch.ones((1, 1, 1, 1))) runner = _build_demo_runner() demo_model = DemoModel() runner.model = demo_model emahook = EMAHook(momentum=0.1, interval=2, warm_up=100, resume_from=None) checkpointhook = 
CheckpointHook(interval=1, by_epoch=True) runner.register_hook(emahook, priority='HIGHEST') runner.register_hook(checkpointhook) runner.run([loader, loader], [('train', 1), ('val', 1)]) checkpoint = torch.load(f'{runner.work_dir}/epoch_1.pth') contain_ema_buffer = False for name, value in checkpoint['state_dict'].items(): if 'ema' in name: contain_ema_buffer = True assert value.sum() == 0 value.fill_(1) else: assert value.sum() == 0 assert contain_ema_buffer torch.save(checkpoint, f'{runner.work_dir}/epoch_1.pth') work_dir = runner.work_dir resume_ema_hook = EMAHook( momentum=0.5, warm_up=0, resume_from=f'{work_dir}/epoch_1.pth') runner = _build_demo_runner(max_epochs=2) runner.model = demo_model runner.register_hook(resume_ema_hook, priority='HIGHEST') checkpointhook = CheckpointHook(interval=1, by_epoch=True) runner.register_hook(checkpointhook) runner.run([loader, loader], [('train', 1), ('val', 1)]) checkpoint = torch.load(f'{runner.work_dir}/epoch_2.pth') contain_ema_buffer = False for name, value in checkpoint['state_dict'].items(): if 'ema' in name: contain_ema_buffer = True assert value.sum() == 2 else: assert value.sum() == 1 assert contain_ema_buffer shutil.rmtree(runner.work_dir) shutil.rmtree(work_dir) def test_custom_hook(): @HOOKS.register_module() class ToyHook(Hook): def __init__(self, info, *args, **kwargs): super().__init__() self.info = info runner = _build_demo_runner_without_hook('EpochBasedRunner', max_epochs=1) # test if custom_hooks is None runner.register_custom_hooks(None) assert len(runner.hooks) == 0 # test if custom_hooks is dict list custom_hooks_cfg = [ dict(type='ToyHook', priority=51, info=51), dict(type='ToyHook', priority=49, info=49) ] runner.register_custom_hooks(custom_hooks_cfg) assert [hook.info for hook in runner.hooks] == [49, 51] # test if custom_hooks is object and without priority runner.register_custom_hooks(ToyHook(info='default')) assert len(runner.hooks) == 3 and runner.hooks[1].info == 'default' 
shutil.rmtree(runner.work_dir) runner = _build_demo_runner_without_hook('EpochBasedRunner', max_epochs=1) # test custom_hooks with string priority setting priority_ranks = [ 'HIGHEST', 'VERY_HIGH', 'HIGH', 'ABOVE_NORMAL', 'NORMAL', 'BELOW_NORMAL', 'LOW', 'VERY_LOW', 'LOWEST' ] random_priority_ranks = priority_ranks.copy() random.shuffle(random_priority_ranks) custom_hooks_cfg = [ dict(type='ToyHook', priority=rank, info=rank) for rank in random_priority_ranks ] runner.register_custom_hooks(custom_hooks_cfg) assert [hook.info for hook in runner.hooks] == priority_ranks shutil.rmtree(runner.work_dir) runner = _build_demo_runner_without_hook('EpochBasedRunner', max_epochs=1) # test register_training_hooks order custom_hooks_cfg = [ dict(type='ToyHook', priority=1, info='custom 1'), dict(type='ToyHook', priority='NORMAL', info='custom normal'), dict(type='ToyHook', priority=89, info='custom 89') ] runner.register_training_hooks( lr_config=ToyHook('lr'), optimizer_config=ToyHook('optimizer'), checkpoint_config=ToyHook('checkpoint'), log_config=dict(interval=1, hooks=[dict(type='ToyHook', info='log')]), momentum_config=ToyHook('momentum'), timer_config=ToyHook('timer'), custom_hooks_config=custom_hooks_cfg) # If custom hooks have same priority with default hooks, custom hooks # will be triggered after default hooks. 
hooks_order = [ 'custom 1', 'lr', 'momentum', 'optimizer', 'checkpoint', 'custom normal', 'timer', 'custom 89', 'log' ] assert [hook.info for hook in runner.hooks] == hooks_order shutil.rmtree(runner.work_dir) def test_pavi_hook(): sys.modules['pavi'] = MagicMock() loader = DataLoader(torch.ones((5, 2))) runner = _build_demo_runner() runner.meta = dict(config_dict=dict(lr=0.02, gpu_ids=range(1))) hook = PaviLoggerHook(add_graph=False, add_last_ckpt=True) runner.register_hook(hook) runner.run([loader, loader], [('train', 1), ('val', 1)]) shutil.rmtree(runner.work_dir) assert hasattr(hook, 'writer') hook.writer.add_scalars.assert_called_with('val', { 'learning_rate': 0.02, 'momentum': 0.95 }, 1) # in Windows environment, the latest checkpoint is copied from epoch_1.pth if platform.system() == 'Windows': snapshot_file_path = osp.join(runner.work_dir, 'latest.pth') else: snapshot_file_path = osp.join(runner.work_dir, 'epoch_1.pth') hook.writer.add_snapshot_file.assert_called_with( tag=runner.work_dir.split('/')[-1], snapshot_file_path=snapshot_file_path, iteration=1) def test_sync_buffers_hook(): loader = DataLoader(torch.ones((5, 2))) runner = _build_demo_runner() runner.register_hook_from_cfg(dict(type='SyncBuffersHook')) runner.run([loader, loader], [('train', 1), ('val', 1)]) shutil.rmtree(runner.work_dir) @pytest.mark.parametrize('multi_optimizers, max_iters, gamma, cyclic_times', [(True, 8, 1, 1), (False, 8, 0.5, 2)]) def test_momentum_runner_hook(multi_optimizers, max_iters, gamma, cyclic_times): """xdoctest -m tests/test_hooks.py test_momentum_runner_hook.""" sys.modules['pavi'] = MagicMock() loader = DataLoader(torch.ones((10, 2))) runner = _build_demo_runner(multi_optimizers=multi_optimizers) # add momentum scheduler hook_cfg = dict( type='CyclicMomentumUpdaterHook', by_epoch=False, target_ratio=(0.85 / 0.95, 1), cyclic_times=cyclic_times, step_ratio_up=0.4, gamma=gamma) runner.register_hook_from_cfg(hook_cfg) # add momentum LR scheduler hook_cfg = dict( 
type='CyclicLrUpdaterHook', by_epoch=False, target_ratio=(10, 1), cyclic_times=1, step_ratio_up=0.4) runner.register_hook_from_cfg(hook_cfg) runner.register_hook_from_cfg(dict(type='IterTimerHook')) # add pavi hook hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True) runner.register_hook(hook) runner.run([loader], [('train', 1)]) shutil.rmtree(runner.work_dir) # TODO: use a more elegant way to check values assert hasattr(hook, 'writer') if multi_optimizers: calls = [ call( 'train', { 'learning_rate/model1': 0.01999999999999999, 'learning_rate/model2': 0.009999999999999995, 'momentum/model1': 0.95, 'momentum/model2': 0.9, }, 1), call( 'train', { 'learning_rate/model1': 0.2, 'learning_rate/model2': 0.1, 'momentum/model1': 0.85, 'momentum/model2': 0.8052631578947369, }, 5), call( 'train', { 'learning_rate/model1': 0.155, 'learning_rate/model2': 0.0775, 'momentum/model1': 0.875, 'momentum/model2': 0.8289473684210527, }, 7) ] else: calls = [ call('train', { 'learning_rate': 0.01999999999999999, 'momentum': 0.95 }, 1), call('train', { 'learning_rate': 0.11, 'momentum': 0.85 }, 3), call('train', { 'learning_rate': 0.1879422863405995, 'momentum': 0.95 }, 6), call('train', { 'learning_rate': 0.11000000000000001, 'momentum': 0.9 }, 8), ] hook.writer.add_scalars.assert_has_calls(calls, any_order=True) # test constant momentum warmup sys.modules['pavi'] = MagicMock() runner = _build_demo_runner(multi_optimizers=multi_optimizers) # add momentum scheduler hook_cfg = dict( type='StepMomentumUpdaterHook', by_epoch=False, warmup='constant', warmup_iters=5, warmup_ratio=0.5, step=[10], ) runner.register_hook_from_cfg(hook_cfg) runner.register_hook_from_cfg(dict(type='IterTimerHook')) hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True) runner.register_hook(hook) runner.run([loader], [('train', 1)]) shutil.rmtree(runner.work_dir) assert hasattr(hook, 'writer') if multi_optimizers: calls = [ call( 'train', { 'learning_rate/model1': 0.02, 
'learning_rate/model2': 0.01, 'momentum/model1': 1.9, 'momentum/model2': 1.8, }, 1), call( 'train', { 'learning_rate/model1': 0.02, 'learning_rate/model2': 0.01, 'momentum/model1': 1.9, 'momentum/model2': 1.8, }, 5), call( 'train', { 'learning_rate/model1': 0.02, 'learning_rate/model2': 0.01, 'momentum/model1': 0.95, 'momentum/model2': 0.9, }, 10), ] else: calls = [ call('train', { 'learning_rate': 0.02, 'momentum': 1.9 }, 1), call('train', { 'learning_rate': 0.02, 'momentum': 1.9 }, 5), call('train', { 'learning_rate': 0.02, 'momentum': 0.95 }, 10), ] hook.writer.add_scalars.assert_has_calls(calls, any_order=True) # test linear momentum warmup sys.modules['pavi'] = MagicMock() runner = _build_demo_runner(multi_optimizers=multi_optimizers) # add momentum scheduler hook_cfg = dict( type='StepMomentumUpdaterHook', by_epoch=False, warmup='linear', warmup_iters=5, warmup_ratio=0.5, step=[10], ) runner.register_hook_from_cfg(hook_cfg) runner.register_hook_from_cfg(dict(type='IterTimerHook')) hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True) runner.register_hook(hook) runner.run([loader], [('train', 1)]) shutil.rmtree(runner.work_dir) assert hasattr(hook, 'writer') if multi_optimizers: calls = [ call( 'train', { 'learning_rate/model1': 0.02, 'learning_rate/model2': 0.01, 'momentum/model1': 1.9, 'momentum/model2': 1.8, }, 1), call( 'train', { 'learning_rate/model1': 0.02, 'learning_rate/model2': 0.01, 'momentum/model1': 1.3571428571428572, 'momentum/model2': 1.2857142857142858, }, 3), call( 'train', { 'learning_rate/model1': 0.02, 'learning_rate/model2': 0.01, 'momentum/model1': 0.95, 'momentum/model2': 0.9, }, 10), ] else: calls = [ call('train', { 'learning_rate': 0.02, 'momentum': 1.9 }, 1), call('train', { 'learning_rate': 0.02, 'momentum': 1.3571428571428572 }, 3), call('train', { 'learning_rate': 0.02, 'momentum': 0.95 }, 10), ] hook.writer.add_scalars.assert_has_calls(calls, any_order=True) # test exponentially momentum warmup 
sys.modules['pavi'] = MagicMock() runner = _build_demo_runner(multi_optimizers=multi_optimizers) # add momentum scheduler hook_cfg = dict( type='StepMomentumUpdaterHook', by_epoch=False, warmup='exp', warmup_iters=5, warmup_ratio=0.5, step=[10], ) runner.register_hook_from_cfg(hook_cfg) runner.register_hook_from_cfg(dict(type='IterTimerHook')) hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True) runner.register_hook(hook) runner.run([loader], [('train', 1)]) shutil.rmtree(runner.work_dir) assert hasattr(hook, 'writer') if multi_optimizers: calls = [ call( 'train', { 'learning_rate/model1': 0.02, 'learning_rate/model2': 0.01, 'momentum/model1': 1.9, 'momentum/model2': 1.8, }, 1), call( 'train', { 'learning_rate/model1': 0.02, 'learning_rate/model2': 0.01, 'momentum/model1': 1.4399307381848783, 'momentum/model2': 1.3641449098593583, }, 3), call( 'train', { 'learning_rate/model1': 0.02, 'learning_rate/model2': 0.01, 'momentum/model1': 0.95, 'momentum/model2': 0.9, }, 10), ] else: calls = [ call('train', { 'learning_rate': 0.02, 'momentum': 1.9 }, 1), call('train', { 'learning_rate': 0.02, 'momentum': 1.4399307381848783 }, 3), call('train', { 'learning_rate': 0.02, 'momentum': 0.95 }, 10), ] hook.writer.add_scalars.assert_has_calls(calls, any_order=True) @pytest.mark.parametrize('multi_optimizers', (True, False)) def test_cosine_runner_hook(multi_optimizers): """xdoctest -m tests/test_hooks.py test_cosine_runner_hook.""" sys.modules['pavi'] = MagicMock() loader = DataLoader(torch.ones((10, 2))) runner = _build_demo_runner(multi_optimizers=multi_optimizers) # add momentum scheduler hook_cfg = dict( type='CosineAnnealingMomentumUpdaterHook', min_momentum_ratio=0.99 / 0.95, by_epoch=False, warmup_iters=2, warmup_ratio=0.9 / 0.95) runner.register_hook_from_cfg(hook_cfg) # add momentum LR scheduler hook_cfg = dict( type='CosineAnnealingLrUpdaterHook', by_epoch=False, min_lr_ratio=0, warmup_iters=2, warmup_ratio=0.9) runner.register_hook_from_cfg(hook_cfg) 
@pytest.mark.parametrize('multi_optimizers, by_epoch', [(False, False),
                                                        (True, False),
                                                        (False, True),
                                                        (True, True)])
def test_flat_cosine_runner_hook(multi_optimizers, by_epoch):
    """Check the values logged while FlatCosineAnnealingLrUpdaterHook runs.

    The hook is registered with linear warmup and ``start_percent=0.5``;
    the learning rates / momenta handed to the (mocked) pavi writer are
    compared against reference values at four logging steps.

    Args:
        multi_optimizers (bool): build the demo runner with two optimizers
            ('model1' / 'model2') instead of one.
        by_epoch (bool): forwarded to the hook config; also selects
            10 epochs / 10 warmup iters instead of 1 epoch / 2 warmup iters.
    """
    sys.modules['pavi'] = MagicMock()
    loader = DataLoader(torch.ones((10, 2)))
    if by_epoch:
        max_epochs, warmup_iters = 10, 10
    else:
        max_epochs, warmup_iters = 1, 2
    runner = _build_demo_runner(
        multi_optimizers=multi_optimizers, max_epochs=max_epochs)

    # start_percent: expected float between 0 and 1
    with pytest.raises(ValueError):
        FlatCosineAnnealingLrUpdaterHook(start_percent=-0.1, min_lr_ratio=0)

    # LR scheduler under test
    scheduler_cfg = {
        'type': 'FlatCosineAnnealingLrUpdaterHook',
        'by_epoch': by_epoch,
        'min_lr_ratio': 0,
        'warmup': 'linear',
        'warmup_iters': warmup_iters,
        'warmup_ratio': 0.9,
        'start_percent': 0.5,
    }
    runner.register_hook_from_cfg(scheduler_cfg)
    # the timer hook is registered once from a config and once as an instance
    runner.register_hook_from_cfg(dict(type='IterTimerHook'))
    runner.register_hook(IterTimerHook())
    # pavi logger hook whose writer is inspected below
    hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(hook)
    runner.run([loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    # TODO: use a more elegant way to check values
    assert hasattr(hook, 'writer')

    # The reference learning rates are the same in both modes; only the
    # step at which each one is logged differs.
    steps = (1, 11, 61, 100) if by_epoch else (1, 6, 7, 10)
    reference_lrs = (
        # (first optimizer, second optimizer)
        (0.018000000000000002, 0.009000000000000001),
        (0.02, 0.01),
        (0.018090169943749474, 0.009045084971874737),
        (0.0019098300562505265, 0.0009549150281252633),
    )

    calls = []
    for step, (lr_first, lr_second) in zip(steps, reference_lrs):
        if multi_optimizers:
            scalars = {
                'learning_rate/model1': lr_first,
                'learning_rate/model2': lr_second,
                'momentum/model1': 0.95,
                'momentum/model2': 0.9,
            }
        else:
            scalars = {'learning_rate': lr_first, 'momentum': 0.95}
        calls.append(call('train', scalars, step))
    hook.writer.add_scalars.assert_has_calls(calls, any_order=True)
@pytest.mark.parametrize('multi_optimizers', (True, False))
def test_cosine_restart_lr_update_hook(multi_optimizers):
    """Test CosineRestartLrUpdaterHook.

    Covers three things:

    1. invalid constructor arguments raise ``AssertionError``;
    2. running with periods whose cumulative sum is shorter than the run
       raises ``ValueError``;
    3. a valid schedule logs the expected learning rates / momenta to the
       (mocked) pavi writer.

    Args:
        multi_optimizers (bool): build the demo runner with two optimizers
            ('model1' / 'model2') instead of one.
    """
    with pytest.raises(AssertionError):
        # either `min_lr` or `min_lr_ratio` should be specified
        CosineRestartLrUpdaterHook(
            by_epoch=False,
            periods=[2, 10],
            restart_weights=[0.5, 0.5],
            min_lr=0.1,
            min_lr_ratio=0)

    with pytest.raises(AssertionError):
        # periods and restart_weights should have the same length
        CosineRestartLrUpdaterHook(
            by_epoch=False,
            periods=[2, 10],
            restart_weights=[0.5],
            min_lr_ratio=0)

    # the last cumulative_periods 7 (out of [5, 7]) should >= 10
    sys.modules['pavi'] = MagicMock()
    loader = DataLoader(torch.ones((10, 2)))
    runner = _build_demo_runner()
    try:
        # add cosine restart LR scheduler
        hook = CosineRestartLrUpdaterHook(
            by_epoch=False,
            periods=[5, 2],  # cumulative_periods [5, 7 (5 + 2)]
            restart_weights=[0.5, 0.5],
            min_lr=0.0001)
        runner.register_hook(hook)
        runner.register_hook(IterTimerHook())

        # add pavi hook
        hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
        runner.register_hook(hook)
        # Only the run itself is expected to raise: keeping the setup outside
        # the `raises` block stops an unrelated ValueError from passing the
        # test by accident.
        with pytest.raises(ValueError):
            runner.run([loader], [('train', 1)])
    finally:
        # The run raises, so cleanup has to live in `finally`; a statement
        # placed after `runner.run` inside the `raises` block never executes.
        shutil.rmtree(runner.work_dir)

    sys.modules['pavi'] = MagicMock()
    loader = DataLoader(torch.ones((10, 2)))
    runner = _build_demo_runner(multi_optimizers=multi_optimizers)

    # add cosine restart LR scheduler
    hook = CosineRestartLrUpdaterHook(
        by_epoch=False,
        periods=[5, 5],
        restart_weights=[0.5, 0.5],
        min_lr_ratio=0)
    runner.register_hook(hook)
    runner.register_hook(IterTimerHook())

    # add pavi hook
    hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(hook)
    runner.run([loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    # TODO: use a more elegant way to check values
    assert hasattr(hook, 'writer')
    if multi_optimizers:
        calls = [
            call(
                'train', {
                    'learning_rate/model1': 0.01,
                    'learning_rate/model2': 0.005,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9,
                }, 1),
            call(
                'train', {
                    'learning_rate/model1': 0.01,
                    'learning_rate/model2': 0.005,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9,
                }, 6),
            call(
                'train', {
                    'learning_rate/model1': 0.0009549150281252633,
                    'learning_rate/model2': 0.00047745751406263163,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9,
                }, 10)
        ]
    else:
        calls = [
            call('train', {
                'learning_rate': 0.01,
                'momentum': 0.95
            }, 1),
            call('train', {
                'learning_rate': 0.01,
                'momentum': 0.95
            }, 6),
            call('train', {
                'learning_rate': 0.0009549150281252633,
                'momentum': 0.95
            }, 10)
        ]
    hook.writer.add_scalars.assert_has_calls(calls, any_order=True)
0.00125, 'learning_rate/model2': 0.001, 'momentum/model1': 0.059375, 'momentum/model2': 0.05625 }, 21), call( 'train', { 'learning_rate/model1': 0.001, 'learning_rate/model2': 0.001, 'momentum/model1': 0.05, 'momentum/model2': 0.05 }, 26), call( 'train', { 'learning_rate/model1': 0.001, 'learning_rate/model2': 0.001, 'momentum/model1': 0.05, 'momentum/model2': 0.05 }, 30) ] else: calls = [ call('train', { 'learning_rate': 0.02, 'momentum': 0.95 }, 1), call('train', { 'learning_rate': 0.01, 'momentum': 0.475 }, 6), call('train', { 'learning_rate': 0.0025, 'momentum': 0.11875 }, 16), call('train', { 'learning_rate': 0.00125, 'momentum': 0.059375 }, 21), call('train', { 'learning_rate': 0.001, 'momentum': 0.05 }, 26), call('train', { 'learning_rate': 0.001, 'momentum': 0.05 }, 30) ] hook.writer.add_scalars.assert_has_calls(calls, any_order=True) # test StepLrUpdaterHook with list[int] `step` value sys.modules['pavi'] = MagicMock() loader = DataLoader(torch.ones((10, 2))) runner = _build_demo_runner(multi_optimizers=multi_optimizers) # add momentum scheduler hook_cfg = dict( type='StepMomentumUpdaterHook', by_epoch=False, step=[4, 6, 8], gamma=0.1) runner.register_hook_from_cfg(hook_cfg) # add step LR scheduler hook = StepLrUpdaterHook(by_epoch=False, step=[4, 6, 8], gamma=0.1) runner.register_hook(hook) runner.register_hook(IterTimerHook()) # add pavi hook hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True) runner.register_hook(hook) runner.run([loader], [('train', 1)]) shutil.rmtree(runner.work_dir) # TODO: use a more elegant way to check values assert hasattr(hook, 'writer') if multi_optimizers: calls = [ call( 'train', { 'learning_rate/model1': 0.02, 'learning_rate/model2': 0.01, 'momentum/model1': 0.95, 'momentum/model2': 0.9 }, 1), call( 'train', { 'learning_rate/model1': 0.002, 'learning_rate/model2': 0.001, 'momentum/model1': 9.5e-2, 'momentum/model2': 9.000000000000001e-2 }, 5), call( 'train', { 'learning_rate/model1': 2.0000000000000004e-4, 
@pytest.mark.parametrize('multi_optimizers, max_iters, gamma, cyclic_times',
                         [(True, 8, 1, 1), (False, 8, 0.5, 2)])
def test_cyclic_lr_update_hook(multi_optimizers, max_iters, gamma,
                               cyclic_times):
    """Exercise CyclicLrUpdaterHook argument checks and logged values.

    Args:
        multi_optimizers (bool): build the demo runner with two optimizers
            ('model1' / 'model2') instead of one.
        max_iters (int): iteration budget of the IterBasedRunner.
        gamma (float): forwarded to the hook.
        cyclic_times (int): forwarded to the hook.
    """
    # Each entry: (exception expected from the constructor, bad kwargs).
    rejected_configs = (
        # by_epoch should be False
        (AssertionError, dict(by_epoch=True)),
        # target_ratio must be either float or tuple/list of two floats
        (AssertionError, dict(by_epoch=False,
                              target_ratio=(10.0, 0.1, 0.2))),
        # step_ratio_up must be in range [0,1)
        (AssertionError, dict(by_epoch=False, step_ratio_up=1.4)),
        # anneal_strategy must be one of "cos" or "linear"
        (ValueError, dict(by_epoch=False, anneal_strategy='sin')),
        # gamma must be in range (0, 1]
        (AssertionError, dict(by_epoch=False, gamma=0)),
    )
    for expected_error, bad_kwargs in rejected_configs:
        with pytest.raises(expected_error):
            CyclicLrUpdaterHook(**bad_kwargs)

    sys.modules['pavi'] = MagicMock()
    loader = DataLoader(torch.ones((10, 2)))
    runner = _build_demo_runner(
        runner_type='IterBasedRunner',
        max_epochs=None,
        max_iters=max_iters,
        multi_optimizers=multi_optimizers)

    # cyclic LR scheduler under test
    runner.register_hook(
        CyclicLrUpdaterHook(
            by_epoch=False,
            target_ratio=(10.0, 1.0),
            cyclic_times=cyclic_times,
            step_ratio_up=0.5,
            anneal_strategy='linear',
            gamma=gamma))
    # the timer hook is registered once from a config and once as an instance
    runner.register_hook_from_cfg(dict(type='IterTimerHook'))
    runner.register_hook(IterTimerHook())
    # pavi logger hook whose writer is inspected below
    hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(hook)
    runner.run([loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    assert hasattr(hook, 'writer')
    if multi_optimizers:
        # (step, lr of model1, lr of model2)
        reference = ((1, 0.02, 0.01), (4, 0.155, 0.0775), (6, 0.155, 0.0775))
        calls = [
            call(
                'train', {
                    'learning_rate/model1': lr_first,
                    'learning_rate/model2': lr_second,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9,
                }, step) for step, lr_first, lr_second in reference
        ]
    else:
        # (step, lr)
        reference = ((1, 0.02), (4, 0.11), (6, 0.065), (7, 0.11))
        calls = [
            call('train', {
                'learning_rate': lr,
                'momentum': 0.95
            }, step) for step, lr in reference
        ]
    hook.writer.add_scalars.assert_has_calls(calls, any_order=True)
def test_dvclive_hook_model_file(tmp_path):
    """Check that DvcliveLoggerHook writes the file named by ``model_file``.

    ``dvclive`` itself is replaced by a ``MagicMock`` in ``sys.modules``; the
    test only verifies that ``model.pth`` exists in the runner's work dir
    after one train and one val pass.

    Args:
        tmp_path: pytest fixture, kept for signature compatibility; the
            model file is written to the runner's own work dir.
    """
    sys.modules['dvclive'] = MagicMock()
    runner = _build_demo_runner()
    try:
        model_file = osp.join(runner.work_dir, 'model.pth')
        hook = DvcliveLoggerHook(model_file=model_file)
        runner.register_hook(hook)

        # A single loader is enough; it is reused for both workflow stages.
        loader = DataLoader(torch.ones((5, 2)))

        runner.run([loader, loader], [('train', 1), ('val', 1)])

        assert osp.exists(model_file)
    finally:
        # Remove the temporary work dir even when the run or assertion fails.
        shutil.rmtree(runner.work_dir)
def test_runner_with_revise_keys():
    """Check ``runner.load_checkpoint`` with ``revise_keys``.

    Two directions are exercised: adding a ``backbone.`` prefix to the keys
    of a plain model's checkpoint, and stripping that prefix from a prefixed
    model's checkpoint.

    The checkpoint is written inside the runner's own temporary work dir, so
    concurrent test runs cannot clash on a shared file name, and the whole
    directory is removed in ``finally`` even when an assertion fails.
    """
    import os
    import shutil

    class Model(nn.Module):

        def __init__(self):
            super().__init__()
            self.conv = nn.Conv2d(3, 3, 1)

    class PrefixModel(nn.Module):

        def __init__(self):
            super().__init__()
            self.backbone = Model()

    pmodel = PrefixModel()
    model = Model()
    runner = _build_demo_runner(runner_type='EpochBasedRunner')
    checkpoint_path = os.path.join(runner.work_dir, 'checkpoint.pth')

    try:
        # add prefix
        torch.save(model.state_dict(), checkpoint_path)
        runner.model = pmodel
        state_dict = runner.load_checkpoint(
            checkpoint_path, revise_keys=[(r'^', 'backbone.')])
        for key in pmodel.backbone.state_dict().keys():
            assert torch.equal(pmodel.backbone.state_dict()[key],
                               state_dict[key])

        # strip prefix
        torch.save(pmodel.state_dict(), checkpoint_path)
        runner.model = model
        state_dict = runner.load_checkpoint(
            checkpoint_path, revise_keys=[(r'^backbone\.', '')])
        for key in state_dict.keys():
            key_stripped = re.sub(r'^backbone\.', '', key)
            assert torch.equal(model.state_dict()[key_stripped],
                               state_dict[key])
    finally:
        # Removes both the checkpoint and the runner's temporary work dir.
        shutil.rmtree(runner.work_dir)
runner_1.run([loader_1], [('train', 1)]) # optimize without cumulative_iters loader_2 = DataLoader(data, batch_size=3) runner_2 = build_toy_runner() optimizer_hook = OptimizerHook(grad_clip=dict(max_norm=0.2)) runner_2.register_hook(optimizer_hook) runner_2.run([loader_2], [('train', 1)]) # test optimizer works well assert (runner_1.model.fc.weight < 1).all() assert (runner_1.model.fc.bias < 1).all() # test optimizer with cumulative_iters gets the same results assert torch.allclose(runner_1.model.fc.weight, runner_2.model.fc.weight) assert torch.allclose(runner_1.model.fc.bias, runner_2.model.fc.bias) shutil.rmtree(runner_1.work_dir) shutil.rmtree(runner_2.work_dir) # test iter based runner data = torch.rand((8, 3)) # optimize with cumulative_iters loader_1 = DataLoader(data, batch_size=1) runner_1 = build_toy_runner(dict(type='IterBasedRunner', max_iters=8)) optimizer_hook = GradientCumulativeOptimizerHook( grad_clip=dict(max_norm=0.2), cumulative_iters=3) runner_1.register_hook(optimizer_hook) runner_1.run([loader_1], [('train', 1)]) # optimize without cumulative_iters loader_2_divisible = DataLoader(data[:6], batch_size=3) loader_2_remainder = DataLoader(data[6:], batch_size=2) runner_2 = build_toy_runner(dict(type='IterBasedRunner', max_iters=3)) optimizer_hook = OptimizerHook(grad_clip=dict(max_norm=0.2)) runner_2.register_hook(optimizer_hook) runner_2.run([loader_2_divisible, loader_2_remainder], [('train', 2), ('train', 1)]) # test optimizer works well assert (runner_1.model.fc.weight < 1).all() assert (runner_1.model.fc.bias < 1).all() # test optimizer with cumulative_iters gets the same results assert torch.allclose(runner_1.model.fc.weight, runner_2.model.fc.weight) assert torch.allclose(runner_1.model.fc.bias, runner_2.model.fc.bias) shutil.rmtree(runner_1.work_dir) shutil.rmtree(runner_2.work_dir) # test has_batch_norm model = ToyModel(with_norm=True) optimizer_hook = GradientCumulativeOptimizerHook( grad_clip=dict(max_norm=0.2), cumulative_iters=3) 
assert optimizer_hook.has_batch_norm(model) @pytest.mark.skipif( not torch.cuda.is_available(), reason='requires CUDA support') def test_gradient_cumulative_fp16_optimizer_hook(): class ToyModel(nn.Module): def __init__(self): super().__init__() self.fp16_enabled = False self.fc = nn.Linear(3, 2) nn.init.constant_(self.fc.weight, 1.) nn.init.constant_(self.fc.bias, 1.) @auto_fp16(apply_to=('x', )) def forward(self, x): x = self.fc(x) return x def train_step(self, x, optimizer, **kwargs): return dict(loss=self(x).mean(), num_samples=x.shape[0]) def val_step(self, x, optimizer, **kwargs): return dict(loss=self(x).mean(), num_samples=x.shape[0]) def build_toy_runner(config=dict(type='EpochBasedRunner', max_epochs=3)): model = ToyModel().cuda() optimizer = torch.optim.SGD(model.parameters(), lr=0.02) tmp_dir = tempfile.mkdtemp() runner = build_runner( config, default_args=dict( model=model, work_dir=tmp_dir, optimizer=optimizer, logger=logging.getLogger(), meta=dict())) return runner # test epoch based runner data = torch.rand((6, 3)).cuda() # optimize with cumulative_iters loader_1 = DataLoader(data, batch_size=1) runner_1 = build_toy_runner() optimizer_hook = GradientCumulativeFp16OptimizerHook( grad_clip=dict(max_norm=0.2), cumulative_iters=3) runner_1.register_hook(optimizer_hook) runner_1.run([loader_1], [('train', 1)]) # optimize without cumulative_iters loader_2 = DataLoader(data, batch_size=3) runner_2 = build_toy_runner() optimizer_hook = Fp16OptimizerHook(grad_clip=dict(max_norm=0.2)) runner_2.register_hook(optimizer_hook) runner_2.run([loader_2], [('train', 1)]) # test optimizer works well assert (runner_1.model.fc.weight < 1).all() assert (runner_1.model.fc.bias < 1).all() # test optimizer with cumulative_iters gets the same results assert torch.allclose(runner_1.model.fc.weight, runner_2.model.fc.weight) assert torch.allclose(runner_1.model.fc.bias, runner_2.model.fc.bias) shutil.rmtree(runner_1.work_dir) shutil.rmtree(runner_2.work_dir) # test iter based 
class SubModel(nn.Module):
    """Small nested module used as a child of the example models.

    Holds a 1x1 grouped convolution (two channels, two groups), a GroupNorm
    layer and one free-standing scalar parameter. ``forward`` is an identity;
    only the parameter layout matters to the optimizer tests.
    """

    def __init__(self):
        super(SubModel, self).__init__()
        # Sub-modules are created in the same order as before so that the
        # parameter enumeration order stays unchanged.
        self.conv1 = nn.Conv2d(
            in_channels=2, out_channels=2, kernel_size=1, groups=2)
        self.gn = nn.GroupNorm(num_groups=2, num_channels=2)
        self.param1 = nn.Parameter(torch.ones(1))

    def forward(self, x):
        # Pass-through: the input object is returned untouched.
        return x
def check_sgd_optimizer(optimizer,
                        model,
                        prefix='',
                        bias_lr_mult=1,
                        bias_decay_mult=1,
                        norm_decay_mult=1,
                        dwconv_decay_mult=1,
                        dcn_offset_lr_mult=1,
                        bypass_duplicate=False):
    """Assert that ``optimizer`` is an SGD with one param group per parameter
    and that every group carries the expected lr / weight decay.

    Args:
        optimizer: optimizer to inspect.
        model: model whose ``parameters()`` order the groups must follow.
        prefix (str): accepted for call-site compatibility; not used here.
        bias_lr_mult: lr multiplier expected on conv bias parameters.
        bias_decay_mult: weight-decay multiplier expected on ``conv2.bias``.
        norm_decay_mult: weight-decay multiplier expected on norm layers.
        dwconv_decay_mult: weight-decay multiplier expected on ``sub.conv1``.
        dcn_offset_lr_mult: lr multiplier expected on ``dcn.conv_offset``.
        bypass_duplicate (bool): accepted for call-site compatibility; not
            used here.

    Raises:
        AssertionError: if any check fails.
    """
    param_groups = optimizer.param_groups
    assert isinstance(optimizer, torch.optim.SGD)
    assert optimizer.defaults['lr'] == base_lr
    assert optimizer.defaults['momentum'] == momentum
    assert optimizer.defaults['weight_decay'] == base_wd

    model_parameters = list(model.parameters())
    assert len(param_groups) == len(model_parameters)
    for i, param in enumerate(model_parameters):
        param_group = param_groups[i]
        assert torch.equal(param_group['params'][0], param)
        assert param_group['momentum'] == momentum

    # (parameter name, lr multiplier, weight-decay multiplier), listed in
    # param-group order. A multiplier of 1 means "base value".
    expected = [
        ('param1', 1, 1),
        ('conv1.weight', 1, 1),
        ('conv2.weight', 1, 1),
        ('conv2.bias', bias_lr_mult, bias_decay_mult),
        ('bn.weight', 1, norm_decay_mult),
        ('bn.bias', 1, norm_decay_mult),
        ('sub.param1', 1, 1),
        ('sub.conv1.weight', 1, dwconv_decay_mult),
        ('sub.conv1.bias', bias_lr_mult, dwconv_decay_mult),
        ('sub.gn.weight', 1, norm_decay_mult),
        ('sub.gn.bias', 1, norm_decay_mult),
    ]
    # The example models only contain the `dcn` layer when the compiled ops
    # are importable, so that flag (not CUDA availability) decides whether
    # groups 11-13 exist.
    if OPS_AVAILABLE:
        expected += [
            ('dcn.weight', 1, 1),
            ('dcn.conv_offset.weight', dcn_offset_lr_mult, 1),
            ('dcn.conv_offset.bias', dcn_offset_lr_mult, 1),
        ]

    for index, (name, lr_mult, decay_mult) in enumerate(expected):
        group = param_groups[index]
        assert group['lr'] == base_lr * lr_mult, name
        assert group['weight_decay'] == base_wd * decay_mult, name
PseudoDataParallel() optimizer_cfg = dict( type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum) paramwise_cfg = None optim_constructor = DefaultOptimizerConstructor(optimizer_cfg) optimizer = optim_constructor(model) check_default_optimizer(optimizer, model, prefix='module.') # basic config with DataParallel if torch.cuda.is_available(): model = torch.nn.DataParallel(ExampleModel()) optimizer_cfg = dict( type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum) paramwise_cfg = None optim_constructor = DefaultOptimizerConstructor(optimizer_cfg) optimizer = optim_constructor(model) check_default_optimizer(optimizer, model, prefix='module.') # Empty paramwise_cfg with ExampleModel model = ExampleModel() optimizer_cfg = dict( type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum) paramwise_cfg = dict() optim_constructor = DefaultOptimizerConstructor(optimizer_cfg, paramwise_cfg) optimizer = optim_constructor(model) check_default_optimizer(optimizer, model) # Empty paramwise_cfg with ExampleModel and no grad model = ExampleModel() for param in model.parameters(): param.requires_grad = False optimizer_cfg = dict( type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum) paramwise_cfg = dict() optim_constructor = DefaultOptimizerConstructor(optimizer_cfg) optimizer = optim_constructor(model) check_default_optimizer(optimizer, model) # paramwise_cfg with ExampleModel model = ExampleModel() optimizer_cfg = dict( type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum) paramwise_cfg = dict( bias_lr_mult=2, bias_decay_mult=0.5, norm_decay_mult=0, dwconv_decay_mult=0.1, dcn_offset_lr_mult=0.1) optim_constructor = DefaultOptimizerConstructor(optimizer_cfg, paramwise_cfg) optimizer = optim_constructor(model) check_sgd_optimizer(optimizer, model, **paramwise_cfg) # paramwise_cfg with ExampleModel, weight decay is None model = ExampleModel() optimizer_cfg = dict(type='Rprop', lr=base_lr) paramwise_cfg = dict(bias_lr_mult=2) 
optim_constructor = DefaultOptimizerConstructor(optimizer_cfg, paramwise_cfg) optimizer = optim_constructor(model) param_groups = optimizer.param_groups assert isinstance(optimizer, torch.optim.Rprop) assert optimizer.defaults['lr'] == base_lr model_parameters = list(model.parameters()) assert len(param_groups) == len(model_parameters) for i, param in enumerate(model_parameters): param_group = param_groups[i] assert torch.equal(param_group['params'][0], param) # param1 assert param_groups[0]['lr'] == base_lr # conv1.weight assert param_groups[1]['lr'] == base_lr # conv2.weight assert param_groups[2]['lr'] == base_lr # conv2.bias assert param_groups[3]['lr'] == base_lr * paramwise_cfg['bias_lr_mult'] # bn.weight assert param_groups[4]['lr'] == base_lr # bn.bias assert param_groups[5]['lr'] == base_lr # sub.param1 assert param_groups[6]['lr'] == base_lr # sub.conv1.weight assert param_groups[7]['lr'] == base_lr # sub.conv1.bias assert param_groups[8]['lr'] == base_lr * paramwise_cfg['bias_lr_mult'] # sub.gn.weight assert param_groups[9]['lr'] == base_lr # sub.gn.bias assert param_groups[10]['lr'] == base_lr if OPS_AVAILABLE: # dcn.weight assert param_groups[11]['lr'] == base_lr # dcn.conv_offset.weight assert param_groups[12]['lr'] == base_lr # dcn.conv_offset.bias assert param_groups[13]['lr'] == base_lr # paramwise_cfg with pseudo data parallel model = PseudoDataParallel() optimizer_cfg = dict( type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum) paramwise_cfg = dict( bias_lr_mult=2, bias_decay_mult=0.5, norm_decay_mult=0, dwconv_decay_mult=0.1, dcn_offset_lr_mult=0.1) optim_constructor = DefaultOptimizerConstructor(optimizer_cfg, paramwise_cfg) optimizer = optim_constructor(model) check_sgd_optimizer(optimizer, model, prefix='module.', **paramwise_cfg) # paramwise_cfg with DataParallel if torch.cuda.is_available(): model = torch.nn.DataParallel(ExampleModel()) optimizer_cfg = dict( type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum) 
paramwise_cfg = dict( bias_lr_mult=2, bias_decay_mult=0.5, norm_decay_mult=0, dwconv_decay_mult=0.1, dcn_offset_lr_mult=0.1) optim_constructor = DefaultOptimizerConstructor( optimizer_cfg, paramwise_cfg) optimizer = optim_constructor(model) check_sgd_optimizer( optimizer, model, prefix='module.', **paramwise_cfg) # paramwise_cfg with ExampleModel and no grad for param in model.parameters(): param.requires_grad = False optim_constructor = DefaultOptimizerConstructor(optimizer_cfg, paramwise_cfg) optimizer = optim_constructor(model) param_groups = optimizer.param_groups assert isinstance(optimizer, torch.optim.SGD) assert optimizer.defaults['lr'] == base_lr assert optimizer.defaults['momentum'] == momentum assert optimizer.defaults['weight_decay'] == base_wd for i, (name, param) in enumerate(model.named_parameters()): param_group = param_groups[i] assert torch.equal(param_group['params'][0], param) assert param_group['momentum'] == momentum assert param_group['lr'] == base_lr assert param_group['weight_decay'] == base_wd # paramwise_cfg with bypass_duplicate option model = ExampleDuplicateModel() optimizer_cfg = dict( type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum) paramwise_cfg = dict( bias_lr_mult=2, bias_decay_mult=0.5, norm_decay_mult=0, dwconv_decay_mult=0.1) with pytest.raises(ValueError) as excinfo: optim_constructor = DefaultOptimizerConstructor( optimizer_cfg, paramwise_cfg) optim_constructor(model) assert 'some parameters appear in more than one parameter ' \ 'group' == excinfo.value paramwise_cfg = dict( bias_lr_mult=2, bias_decay_mult=0.5, norm_decay_mult=0, dwconv_decay_mult=0.1, dcn_offset_lr_mult=0.1, bypass_duplicate=True) optim_constructor = DefaultOptimizerConstructor(optimizer_cfg, paramwise_cfg) with warnings.catch_warnings(record=True) as w: optimizer = optim_constructor(model) warnings.simplefilter('always') assert len(w) == 1 assert str(w[0].message) == 'conv3.0 is duplicate. 
It is skipped ' \ 'since bypass_duplicate=True' model_parameters = list(model.parameters()) num_params = 14 if OPS_AVAILABLE else 11 assert len(optimizer.param_groups) == len(model_parameters) == num_params check_sgd_optimizer(optimizer, model, **paramwise_cfg) # test DefaultOptimizerConstructor with custom_keys and ExampleModel model = ExampleModel() optimizer_cfg = dict( type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum) paramwise_cfg = dict( custom_keys={ 'param1': dict(lr_mult=10), 'sub': dict(lr_mult=0.1, decay_mult=0), 'sub.gn': dict(lr_mult=0.01), 'non_exist_key': dict(lr_mult=0.0) }, norm_decay_mult=0.5) with pytest.raises(TypeError): # custom_keys should be a dict paramwise_cfg_ = dict(custom_keys=[0.1, 0.0001]) optim_constructor = DefaultOptimizerConstructor( optimizer_cfg, paramwise_cfg_) optimizer = optim_constructor(model) with pytest.raises(ValueError): # if 'decay_mult' is specified in custom_keys, weight_decay should be # specified optimizer_cfg_ = dict(type='SGD', lr=0.01) paramwise_cfg_ = dict(custom_keys={'.backbone': dict(decay_mult=0.5)}) optim_constructor = DefaultOptimizerConstructor( optimizer_cfg_, paramwise_cfg_) optimizer = optim_constructor(model) optim_constructor = DefaultOptimizerConstructor(optimizer_cfg, paramwise_cfg) optimizer = optim_constructor(model) # check optimizer type and default config assert isinstance(optimizer, torch.optim.SGD) assert optimizer.defaults['lr'] == base_lr assert optimizer.defaults['momentum'] == momentum assert optimizer.defaults['weight_decay'] == base_wd # check params groups param_groups = optimizer.param_groups groups = [] group_settings = [] # group 1, matches of 'param1' # 'param1' is the longest match for 'sub.param1' groups.append(['param1', 'sub.param1']) group_settings.append({ 'lr': base_lr * 10, 'momentum': momentum, 'weight_decay': base_wd, }) # group 2, matches of 'sub.gn' groups.append(['sub.gn.weight', 'sub.gn.bias']) group_settings.append({ 'lr': base_lr * 0.01, 'momentum': 
momentum, 'weight_decay': base_wd, }) # group 3, matches of 'sub' groups.append(['sub.conv1.weight', 'sub.conv1.bias']) group_settings.append({ 'lr': base_lr * 0.1, 'momentum': momentum, 'weight_decay': 0, }) # group 4, bn is configured by 'norm_decay_mult' groups.append(['bn.weight', 'bn.bias']) group_settings.append({ 'lr': base_lr, 'momentum': momentum, 'weight_decay': base_wd * 0.5, }) # group 5, default group groups.append(['conv1.weight', 'conv2.weight', 'conv2.bias']) group_settings.append({ 'lr': base_lr, 'momentum': momentum, 'weight_decay': base_wd }) num_params = 14 if OPS_AVAILABLE else 11 assert len(param_groups) == num_params for i, (name, param) in enumerate(model.named_parameters()): assert torch.equal(param_groups[i]['params'][0], param) for group, settings in zip(groups, group_settings): if name in group: for setting in settings: assert param_groups[i][setting] == settings[ setting], f'{name} {setting}' # test DefaultOptimizerConstructor with custom_keys and ExampleModel 2 model = ExampleModel() optimizer_cfg = dict(type='SGD', lr=base_lr, momentum=momentum) paramwise_cfg = dict(custom_keys={'param1': dict(lr_mult=10)}) optim_constructor = DefaultOptimizerConstructor(optimizer_cfg, paramwise_cfg) optimizer = optim_constructor(model) # check optimizer type and default config assert isinstance(optimizer, torch.optim.SGD) assert optimizer.defaults['lr'] == base_lr assert optimizer.defaults['momentum'] == momentum assert optimizer.defaults['weight_decay'] == 0 # check params groups param_groups = optimizer.param_groups groups = [] group_settings = [] # group 1, matches of 'param1' groups.append(['param1', 'sub.param1']) group_settings.append({ 'lr': base_lr * 10, 'momentum': momentum, 'weight_decay': 0, }) # group 2, default group groups.append([ 'sub.conv1.weight', 'sub.conv1.bias', 'sub.gn.weight', 'sub.gn.bias', 'conv1.weight', 'conv2.weight', 'conv2.bias', 'bn.weight', 'bn.bias' ]) group_settings.append({ 'lr': base_lr, 'momentum': momentum, 
def test_torch_optimizers():
    """The built-in torch optimizers must all be listed in the registry."""
    expected_names = {
        'ASGD', 'Adadelta', 'Adagrad', 'Adam', 'AdamW', 'Adamax', 'LBFGS',
        'Optimizer', 'RMSprop', 'Rprop', 'SGD', 'SparseAdam'
    }
    assert expected_names.issubset(set(TORCH_OPTIMIZERS))


def test_build_optimizer_constructor():
    """Build optimizer constructors from config, built-in and custom."""
    model = ExampleModel()
    optimizer_cfg = dict(
        type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
    paramwise_cfg = dict(
        bias_lr_mult=2,
        bias_decay_mult=0.5,
        norm_decay_mult=0,
        dwconv_decay_mult=0.1,
        dcn_offset_lr_mult=0.1)

    # Built-in constructor, selected by its registered name.
    optim_constructor = build_optimizer_constructor(
        dict(
            type='DefaultOptimizerConstructor',
            optimizer_cfg=optimizer_cfg,
            paramwise_cfg=paramwise_cfg))
    optimizer = optim_constructor(model)
    check_sgd_optimizer(optimizer, model, **paramwise_cfg)

    from mmcv.runner import OPTIMIZERS
    from mmcv.utils import build_from_cfg

    @OPTIMIZER_BUILDERS.register_module()
    class MyOptimizerConstructor(DefaultOptimizerConstructor):
        """Constructor that only scales the lr of ``conv1*`` parameters."""

        def __call__(self, model):
            if hasattr(model, 'module'):
                model = model.module

            conv1_lr_mult = self.paramwise_cfg.get('conv1_lr_mult', 1.)

            params = []
            for name, param in model.named_parameters():
                entry = {'params': [param]}
                if name.startswith('conv1') and param.requires_grad:
                    entry['lr'] = self.base_lr * conv1_lr_mult
                params.append(entry)
            # Writes into the enclosing test's ``optimizer_cfg`` dict.
            optimizer_cfg['params'] = params

            return build_from_cfg(optimizer_cfg, OPTIMIZERS)

    # Custom constructor registered just above.
    paramwise_cfg = dict(conv1_lr_mult=5)
    optim_constructor = build_optimizer_constructor(
        dict(
            type='MyOptimizerConstructor',
            optimizer_cfg=optimizer_cfg,
            paramwise_cfg=paramwise_cfg))
    optimizer = optim_constructor(model)

    param_groups = optimizer.param_groups
    assert isinstance(optimizer, torch.optim.SGD)
    defaults = optimizer.defaults
    assert defaults['lr'] == base_lr
    assert defaults['momentum'] == momentum
    assert defaults['weight_decay'] == base_wd
    for i, param in enumerate(model.parameters()):
        current = param_groups[i]
        assert torch.equal(current['params'][0], param)
        assert current['momentum'] == momentum
    # conv1.weight
    conv1_weight_group = param_groups[1]
    assert conv1_weight_group['lr'] == \
        base_lr * paramwise_cfg['conv1_lr_mult']
    assert conv1_weight_group['weight_decay'] == base_wd


def test_build_optimizer():
    """``build_optimizer`` with and without an embedded ``paramwise_cfg``."""
    # Plain optimizer config.
    model = ExampleModel()
    plain_cfg = dict(
        type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
    optimizer = build_optimizer(model, plain_cfg)
    check_default_optimizer(optimizer, model)

    # Optimizer config carrying paramwise options.
    model = ExampleModel()
    optimizer_cfg = dict(
        type='SGD',
        lr=base_lr,
        weight_decay=base_wd,
        momentum=momentum,
        paramwise_cfg=dict(
            bias_lr_mult=2,
            bias_decay_mult=0.5,
            norm_decay_mult=0,
            dwconv_decay_mult=0.1,
            dcn_offset_lr_mult=0.1))
    optimizer = build_optimizer(model, optimizer_cfg)
    check_sgd_optimizer(optimizer, model, **optimizer_cfg['paramwise_cfg'])
class OldStyleModel(nn.Module):
    """Minimal module without ``train_step``/``val_step``."""

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 3, 1)


class Model(OldStyleModel):
    """``OldStyleModel`` plus no-op ``train_step``/``val_step`` methods."""

    def train_step(self):
        pass

    def val_step(self):
        pass


def test_build_runner():
    """``build_runner`` builds both runner types and rejects mixed limits."""
    # The runners create ``work_dir`` on construction; rooting it in a
    # TemporaryDirectory ensures nothing is left behind in the temp folder.
    with tempfile.TemporaryDirectory() as temp_root:
        dir_name = ''.join(
            [random.choice(string.ascii_letters) for _ in range(10)])
        default_args = dict(
            model=Model(),
            work_dir=osp.join(temp_root, dir_name),
            logger=logging.getLogger())

        cfg = dict(type='EpochBasedRunner', max_epochs=1)
        runner = build_runner(cfg, default_args=default_args)
        assert runner._max_epochs == 1

        cfg = dict(type='IterBasedRunner', max_iters=1)
        runner = build_runner(cfg, default_args=default_args)
        assert runner._max_iters == 1

        with pytest.raises(ValueError, match='Only one of'):
            cfg = dict(type='IterBasedRunner', max_epochs=1, max_iters=1)
            runner = build_runner(cfg, default_args=default_args)


@pytest.mark.parametrize('runner_class', RUNNERS.module_dict.values())
def test_epoch_based_runner(runner_class):
    """Constructor argument validation and work_dir creation of a runner."""

    with pytest.warns(DeprecationWarning):
        # batch_processor is deprecated
        model = OldStyleModel()

        def batch_processor():
            pass

        _ = runner_class(model, batch_processor, logger=logging.getLogger())

    with pytest.raises(TypeError):
        # batch_processor must be callable
        model = OldStyleModel()
        _ = runner_class(model, batch_processor=0, logger=logging.getLogger())

    with pytest.raises(TypeError):
        # optimizer must be a optimizer or a dict of optimizers
        model = Model()
        optimizer = 'NotAOptimizer'
        _ = runner_class(
            model, optimizer=optimizer, logger=logging.getLogger())

    # Build the valid optimizer outside ``pytest.raises`` so that only the
    # runner's own validation can satisfy the expected TypeError. A bare
    # ``torch.optim.Adam()`` raises TypeError itself (missing ``params``),
    # which previously hid whether the runner check ran at all.
    model = Model()
    optimizers = dict(
        optim1=torch.optim.Adam(model.parameters()), optim2='NotAOptimizer')
    with pytest.raises(TypeError):
        # optimizer must be a optimizer or a dict of optimizers
        _ = runner_class(
            model, optimizer=optimizers, logger=logging.getLogger())

    with pytest.raises(TypeError):
        # logger must be a logging.Logger
        model = Model()
        _ = runner_class(model, logger=None)

    with pytest.raises(TypeError):
        # meta must be a dict or None
        model = Model()
        _ = runner_class(model, logger=logging.getLogger(), meta=['list'])

    with pytest.raises(AssertionError):
        # model must implement the method train_step()
        model = OldStyleModel()
        _ = runner_class(model, logger=logging.getLogger())

    with pytest.raises(TypeError):
        # work_dir must be a str or None
        model = Model()
        _ = runner_class(model, work_dir=1, logger=logging.getLogger())

    with pytest.raises(RuntimeError):
        # batch_processor and train_step() cannot be both set

        def batch_processor():
            pass

        model = Model()
        _ = runner_class(model, batch_processor, logger=logging.getLogger())

    # test work_dir
    model = Model()
    temp_root = tempfile.gettempdir()
    dir_name = ''.join(
        [random.choice(string.ascii_letters) for _ in range(10)])
    work_dir = osp.join(temp_root, dir_name)
    try:
        _ = runner_class(model, work_dir=work_dir, logger=logging.getLogger())
        assert osp.isdir(work_dir)
        # Constructing a second runner on an existing work_dir must succeed.
        _ = runner_class(model, work_dir=work_dir, logger=logging.getLogger())
        assert osp.isdir(work_dir)
    finally:
        # Clean up even if an assertion above failed.
        if osp.isdir(work_dir):
            os.removedirs(work_dir)


@pytest.mark.parametrize('runner_class', RUNNERS.module_dict.values())
def test_runner_with_parallel(runner_class):
    """Runners accept models wrapped in ``MMDataParallel``."""

    def batch_processor():
        pass

    model = MMDataParallel(OldStyleModel())
    _ = runner_class(model, batch_processor, logger=logging.getLogger())

    model = MMDataParallel(Model())
    _ = runner_class(model, logger=logging.getLogger())

    with pytest.raises(RuntimeError):
        # batch_processor and train_step() cannot be both set

        def batch_processor():
            pass

        model = MMDataParallel(Model())
        _ = runner_class(model, batch_processor, logger=logging.getLogger())
@pytest.mark.parametrize('runner_class', RUNNERS.module_dict.values())
def test_save_checkpoint(runner_class):
    """``save_checkpoint`` writes the first checkpoint and ``latest.pth``."""
    runner = runner_class(model=Model(), logger=logging.getLogger())

    with pytest.raises(TypeError):
        # meta should be None or dict
        runner.save_checkpoint('.', meta=list())

    with tempfile.TemporaryDirectory() as root:
        runner.save_checkpoint(root)

        latest_path = osp.join(root, 'latest.pth')
        assert osp.exists(latest_path)

        # The checkpoint file name depends on the runner flavour.
        if isinstance(runner, EpochBasedRunner):
            first_ckp_path = osp.join(root, 'epoch_1.pth')
        elif isinstance(runner, IterBasedRunner):
            first_ckp_path = osp.join(root, 'iter_1.pth')

        assert osp.exists(first_ckp_path)

        # On Windows a copy is used instead of a symlink, so the resolved
        # paths are only compared on other platforms.
        if platform.system() != 'Windows':
            assert osp.realpath(latest_path) == osp.realpath(first_ckp_path)

        torch.load(latest_path)


@pytest.mark.parametrize('runner_class', RUNNERS.module_dict.values())
def test_build_lr_momentum_hook(runner_class):
    """Each registered lr/momentum config adds exactly one hook."""
    runner = runner_class(model=Model(), logger=logging.getLogger())

    lr_configs = [
        # policy that is already title
        dict(
            policy='CosineAnnealing',
            by_epoch=False,
            min_lr_ratio=0,
            warmup_iters=2,
            warmup_ratio=0.9),
        # policy that is already title
        dict(
            policy='Cyclic',
            by_epoch=False,
            target_ratio=(10, 1),
            cyclic_times=1,
            step_ratio_up=0.4),
        # policy that is not title
        dict(
            policy='cyclic',
            by_epoch=False,
            target_ratio=(0.85 / 0.95, 1),
            cyclic_times=1,
            step_ratio_up=0.4),
        # policy that is title
        dict(
            policy='Step',
            warmup='linear',
            warmup_iters=500,
            warmup_ratio=1.0 / 3,
            step=[8, 11]),
        # policy that is not title
        dict(
            policy='step',
            warmup='linear',
            warmup_iters=500,
            warmup_ratio=1.0 / 3,
            step=[8, 11]),
    ]
    momentum_configs = [
        # policy that is already title
        dict(
            policy='CosineAnnealing',
            min_momentum_ratio=0.99 / 0.95,
            by_epoch=False,
            warmup_iters=2,
            warmup_ratio=0.9 / 0.95),
        # policy that is already title
        dict(
            policy='Cyclic',
            by_epoch=False,
            target_ratio=(0.85 / 0.95, 1),
            cyclic_times=1,
            step_ratio_up=0.4),
        # policy that is not title
        dict(
            policy='cyclic',
            by_epoch=False,
            target_ratio=(0.85 / 0.95, 1),
            cyclic_times=1,
            step_ratio_up=0.4),
    ]

    expected_hooks = 0
    for lr_config in lr_configs:
        runner.register_lr_hook(lr_config)
        expected_hooks += 1
        assert len(runner.hooks) == expected_hooks

    for mom_config in momentum_configs:
        runner.register_momentum_hook(mom_config)
        expected_hooks += 1
        assert len(runner.hooks) == expected_hooks
@pytest.mark.parametrize('runner_class', RUNNERS.module_dict.values())
def test_register_timer_hook(runner_class):
    """``register_timer_hook`` accepts None, a config dict or a hook."""
    runner = runner_class(model=Model(), logger=logging.getLogger())

    # test register None
    runner.register_timer_hook(None)
    assert len(runner.hooks) == 0

    # test register IterTimerHook with config, then as an instance
    for position, timer_config in enumerate(
        [dict(type='IterTimerHook'), IterTimerHook()]):
        runner.register_timer_hook(timer_config)
        assert len(runner.hooks) == position + 1
        assert isinstance(runner.hooks[position], IterTimerHook)
# Whether this is a ROCm build of PyTorch; only probed for torch >= 1.5.
is_rocm_pytorch = False
if digit_version(TORCH_VERSION) >= digit_version('1.5'):
    from torch.utils.cpp_extension import ROCM_HOME
    has_hip = torch.version.hip is not None
    has_rocm_home = ROCM_HOME is not None
    is_rocm_pytorch = has_hip and has_rocm_home


def test_set_random_seed():
    """Re-seeding with the same seed reproduces every random stream."""

    def draw_samples():
        # One draw from each generator, always in the same order.
        return random.randint(0, 10), np.random.rand(2, 2), torch.rand(2, 2)

    set_random_seed(0)
    a_random, a_np_random, a_torch_random = draw_samples()
    cudnn = torch.backends.cudnn
    assert cudnn.deterministic is False
    assert cudnn.benchmark is False
    assert os.environ['PYTHONHASHSEED'] == str(0)

    set_random_seed(0, True)
    b_random, b_np_random, b_torch_random = draw_samples()
    assert torch.backends.cudnn.deterministic is True
    if not is_rocm_pytorch:
        assert torch.backends.cudnn.benchmark is False
    else:
        assert torch.backends.cudnn.benchmark is True

    assert a_random == b_random
    assert np.equal(a_np_random, b_np_random).all()
    assert torch.equal(a_torch_random, b_torch_random)
def _assert_config_roundtrip(cfg_dict, cfg_file, expected_dump=None,
                             check_items=()):
    """Build a ``Config`` from ``cfg_dict`` and check dump/reload behaviour.

    Args:
        cfg_dict (dict): Content used to construct the config.
        cfg_file (str): Path passed as ``filename``; its text must equal
            ``cfg.text`` and its base name is reused for the dump file.
        expected_dump (str | None): Expected result of ``cfg.dump()``.
            ``None`` means ``cfg.pretty_text`` is expected.
        check_items (Iterable[str]): Keys whose values must survive a
            dump + ``Config.fromfile`` round trip unchanged.
    """
    cfg = Config(cfg_dict, filename=cfg_file)
    assert isinstance(cfg, Config)
    assert cfg.filename == cfg_file
    # ``read_text`` closes the file; ``open(...).read()`` leaked the handle.
    assert cfg.text == Path(cfg_file).read_text()
    if expected_dump is None:
        expected_dump = cfg.pretty_text
    assert cfg.dump() == expected_dump
    with tempfile.TemporaryDirectory() as temp_config_dir:
        dump_file = osp.join(temp_config_dir, osp.basename(cfg_file))
        cfg.dump(dump_file)
        assert cfg.dump() == Path(dump_file).read_text()
        reloaded = Config.fromfile(dump_file)
        assert reloaded
        for key in check_items:
            assert reloaded[key] == cfg_dict[key]


def test_construct():
    """Construct ``Config`` objects directly and round-trip them via dump."""
    cfg = Config()
    assert cfg.filename is None
    assert cfg.text == ''
    assert len(cfg) == 0
    assert cfg._cfg_dict == {}

    with pytest.raises(TypeError):
        Config([0, 1])

    cfg_dict = dict(item1=[1, 2], item2=dict(a=0), item3=True, item4='test')
    # test a.py
    _assert_config_roundtrip(cfg_dict, osp.join(data_path, 'config/a.py'))
    # test b.json
    _assert_config_roundtrip(
        cfg_dict,
        osp.join(data_path, 'config/b.json'),
        expected_dump=json.dumps(cfg_dict))
    # test c.yaml
    _assert_config_roundtrip(
        cfg_dict,
        osp.join(data_path, 'config/c.yaml'),
        expected_dump=yaml.dump(cfg_dict))

    # test h.py
    cfg_file = osp.join(data_path, 'config/h.py')
    path = osp.join(osp.dirname(__file__), 'data', 'config')
    # the value of osp.dirname(__file__) may be `D:\a\xxx` in a windows
    # environment. When dumping the cfg_dict to file, `D:\a\xxx` will be
    # converted to `D:\x07\xxx`, which breaks the comparison between
    # `D:\a\xxx` and `D:\x07\xxx`. Therefore the path is forcibly converted
    # to its string representation with forward slashes (/).
    path = Path(path).as_posix()
    cfg_dict = dict(item1='h.py', item2=path, item3='abc_h')
    _assert_config_roundtrip(
        cfg_dict, cfg_file, check_items=('item1', 'item2', 'item3'))

    # test no use_predefined_variable
    cfg_dict = dict(
        item1='{{fileBasename}}',
        item2='{{ fileDirname}}',
        item3='abc_{{ fileBasenameNoExtension }}')
    raw_cfg = Config.fromfile(cfg_file, False)
    assert raw_cfg
    for key in ('item1', 'item2', 'item3'):
        assert raw_cfg[key] == cfg_dict[key]

    # test p.yaml
    cfg_file = osp.join(data_path, 'config/p.yaml')
    cfg_dict = dict(item1=osp.join(osp.dirname(__file__), 'data', 'config'))
    _assert_config_roundtrip(
        cfg_dict,
        cfg_file,
        expected_dump=yaml.dump(cfg_dict),
        check_items=('item1', ))

    # test no use_predefined_variable
    raw_cfg = Config.fromfile(cfg_file, False)
    assert raw_cfg
    assert raw_cfg['item1'] == '{{ fileDirname }}'

    # test o.json
    cfg_file = osp.join(data_path, 'config/o.json')
    cfg_dict = dict(item1=osp.join(osp.dirname(__file__), 'data', 'config'))
    _assert_config_roundtrip(
        cfg_dict,
        cfg_file,
        expected_dump=json.dumps(cfg_dict),
        check_items=('item1', ))

    # test no use_predefined_variable
    raw_cfg = Config.fromfile(cfg_file, False)
    assert raw_cfg
    assert raw_cfg['item1'] == '{{ fileDirname }}'
def test_fromfile():
    """``Config.fromfile`` for each format, custom imports and bad paths."""
    for filename in ['a.py', 'a.b.py', 'b.json', 'c.yaml']:
        cfg_file = osp.join(data_path, 'config', filename)
        cfg = Config.fromfile(cfg_file)
        assert isinstance(cfg, Config)
        assert cfg.filename == cfg_file
        # ``read_text`` closes the file; ``open(...).read()`` leaked it.
        assert cfg.text == osp.abspath(osp.expanduser(cfg_file)) + '\n' + \
            Path(cfg_file).read_text()

    # test custom_imports for Config.fromfile
    cfg_file = osp.join(data_path, 'config', 'q.py')
    imported_file = osp.join(data_path, 'config', 'r.py')
    target_pkg = osp.join(osp.dirname(__file__), 'r.py')

    # Since the imported config will be regarded as a tmp file
    # it should be copied to the directory at the same level
    shutil.copy(imported_file, target_pkg)
    try:
        Config.fromfile(cfg_file, import_custom_modules=True)
        assert os.environ.pop('TEST_VALUE') == 'test'
    finally:
        # Remove the copied module even if the import or assertion failed,
        # so a failure does not leave a stray ``r.py`` next to the tests.
        os.remove(target_pkg)

    with pytest.raises(FileNotFoundError):
        Config.fromfile('no_such_file.py')
    with pytest.raises(IOError):
        Config.fromfile(osp.join(data_path, 'color.jpg'))


def test_fromstring():
    """``Config.fromstring`` matches ``Config.fromfile`` per file format."""
    for filename in ['a.py', 'a.b.py', 'b.json', 'c.yaml']:
        cfg_file = osp.join(data_path, 'config', filename)
        file_format = osp.splitext(filename)[-1]
        in_cfg = Config.fromfile(cfg_file)

        out_cfg = Config.fromstring(in_cfg.pretty_text, '.py')
        assert in_cfg._cfg_dict == out_cfg._cfg_dict

        cfg_str = Path(cfg_file).read_text()
        out_cfg = Config.fromstring(cfg_str, file_format)
        assert in_cfg._cfg_dict == out_cfg._cfg_dict

    # test pretty_text only supports py file format
    cfg_file = osp.join(data_path, 'config', 'b.json')
    in_cfg = Config.fromfile(cfg_file)
    with pytest.raises(Exception):
        Config.fromstring(in_cfg.pretty_text, '.json')

    # test file format error
    cfg_str = Path(cfg_file).read_text()
    with pytest.raises(Exception):
        Config.fromstring(cfg_str, '.py')
def test_merge_from_base():
    """A config with one ``_base_`` merges text and values from the base."""
    cfg_file = osp.join(data_path, 'config/d.py')
    cfg = Config.fromfile(cfg_file)
    assert isinstance(cfg, Config)
    assert cfg.filename == cfg_file

    base_cfg_file = osp.join(data_path, 'config/base.py')
    # Expected text: for the base file and then the child file, the absolute
    # path, a newline and the file content. ``read_text`` closes each file;
    # ``open(...).read()`` leaked the handle.
    merge_text = osp.abspath(osp.expanduser(base_cfg_file)) + '\n' + \
        Path(base_cfg_file).read_text()
    merge_text += '\n' + osp.abspath(osp.expanduser(cfg_file)) + '\n' + \
        Path(cfg_file).read_text()
    assert cfg.text == merge_text

    assert cfg.item1 == [2, 3]
    assert cfg.item2.a == 1
    assert cfg.item3 is False
    assert cfg.item4 == 'test_base'

    with pytest.raises(TypeError):
        Config.fromfile(osp.join(data_path, 'config/e.py'))


def test_merge_from_multiple_bases():
    """Values of a config that inherits from several bases (``l.py``)."""
    cfg_file = osp.join(data_path, 'config/l.py')
    cfg = Config.fromfile(cfg_file)
    assert isinstance(cfg, Config)
    assert cfg.filename == cfg_file
    # cfg.field
    assert cfg.item1 == [1, 2]
    assert cfg.item2.a == 0
    assert cfg.item3 is False
    assert cfg.item4 == 'test'
    assert cfg.item5 == dict(a=0, b=1)
    assert cfg.item6 == [dict(a=0), dict(b=1)]
    assert cfg.item7 == dict(a=[0, 1, 2], b=dict(c=[3.1, 4.2, 5.3]))

    with pytest.raises(KeyError):
        Config.fromfile(osp.join(data_path, 'config/m.py'))
def test_base_variables():
    """Configs that reference variables of their base configs."""

    def check_identity_and_shared_items(cfg, cfg_file):
        # Assertions common to the ``t.*`` and ``u.*`` configs.
        assert isinstance(cfg, Config)
        assert cfg.filename == cfg_file
        assert cfg.item1 == [1, 2]
        assert cfg.item2.a == 0
        assert cfg.item3 is False
        assert cfg.item4 == 'test'
        assert cfg.item5 == dict(a=0, b=1)
        assert cfg.item6 == [dict(a=0), dict(b=1)]
        assert cfg.item7 == dict(a=[0, 1, 2], b=dict(c=[3.1, 4.2, 5.3]))

    for file in ['t.py', 't.json', 't.yaml']:
        cfg_file = osp.join(data_path, f'config/{file}')
        cfg = Config.fromfile(cfg_file)
        check_identity_and_shared_items(cfg, cfg_file)
        assert cfg.item8 == file
        assert cfg.item9 == dict(a=0)
        assert cfg.item10 == [3.1, 4.2, 5.3]

    # test nested base
    for file in ['u.py', 'u.json', 'u.yaml']:
        cfg_file = osp.join(data_path, f'config/{file}')
        cfg = Config.fromfile(cfg_file)
        assert isinstance(cfg, Config)
        assert cfg.filename == cfg_file
        assert cfg.base == '_base_.item8'
        check_identity_and_shared_items(cfg, cfg_file)
        assert cfg.item8 == 't.py'
        assert cfg.item9 == dict(a=0)
        assert cfg.item10 == [3.1, 4.2, 5.3]
        assert cfg.item11 == 't.py'
        assert cfg.item12 == dict(a=0)
        assert cfg.item13 == [3.1, 4.2, 5.3]
        assert cfg.item14 == [1, 2]
        expected_item15 = dict(
            a=dict(b=dict(a=0)),
            b=[False],
            c=['test'],
            d=[[{
                'e': 0
            }], [{
                'a': 0
            }, {
                'b': 1
            }]],
            e=[1, 2])
        assert cfg.item15 == expected_item15

    # test reference assignment for py
    cfg_file = osp.join(data_path, 'config/v.py')
    cfg = Config.fromfile(cfg_file)
    assert isinstance(cfg, Config)
    assert cfg.filename == cfg_file
    assert cfg.item21 == 't.py'
    assert cfg.item22 == 't.py'
    assert cfg.item23 == [3.1, 4.2, 5.3]
    assert cfg.item24 == [3.1, 4.2, 5.3]
    expected_item25 = dict(
        a=dict(b=[3.1, 4.2, 5.3]),
        b=[[3.1, 4.2, 5.3]],
        c=[[{
            'e': 't.py'
        }], [{
            'a': 0
        }, {
            'b': 1
        }]],
        e='t.py')
    assert cfg.item25 == expected_item25


def test_merge_recursive_bases():
    """Values of a config whose base has a base of its own (``f.py``)."""
    cfg_file = osp.join(data_path, 'config/f.py')
    cfg = Config.fromfile(cfg_file)
    assert isinstance(cfg, Config)
    assert cfg.filename == cfg_file
    # cfg.field
    assert cfg.item1 == [2, 3]
    assert cfg.item2.a == 1
    assert cfg.item3 is False
    assert cfg.item4 == 'test_recursive_bases'
def test_merge_from_dict():
    """``merge_from_dict`` with dotted keys, with and without list keys."""
    cfg = Config.fromfile(osp.join(data_path, 'config/a.py'))
    cfg.merge_from_dict({'item2.a': 1, 'item2.b': 0.1, 'item3': False})
    assert cfg.item2 == dict(a=1, b=0.1)
    assert cfg.item3 is False

    cfg = Config.fromfile(osp.join(data_path, 'config/s.py'))

    # Allow list keys
    list_key_options = {'item.0.a': 1, 'item.1.b': 1}
    cfg.merge_from_dict(list_key_options, allow_list_keys=True)
    assert cfg.item == [{'a': 1}, {'b': 1, 'c': 0}]

    # allow_list_keys is False
    list_key_options = {'item.0.a': 1, 'item.1.b': 1}
    with pytest.raises(TypeError):
        cfg.merge_from_dict(list_key_options, allow_list_keys=False)

    # Overflowed index number
    overflow_options = {'item.2.a': 1}
    with pytest.raises(KeyError):
        cfg.merge_from_dict(overflow_options, allow_list_keys=True)


def test_merge_delete():
    """Values and types of the loaded ``delete.py`` config."""
    cfg = Config.fromfile(osp.join(data_path, 'config/delete.py'))
    # cfg.field
    assert cfg.item1 == dict(a=0)
    assert cfg.item2 == dict(a=0, b=0)
    assert cfg.item3 is True
    assert cfg.item4 == 'test'
    assert '_delete_' not in cfg.item2

    # related issue: https://github.com/open-mmlab/mmcv/issues/1570
    for merged in (cfg.item1, cfg.item2):
        assert type(merged) == ConfigDict


def test_merge_intermediate_variable():
    """Values of the loaded ``i_child.py`` config."""
    cfg = Config.fromfile(osp.join(data_path, 'config/i_child.py'))
    # cfg.field
    assert cfg.item1 == [1, 2]
    assert cfg.item2 == dict(a=0)
    assert cfg.item3 is True
    assert cfg.item4 == 'test'
    assert cfg.item_cfg == dict(b=2)
    assert cfg.item5 == dict(cfg=dict(b=1))
    assert cfg.item6 == dict(cfg=dict(b=2))


def test_fromfile_in_config():
    """Values of the loaded ``code.py`` config, including nested ``cfg``."""
    cfg = Config.fromfile(osp.join(data_path, 'config/code.py'))
    # cfg.field
    nested = cfg.cfg
    assert nested.item1 == [1, 2]
    assert nested.item2 == dict(a=0)
    assert nested.item3 is True
    assert nested.item4 == 'test'
    assert cfg.item5 == 1
assert value in cfg_dict.values() # cfg.items() for name, value in cfg.items(): assert name in cfg_dict assert value in cfg_dict.values() # cfg.field assert cfg.item1 == cfg_dict['item1'] assert cfg.item2 == cfg_dict['item2'] assert cfg.item2.a == 0 assert cfg.item3 == cfg_dict['item3'] assert cfg.item4 == cfg_dict['item4'] with pytest.raises(AttributeError): cfg.not_exist # field in cfg, cfg[field], cfg.get() for name in ['item1', 'item2', 'item3', 'item4']: assert name in cfg assert cfg[name] == cfg_dict[name] assert cfg.get(name) == cfg_dict[name] assert cfg.get('not_exist') is None assert cfg.get('not_exist', 0) == 0 with pytest.raises(KeyError): cfg['not_exist'] assert 'item1' in cfg assert 'not_exist' not in cfg # cfg.update() cfg.update(dict(item1=0)) assert cfg.item1 == 0 cfg.update(dict(item2=dict(a=1))) assert cfg.item2.a == 1 def test_setattr(): cfg = Config() cfg.item1 = [1, 2] cfg.item2 = {'a': 0} cfg['item5'] = {'a': {'b': None}} assert cfg._cfg_dict['item1'] == [1, 2] assert cfg.item1 == [1, 2] assert cfg._cfg_dict['item2'] == {'a': 0} assert cfg.item2.a == 0 assert cfg._cfg_dict['item5'] == {'a': {'b': None}} assert cfg.item5.a.b is None def test_pretty_text(): cfg_file = osp.join(data_path, 'config/l.py') cfg = Config.fromfile(cfg_file) with tempfile.TemporaryDirectory() as temp_config_dir: text_cfg_filename = osp.join(temp_config_dir, '_text_config.py') with open(text_cfg_filename, 'w') as f: f.write(cfg.pretty_text) text_cfg = Config.fromfile(text_cfg_filename) assert text_cfg._cfg_dict == cfg._cfg_dict def test_dict_action(): parser = argparse.ArgumentParser(description='Train a detector') parser.add_argument( '--options', nargs='+', action=DictAction, help='custom options') # Nested brackets args = parser.parse_args( ['--options', 'item2.a=a,b', 'item2.b=[(a,b), [1,2], false]']) out_dict = {'item2.a': ['a', 'b'], 'item2.b': [('a', 'b'), [1, 2], False]} assert args.options == out_dict # Single Nested brackets args = 
parser.parse_args(['--options', 'item2.a=[[1]]']) out_dict = {'item2.a': [[1]]} assert args.options == out_dict # Imbalance bracket with pytest.raises(AssertionError): parser.parse_args(['--options', 'item2.a=[(a,b), [1,2], false']) # Normal values args = parser.parse_args( ['--options', 'item2.a=1', 'item2.b=0.1', 'item2.c=x', 'item3=false']) out_dict = {'item2.a': 1, 'item2.b': 0.1, 'item2.c': 'x', 'item3': False} assert args.options == out_dict cfg_file = osp.join(data_path, 'config/a.py') cfg = Config.fromfile(cfg_file) cfg.merge_from_dict(args.options) assert cfg.item2 == dict(a=1, b=0.1, c='x') assert cfg.item3 is False def test_dump_mapping(): cfg_file = osp.join(data_path, 'config/n.py') cfg = Config.fromfile(cfg_file) with tempfile.TemporaryDirectory() as temp_config_dir: text_cfg_filename = osp.join(temp_config_dir, '_text_config.py') cfg.dump(text_cfg_filename) text_cfg = Config.fromfile(text_cfg_filename) assert text_cfg._cfg_dict == cfg._cfg_dict def test_reserved_key(): cfg_file = osp.join(data_path, 'config/g.py') with pytest.raises(KeyError): Config.fromfile(cfg_file) def test_syntax_error(): # the name can not be used to open the file a second time in windows, # so `delete` should be set as `False` and we need to manually remove it # more details can be found at https://github.com/open-mmlab/mmcv/pull/1077 temp_cfg_file = tempfile.NamedTemporaryFile(suffix='.py', delete=False) temp_cfg_path = temp_cfg_file.name # write a file with syntax error with open(temp_cfg_path, 'w') as f: f.write('a=0b=dict(c=1)') with pytest.raises( SyntaxError, match='There are syntax errors in config file'): Config.fromfile(temp_cfg_path) temp_cfg_file.close() os.remove(temp_cfg_path) def test_pickle_support(): cfg_file = osp.join(data_path, 'config/n.py') cfg = Config.fromfile(cfg_file) with tempfile.TemporaryDirectory() as temp_config_dir: pkl_cfg_filename = osp.join(temp_config_dir, '_pickle.pkl') dump(cfg, pkl_cfg_filename) pkl_cfg = load(pkl_cfg_filename) assert 
pkl_cfg._cfg_dict == cfg._cfg_dict def test_deprecation(): deprecated_cfg_files = [ osp.join(data_path, 'config/deprecated.py'), osp.join(data_path, 'config/deprecated_as_base.py') ] for cfg_file in deprecated_cfg_files: with pytest.warns(DeprecationWarning): cfg = Config.fromfile(cfg_file) assert cfg.item1 == 'expected' ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_env.py ================================================ import sys import pytest import mmcv def test_collect_env(): try: import torch # noqa: F401 except ModuleNotFoundError: pytest.skip('skipping tests that require PyTorch') from mmcv.utils import collect_env env_info = collect_env() expected_keys = [ 'sys.platform', 'Python', 'CUDA available', 'PyTorch', 'PyTorch compiling details', 'OpenCV', 'MMCV', 'MMCV Compiler', 'MMCV CUDA Compiler' ] for key in expected_keys: assert key in env_info if env_info['CUDA available']: for key in ['CUDA_HOME', 'NVCC']: assert key in env_info if sys.platform != 'win32': assert 'GCC' in env_info assert env_info['sys.platform'] == sys.platform assert env_info['Python'] == sys.version.replace('\n', '') assert env_info['MMCV'] == mmcv.__version__ ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_hub.py ================================================ import pytest from torch.utils import model_zoo from mmcv.utils import TORCH_VERSION, digit_version, load_url def test_load_url(): url1 = 'https://download.openmmlab.com/mmcv/test_data/saved_in_pt1.5.pth' url2 = 'https://download.openmmlab.com/mmcv/test_data/saved_in_pt1.6.pth' # The 1.6 release of PyTorch switched torch.save to use a new zipfile-based # file format. It will cause RuntimeError when a checkpoint was saved in # torch >= 1.6.0 but loaded in torch < 1.7.0. 
# More details at https://github.com/open-mmlab/mmpose/issues/904 if digit_version(TORCH_VERSION) < digit_version('1.7.0'): model_zoo.load_url(url1) with pytest.raises(RuntimeError): model_zoo.load_url(url2) else: # high version of PyTorch can load checkpoints from url, regardless # of which version they were saved in model_zoo.load_url(url1) model_zoo.load_url(url2) load_url(url1) # if a checkpoint was saved in torch >= 1.6.0 but loaded in torch < 1.5.0, # it will raise a RuntimeError if digit_version(TORCH_VERSION) < digit_version('1.5.0'): with pytest.raises(RuntimeError): load_url(url2) else: load_url(url2) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_logging.py ================================================ import logging import os import platform import tempfile from unittest.mock import patch import pytest from mmcv import get_logger, print_log if platform.system() == 'Windows': import regex as re else: import re @patch('torch.distributed.get_rank', lambda: 0) @patch('torch.distributed.is_initialized', lambda: True) @patch('torch.distributed.is_available', lambda: True) def test_get_logger_rank0(): logger = get_logger('rank0.pkg1') assert isinstance(logger, logging.Logger) assert len(logger.handlers) == 1 assert isinstance(logger.handlers[0], logging.StreamHandler) assert logger.handlers[0].level == logging.INFO logger = get_logger('rank0.pkg2', log_level=logging.DEBUG) assert isinstance(logger, logging.Logger) assert len(logger.handlers) == 1 assert logger.handlers[0].level == logging.DEBUG # the name can not be used to open the file a second time in windows, # so `delete` should be set as `False` and we need to manually remove it # more details can be found at https://github.com/open-mmlab/mmcv/pull/1077 with tempfile.NamedTemporaryFile(delete=False) as f: logger = get_logger('rank0.pkg3', log_file=f.name) assert isinstance(logger, logging.Logger) assert len(logger.handlers) == 2 assert 
isinstance(logger.handlers[0], logging.StreamHandler) assert isinstance(logger.handlers[1], logging.FileHandler) logger_pkg3 = get_logger('rank0.pkg3') assert id(logger_pkg3) == id(logger) # flushing and closing all handlers in order to remove `f.name` logging.shutdown() os.remove(f.name) logger_pkg3 = get_logger('rank0.pkg3.subpkg') assert logger_pkg3.handlers == logger_pkg3.handlers @patch('torch.distributed.get_rank', lambda: 1) @patch('torch.distributed.is_initialized', lambda: True) @patch('torch.distributed.is_available', lambda: True) def test_get_logger_rank1(): logger = get_logger('rank1.pkg1') assert isinstance(logger, logging.Logger) assert len(logger.handlers) == 1 assert isinstance(logger.handlers[0], logging.StreamHandler) assert logger.handlers[0].level == logging.INFO # the name can not be used to open the file a second time in windows, # so `delete` should be set as `False` and we need to manually remove it # more details can be found at https://github.com/open-mmlab/mmcv/pull/1077 with tempfile.NamedTemporaryFile(delete=False) as f: logger = get_logger('rank1.pkg2', log_file=f.name) assert isinstance(logger, logging.Logger) assert len(logger.handlers) == 1 assert logger.handlers[0].level == logging.INFO # flushing and closing all handlers in order to remove `f.name` logging.shutdown() os.remove(f.name) def test_print_log_print(capsys): print_log('welcome', logger=None) out, _ = capsys.readouterr() assert out == 'welcome\n' def test_print_log_silent(capsys, caplog): print_log('welcome', logger='silent') out, _ = capsys.readouterr() assert out == '' assert len(caplog.records) == 0 def test_print_log_logger(caplog): print_log('welcome', logger='mmcv') assert caplog.record_tuples[-1] == ('mmcv', logging.INFO, 'welcome') print_log('welcome', logger='mmcv', level=logging.ERROR) assert caplog.record_tuples[-1] == ('mmcv', logging.ERROR, 'welcome') # the name can not be used to open the file a second time in windows, # so `delete` should be set as `False` 
and we need to manually remove it # more details can be found at https://github.com/open-mmlab/mmcv/pull/1077 with tempfile.NamedTemporaryFile(delete=False) as f: logger = get_logger('abc', log_file=f.name) print_log('welcome', logger=logger) assert caplog.record_tuples[-1] == ('abc', logging.INFO, 'welcome') with open(f.name, 'r') as fin: log_text = fin.read() regex_time = r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}' match = re.fullmatch(regex_time + r' - abc - INFO - welcome\n', log_text) assert match is not None # flushing and closing all handlers in order to remove `f.name` logging.shutdown() os.remove(f.name) def test_print_log_exception(): with pytest.raises(TypeError): print_log('welcome', logger=0) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_misc.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import pytest import mmcv from mmcv import deprecated_api_warning from mmcv.utils.misc import has_method def test_to_ntuple(): single_number = 2 assert mmcv.utils.to_1tuple(single_number) == (single_number, ) assert mmcv.utils.to_2tuple(single_number) == (single_number, single_number) assert mmcv.utils.to_3tuple(single_number) == (single_number, single_number, single_number) assert mmcv.utils.to_4tuple(single_number) == (single_number, single_number, single_number, single_number) assert mmcv.utils.to_ntuple(5)(single_number) == (single_number, single_number, single_number, single_number, single_number) assert mmcv.utils.to_ntuple(6)(single_number) == (single_number, single_number, single_number, single_number, single_number, single_number) def test_iter_cast(): assert mmcv.list_cast([1, 2, 3], int) == [1, 2, 3] assert mmcv.list_cast(['1.1', 2, '3'], float) == [1.1, 2.0, 3.0] assert mmcv.list_cast([1, 2, 3], str) == ['1', '2', '3'] assert mmcv.tuple_cast((1, 2, 3), str) == ('1', '2', '3') assert next(mmcv.iter_cast([1, 2, 3], str)) == '1' with 
pytest.raises(TypeError): mmcv.iter_cast([1, 2, 3], '') with pytest.raises(TypeError): mmcv.iter_cast(1, str) def test_is_seq_of(): assert mmcv.is_seq_of([1.0, 2.0, 3.0], float) assert mmcv.is_seq_of([(1, ), (2, ), (3, )], tuple) assert mmcv.is_seq_of((1.0, 2.0, 3.0), float) assert mmcv.is_list_of([1.0, 2.0, 3.0], float) assert not mmcv.is_seq_of((1.0, 2.0, 3.0), float, seq_type=list) assert not mmcv.is_tuple_of([1.0, 2.0, 3.0], float) assert not mmcv.is_seq_of([1.0, 2, 3], int) assert not mmcv.is_seq_of((1.0, 2, 3), int) def test_slice_list(): in_list = [1, 2, 3, 4, 5, 6] assert mmcv.slice_list(in_list, [1, 2, 3]) == [[1], [2, 3], [4, 5, 6]] assert mmcv.slice_list(in_list, [len(in_list)]) == [in_list] with pytest.raises(TypeError): mmcv.slice_list(in_list, 2.0) with pytest.raises(ValueError): mmcv.slice_list(in_list, [1, 2]) def test_concat_list(): assert mmcv.concat_list([[1, 2]]) == [1, 2] assert mmcv.concat_list([[1, 2], [3, 4, 5], [6]]) == [1, 2, 3, 4, 5, 6] def test_requires_package(capsys): @mmcv.requires_package('nnn') def func_a(): pass @mmcv.requires_package(['numpy', 'n1', 'n2']) def func_b(): pass @mmcv.requires_package('numpy') def func_c(): return 1 with pytest.raises(RuntimeError): func_a() out, _ = capsys.readouterr() assert out == ('Prerequisites "nnn" are required in method "func_a" but ' 'not found, please install them first.\n') with pytest.raises(RuntimeError): func_b() out, _ = capsys.readouterr() assert out == ( 'Prerequisites "n1, n2" are required in method "func_b" but not found,' ' please install them first.\n') assert func_c() == 1 def test_requires_executable(capsys): @mmcv.requires_executable('nnn') def func_a(): pass @mmcv.requires_executable(['ls', 'n1', 'n2']) def func_b(): pass @mmcv.requires_executable('mv') def func_c(): return 1 with pytest.raises(RuntimeError): func_a() out, _ = capsys.readouterr() assert out == ('Prerequisites "nnn" are required in method "func_a" but ' 'not found, please install them first.\n') with 
pytest.raises(RuntimeError): func_b() out, _ = capsys.readouterr() assert out == ( 'Prerequisites "n1, n2" are required in method "func_b" but not found,' ' please install them first.\n') assert func_c() == 1 def test_import_modules_from_strings(): # multiple imports import os.path as osp_ import sys as sys_ osp, sys = mmcv.import_modules_from_strings(['os.path', 'sys']) assert osp == osp_ assert sys == sys_ # single imports osp = mmcv.import_modules_from_strings('os.path') assert osp == osp_ # No imports assert mmcv.import_modules_from_strings(None) is None assert mmcv.import_modules_from_strings([]) is None assert mmcv.import_modules_from_strings('') is None # Unsupported types with pytest.raises(TypeError): mmcv.import_modules_from_strings(1) with pytest.raises(TypeError): mmcv.import_modules_from_strings([1]) # Failed imports with pytest.raises(ImportError): mmcv.import_modules_from_strings('_not_implemented_module') with pytest.warns(UserWarning): imported = mmcv.import_modules_from_strings( '_not_implemented_module', allow_failed_imports=True) assert imported is None with pytest.warns(UserWarning): imported = mmcv.import_modules_from_strings( ['os.path', '_not_implemented'], allow_failed_imports=True) assert imported[0] == osp assert imported[1] is None def test_is_method_overridden(): class Base: def foo1(): pass def foo2(): pass class Sub(Base): def foo1(): pass # test passing sub class directly assert mmcv.is_method_overridden('foo1', Base, Sub) assert not mmcv.is_method_overridden('foo2', Base, Sub) # test passing instance of sub class sub_instance = Sub() assert mmcv.is_method_overridden('foo1', Base, sub_instance) assert not mmcv.is_method_overridden('foo2', Base, sub_instance) # base_class should be a class, not instance base_instance = Base() with pytest.raises(AssertionError): mmcv.is_method_overridden('foo1', base_instance, sub_instance) def test_has_method(): class Foo: def __init__(self, name): self.name = name def print_name(self): 
print(self.name) foo = Foo('foo') assert not has_method(foo, 'name') assert has_method(foo, 'print_name') def test_deprecated_api_warning(): @deprecated_api_warning(name_dict=dict(old_key='new_key')) def dummy_func(new_key=1): return new_key # replace `old_key` to `new_key` assert dummy_func(old_key=2) == 2 # The expected behavior is to replace the # deprecated key `old_key` to `new_key`, # but got them in the arguments at the same time with pytest.raises(AssertionError): dummy_func(old_key=1, new_key=2) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_parrots_jit.py ================================================ import pytest import torch import mmcv from mmcv.utils import TORCH_VERSION skip_no_parrots = pytest.mark.skipif( TORCH_VERSION != 'parrots', reason='test case under parrots environment') class TestJit(object): def test_add_dict(self): @mmcv.jit def add_dict(oper): rets = oper['x'] + oper['y'] return {'result': rets} def add_dict_pyfunc(oper): rets = oper['x'] + oper['y'] return {'result': rets} a = torch.rand((3, 4)) b = torch.rand((3, 4)) oper = {'x': a, 'y': b} rets_t = add_dict(oper) rets = add_dict_pyfunc(oper) assert 'result' in rets assert (rets_t['result'] == rets['result']).all() def test_add_list(self): @mmcv.jit def add_list(oper, x, y): rets = {} for idx, pair in enumerate(oper): rets[f'k{idx}'] = pair['x'] + pair['y'] rets[f'k{len(oper)}'] = x + y return rets def add_list_pyfunc(oper, x, y): rets = {} for idx, pair in enumerate(oper): rets[f'k{idx}'] = pair['x'] + pair['y'] rets[f'k{len(oper)}'] = x + y return rets pair_num = 3 oper = [] for _ in range(pair_num): oper.append({'x': torch.rand((3, 4)), 'y': torch.rand((3, 4))}) a = torch.rand((3, 4)) b = torch.rand((3, 4)) rets = add_list_pyfunc(oper, x=a, y=b) rets_t = add_list(oper, x=a, y=b) for idx in range(pair_num + 1): assert f'k{idx}' in rets_t assert (rets[f'k{idx}'] == rets_t[f'k{idx}']).all() @skip_no_parrots def 
test_jit_cache(self): @mmcv.jit def func(oper): if oper['const'] > 1: return oper['x'] * 2 + oper['y'] else: return oper['x'] * 2 - oper['y'] def pyfunc(oper): if oper['const'] > 1: return oper['x'] * 2 + oper['y'] else: return oper['x'] * 2 - oper['y'] assert len(func._cache._cache) == 0 oper = {'const': 2, 'x': torch.rand((3, 4)), 'y': torch.rand((3, 4))} rets_plus = pyfunc(oper) rets_plus_t = func(oper) assert (rets_plus == rets_plus_t).all() assert len(func._cache._cache) == 1 oper['const'] = 0.5 rets_minus = pyfunc(oper) rets_minus_t = func(oper) assert (rets_minus == rets_minus_t).all() assert len(func._cache._cache) == 2 rets_a = (rets_minus_t + rets_plus_t) / 4 assert torch.allclose(oper['x'], rets_a) @skip_no_parrots def test_jit_shape(self): @mmcv.jit def func(a): return a + 1 assert len(func._cache._cache) == 0 a = torch.ones((3, 4)) r = func(a) assert r.shape == (3, 4) assert (r == 2).all() assert len(func._cache._cache) == 1 a = torch.ones((2, 3, 4)) r = func(a) assert r.shape == (2, 3, 4) assert (r == 2).all() assert len(func._cache._cache) == 2 @skip_no_parrots def test_jit_kwargs(self): @mmcv.jit def func(a, b): return torch.mean((a - b) * (a - b)) assert len(func._cache._cache) == 0 x = torch.rand((16, 32)) y = torch.rand((16, 32)) func(x, y) assert len(func._cache._cache) == 1 func(x, b=y) assert len(func._cache._cache) == 1 func(b=y, a=x) assert len(func._cache._cache) == 1 def test_jit_derivate(self): @mmcv.jit(derivate=True) def func(x, y): return (x + 2) * (y - 2) a = torch.rand((3, 4)) b = torch.rand((3, 4)) a.requires_grad = True c = func(a, b) assert c.requires_grad d = torch.empty_like(c) d.fill_(1.0) c.backward(d) assert torch.allclose(a.grad, (b - 2)) assert b.grad is None a.grad = None c = func(a, b) assert c.requires_grad d = torch.empty_like(c) d.fill_(2.7) c.backward(d) assert torch.allclose(a.grad, 2.7 * (b - 2)) assert b.grad is None def test_jit_optimize(self): @mmcv.jit(optimize=True) def func(a, b): return torch.mean((a - b) * 
(a - b)) def pyfunc(a, b): return torch.mean((a - b) * (a - b)) a = torch.rand((16, 32)) b = torch.rand((16, 32)) c = func(a, b) d = pyfunc(a, b) assert torch.allclose(c, d) @mmcv.skip_no_elena def test_jit_coderize(self): if not torch.cuda.is_available(): return @mmcv.jit(coderize=True) def func(a, b): return (a + b) * (a - b) def pyfunc(a, b): return (a + b) * (a - b) a = torch.rand((16, 32), device='cuda') b = torch.rand((16, 32), device='cuda') c = func(a, b) d = pyfunc(a, b) assert torch.allclose(c, d) def test_jit_value_dependent(self): @mmcv.jit def func(a, b): torch.nonzero(a) return torch.mean((a - b) * (a - b)) def pyfunc(a, b): torch.nonzero(a) return torch.mean((a - b) * (a - b)) a = torch.rand((16, 32)) b = torch.rand((16, 32)) c = func(a, b) d = pyfunc(a, b) assert torch.allclose(c, d) @skip_no_parrots def test_jit_check_input(self): def func(x): y = torch.rand_like(x) return x + y a = torch.ones((3, 4)) with pytest.raises(AssertionError): func = mmcv.jit(func, check_input=(a, )) @skip_no_parrots def test_jit_partial_shape(self): @mmcv.jit(full_shape=False) def func(a, b): return torch.mean((a - b) * (a - b)) def pyfunc(a, b): return torch.mean((a - b) * (a - b)) a = torch.rand((3, 4)) b = torch.rand((3, 4)) assert torch.allclose(func(a, b), pyfunc(a, b)) assert len(func._cache._cache) == 1 a = torch.rand((6, 5)) b = torch.rand((6, 5)) assert torch.allclose(func(a, b), pyfunc(a, b)) assert len(func._cache._cache) == 1 a = torch.rand((3, 4, 5)) b = torch.rand((3, 4, 5)) assert torch.allclose(func(a, b), pyfunc(a, b)) assert len(func._cache._cache) == 2 a = torch.rand((1, 9, 8)) b = torch.rand((1, 9, 8)) assert torch.allclose(func(a, b), pyfunc(a, b)) assert len(func._cache._cache) == 2 def test_instance_method(self): class T(object): def __init__(self, shape): self._c = torch.rand(shape) @mmcv.jit def test_method(self, x, y): return (x * self._c) + y shape = (16, 32) t = T(shape) a = torch.rand(shape) b = torch.rand(shape) res = (a * t._c) + b jit_res 
= t.test_method(a, b) assert torch.allclose(res, jit_res) t = T(shape) res = (a * t._c) + b jit_res = t.test_method(a, b) assert torch.allclose(res, jit_res) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_path.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. import os.path as osp from pathlib import Path import pytest import mmcv def test_is_filepath(): assert mmcv.is_filepath(__file__) assert mmcv.is_filepath('abc') assert mmcv.is_filepath(Path('/etc')) assert not mmcv.is_filepath(0) def test_fopen(): assert hasattr(mmcv.fopen(__file__), 'read') assert hasattr(mmcv.fopen(Path(__file__)), 'read') def test_check_file_exist(): mmcv.check_file_exist(__file__) with pytest.raises(FileNotFoundError): mmcv.check_file_exist('no_such_file.txt') def test_scandir(): folder = osp.join(osp.dirname(osp.dirname(__file__)), 'data/for_scan') filenames = ['a.bin', '1.txt', '2.txt', '1.json', '2.json', '3.TXT'] assert set(mmcv.scandir(folder)) == set(filenames) assert set(mmcv.scandir(Path(folder))) == set(filenames) assert set(mmcv.scandir(folder, '.txt')) == set( [filename for filename in filenames if filename.endswith('.txt')]) assert set(mmcv.scandir(folder, ('.json', '.txt'))) == set([ filename for filename in filenames if filename.endswith(('.txt', '.json')) ]) assert set(mmcv.scandir(folder, '.png')) == set() # path of sep is `\\` in windows but `/` in linux, so osp.join should be # used to join string for compatibility filenames_recursive = [ 'a.bin', '1.txt', '2.txt', '1.json', '2.json', '3.TXT', osp.join('sub', '1.json'), osp.join('sub', '1.txt'), '.file' ] # .file starts with '.' 
and is a file so it will not be scanned assert set(mmcv.scandir(folder, recursive=True)) == set( [filename for filename in filenames_recursive if filename != '.file']) assert set(mmcv.scandir(Path(folder), recursive=True)) == set( [filename for filename in filenames_recursive if filename != '.file']) assert set(mmcv.scandir(folder, '.txt', recursive=True)) == set([ filename for filename in filenames_recursive if filename.endswith('.txt') ]) assert set( mmcv.scandir(folder, '.TXT', recursive=True, case_sensitive=False)) == set([ filename for filename in filenames_recursive if filename.endswith(('.txt', '.TXT')) ]) assert set( mmcv.scandir( folder, ('.TXT', '.JSON'), recursive=True, case_sensitive=False)) == set([ filename for filename in filenames_recursive if filename.endswith(('.txt', '.json', '.TXT')) ]) with pytest.raises(TypeError): list(mmcv.scandir(123)) with pytest.raises(TypeError): list(mmcv.scandir(folder, 111)) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_progressbar.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
import os import time try: from unittest.mock import patch except ImportError: from mock import patch try: from StringIO import StringIO except ImportError: from io import StringIO import mmcv # isort:skip def reset_string_io(io): io.truncate(0) io.seek(0) class TestProgressBar: def test_start(self): out = StringIO() bar_width = 20 # without total task num prog_bar = mmcv.ProgressBar(bar_width=bar_width, file=out) assert out.getvalue() == 'completed: 0, elapsed: 0s' reset_string_io(out) prog_bar = mmcv.ProgressBar(bar_width=bar_width, start=False, file=out) assert out.getvalue() == '' reset_string_io(out) prog_bar.start() assert out.getvalue() == 'completed: 0, elapsed: 0s' # with total task num reset_string_io(out) prog_bar = mmcv.ProgressBar(10, bar_width=bar_width, file=out) assert out.getvalue() == f'[{" " * bar_width}] 0/10, elapsed: 0s, ETA:' reset_string_io(out) prog_bar = mmcv.ProgressBar( 10, bar_width=bar_width, start=False, file=out) assert out.getvalue() == '' reset_string_io(out) prog_bar.start() assert out.getvalue() == f'[{" " * bar_width}] 0/10, elapsed: 0s, ETA:' def test_update(self): out = StringIO() bar_width = 20 # without total task num prog_bar = mmcv.ProgressBar(bar_width=bar_width, file=out) time.sleep(1) reset_string_io(out) prog_bar.update() assert out.getvalue() == 'completed: 1, elapsed: 1s, 1.0 tasks/s' reset_string_io(out) # with total task num prog_bar = mmcv.ProgressBar(10, bar_width=bar_width, file=out) time.sleep(1) reset_string_io(out) prog_bar.update() assert out.getvalue() == f'\r[{">" * 2 + " " * 18}] 1/10, 1.0 ' \ 'task/s, elapsed: 1s, ETA:     9s' def test_adaptive_length(self): with patch.dict('os.environ', {'COLUMNS': '80'}): out = StringIO() bar_width = 20 prog_bar = mmcv.ProgressBar(10, bar_width=bar_width, file=out) time.sleep(1) reset_string_io(out) prog_bar.update() assert len(out.getvalue()) == 66 os.environ['COLUMNS'] = '30' reset_string_io(out) prog_bar.update() assert len(out.getvalue()) == 48 os.environ['COLUMNS'] =
'60' reset_string_io(out) prog_bar.update() assert len(out.getvalue()) == 60 def sleep_1s(num): time.sleep(1) return num def test_track_progress_list(): out = StringIO() ret = mmcv.track_progress(sleep_1s, [1, 2, 3], bar_width=3, file=out) assert out.getvalue() == ( '[   ] 0/3, elapsed: 0s, ETA:' '\r[>  ] 1/3, 1.0 task/s, elapsed: 1s, ETA:     2s' '\r[>> ] 2/3, 1.0 task/s, elapsed: 2s, ETA:     1s' '\r[>>>] 3/3, 1.0 task/s, elapsed: 3s, ETA:     0s\n') assert ret == [1, 2, 3] def test_track_progress_iterator(): out = StringIO() ret = mmcv.track_progress( sleep_1s, ((i for i in [1, 2, 3]), 3), bar_width=3, file=out) assert out.getvalue() == ( '[   ] 0/3, elapsed: 0s, ETA:' '\r[>  ] 1/3, 1.0 task/s, elapsed: 1s, ETA:     2s' '\r[>> ] 2/3, 1.0 task/s, elapsed: 2s, ETA:     1s' '\r[>>>] 3/3, 1.0 task/s, elapsed: 3s, ETA:     0s\n') assert ret == [1, 2, 3] def test_track_iter_progress(): out = StringIO() ret = [] for num in mmcv.track_iter_progress([1, 2, 3], bar_width=3, file=out): ret.append(sleep_1s(num)) assert out.getvalue() == ( '[   ] 0/3, elapsed: 0s, ETA:' '\r[>  ] 1/3, 1.0 task/s, elapsed: 1s, ETA:     2s' '\r[>> ] 2/3, 1.0 task/s, elapsed: 2s, ETA:     1s' '\r[>>>] 3/3, 1.0 task/s, elapsed: 3s, ETA:     0s\n') assert ret == [1, 2, 3] def test_track_enum_progress(): out = StringIO() ret = [] count = [] for i, num in enumerate( mmcv.track_iter_progress([1, 2, 3], bar_width=3, file=out)): ret.append(sleep_1s(num)) count.append(i) assert out.getvalue() == ( '[   ] 0/3, elapsed: 0s, ETA:' '\r[>  ] 1/3, 1.0 task/s, elapsed: 1s, ETA:     2s' '\r[>> ] 2/3, 1.0 task/s, elapsed: 2s, ETA:     1s' '\r[>>>] 3/3, 1.0 task/s, elapsed: 3s, ETA:     0s\n') assert ret == [1, 2, 3] assert count == [0, 1, 2] def test_track_parallel_progress_list(): out = StringIO() results = mmcv.track_parallel_progress( sleep_1s, [1, 2, 3, 4], 2, bar_width=4, file=out) # The following cannot pass CI on Github Action # assert out.getvalue() == ( # '[    ] 0/4, elapsed: 0s, ETA:' # '\r[>   ] 1/4, 1.0 task/s, elapsed: 1s, ETA:     3s' # '\r[>>  ] 2/4, 2.0 task/s,
elapsed: 1s, ETA:     1s' # '\r[>>> ] 3/4, 1.5 task/s, elapsed: 2s, ETA:     1s' # '\r[>>>>] 4/4, 2.0 task/s, elapsed: 2s, ETA:     0s\n') assert results == [1, 2, 3, 4] def test_track_parallel_progress_iterator(): out = StringIO() results = mmcv.track_parallel_progress( sleep_1s, ((i for i in [1, 2, 3, 4]), 4), 2, bar_width=4, file=out) # The following cannot pass CI on Github Action # assert out.getvalue() == ( # '[    ] 0/4, elapsed: 0s, ETA:' # '\r[>   ] 1/4, 1.0 task/s, elapsed: 1s, ETA:     3s' # '\r[>>  ] 2/4, 2.0 task/s, elapsed: 1s, ETA:     1s' # '\r[>>> ] 3/4, 1.5 task/s, elapsed: 2s, ETA:     1s' # '\r[>>>>] 4/4, 2.0 task/s, elapsed: 2s, ETA:     0s\n') assert results == [1, 2, 3, 4] ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_registry.py ================================================ import pytest import mmcv def test_registry(): CATS = mmcv.Registry('cat') assert CATS.name == 'cat' assert CATS.module_dict == {} assert len(CATS) == 0 @CATS.register_module() class BritishShorthair: pass assert len(CATS) == 1 assert CATS.get('BritishShorthair') is BritishShorthair class Munchkin: pass CATS.register_module(Munchkin) assert len(CATS) == 2 assert CATS.get('Munchkin') is Munchkin assert 'Munchkin' in CATS with pytest.raises(KeyError): CATS.register_module(Munchkin) CATS.register_module(Munchkin, force=True) assert len(CATS) == 2 # force=False with pytest.raises(KeyError): @CATS.register_module() class BritishShorthair: pass @CATS.register_module(force=True) class BritishShorthair: pass assert len(CATS) == 2 assert CATS.get('PersianCat') is None assert 'PersianCat' not in CATS @CATS.register_module(name=['Siamese', 'Siamese2']) class SiameseCat: pass assert CATS.get('Siamese').__name__ == 'SiameseCat' assert CATS.get('Siamese2').__name__ == 'SiameseCat' class SphynxCat: pass CATS.register_module(name='Sphynx', module=SphynxCat) assert CATS.get('Sphynx') is SphynxCat CATS.register_module(name=['Sphynx1', 'Sphynx2'],
module=SphynxCat) assert CATS.get('Sphynx2') is SphynxCat repr_str = 'Registry(name=cat, items={' repr_str += ("'BritishShorthair': <class 'test_registry.test_registry." "<locals>.BritishShorthair'>, ") repr_str += ("'Munchkin': <class 'test_registry.test_registry." "<locals>.Munchkin'>, ") repr_str += ("'Siamese': <class 'test_registry.test_registry." "<locals>.SiameseCat'>, ") repr_str += ("'Siamese2': <class 'test_registry.test_registry." "<locals>.SiameseCat'>, ") repr_str += ("'Sphynx': <class 'test_registry.test_registry." "<locals>.SphynxCat'>, ") repr_str += ("'Sphynx1': <class 'test_registry.test_registry." "<locals>.SphynxCat'>, ") repr_str += ("'Sphynx2': <class 'test_registry.test_registry." "<locals>.SphynxCat'>") repr_str += '})' assert repr(CATS) == repr_str # name type with pytest.raises(TypeError): CATS.register_module(name=7474741, module=SphynxCat) # the registered module should be a class with pytest.raises(TypeError): CATS.register_module(0) # can only decorate a class with pytest.raises(TypeError): @CATS.register_module() def some_method(): pass # begin: test old APIs with pytest.warns(DeprecationWarning): CATS.register_module(SphynxCat) assert CATS.get('SphynxCat').__name__ == 'SphynxCat' with pytest.warns(DeprecationWarning): CATS.register_module(SphynxCat, force=True) assert CATS.get('SphynxCat').__name__ == 'SphynxCat' with pytest.warns(DeprecationWarning): @CATS.register_module class NewCat: pass assert CATS.get('NewCat').__name__ == 'NewCat' with pytest.warns(DeprecationWarning): CATS.deprecated_register_module(SphynxCat, force=True) assert CATS.get('SphynxCat').__name__ == 'SphynxCat' with pytest.warns(DeprecationWarning): @CATS.deprecated_register_module class CuteCat: pass assert CATS.get('CuteCat').__name__ == 'CuteCat' with pytest.warns(DeprecationWarning): @CATS.deprecated_register_module(force=True) class NewCat2: pass assert CATS.get('NewCat2').__name__ == 'NewCat2' # end: test old APIs def test_multi_scope_registry(): DOGS = mmcv.Registry('dogs') assert DOGS.name == 'dogs' assert DOGS.scope == 'test_registry' assert DOGS.module_dict == {} assert len(DOGS) == 0 @DOGS.register_module() class GoldenRetriever: pass assert len(DOGS) == 1 assert DOGS.get('GoldenRetriever') is GoldenRetriever HOUNDS = mmcv.Registry('dogs', parent=DOGS, scope='hound')
@HOUNDS.register_module() class BloodHound: pass assert len(HOUNDS) == 1 assert HOUNDS.get('BloodHound') is BloodHound assert DOGS.get('hound.BloodHound') is BloodHound assert HOUNDS.get('hound.BloodHound') is BloodHound LITTLE_HOUNDS = mmcv.Registry('dogs', parent=HOUNDS, scope='little_hound') @LITTLE_HOUNDS.register_module() class Dachshund: pass assert len(LITTLE_HOUNDS) == 1 assert LITTLE_HOUNDS.get('Dachshund') is Dachshund assert LITTLE_HOUNDS.get('hound.BloodHound') is BloodHound assert HOUNDS.get('little_hound.Dachshund') is Dachshund assert DOGS.get('hound.little_hound.Dachshund') is Dachshund MID_HOUNDS = mmcv.Registry('dogs', parent=HOUNDS, scope='mid_hound') @MID_HOUNDS.register_module() class Beagle: pass assert MID_HOUNDS.get('Beagle') is Beagle assert HOUNDS.get('mid_hound.Beagle') is Beagle assert DOGS.get('hound.mid_hound.Beagle') is Beagle assert LITTLE_HOUNDS.get('hound.mid_hound.Beagle') is Beagle assert MID_HOUNDS.get('hound.BloodHound') is BloodHound assert MID_HOUNDS.get('hound.Dachshund') is None def test_build_from_cfg(): BACKBONES = mmcv.Registry('backbone') @BACKBONES.register_module() class ResNet: def __init__(self, depth, stages=4): self.depth = depth self.stages = stages @BACKBONES.register_module() class ResNeXt: def __init__(self, depth, stages=4): self.depth = depth self.stages = stages cfg = dict(type='ResNet', depth=50) model = mmcv.build_from_cfg(cfg, BACKBONES) assert isinstance(model, ResNet) assert model.depth == 50 and model.stages == 4 cfg = dict(type='ResNet', depth=50) model = mmcv.build_from_cfg(cfg, BACKBONES, default_args={'stages': 3}) assert isinstance(model, ResNet) assert model.depth == 50 and model.stages == 3 cfg = dict(type='ResNeXt', depth=50, stages=3) model = mmcv.build_from_cfg(cfg, BACKBONES) assert isinstance(model, ResNeXt) assert model.depth == 50 and model.stages == 3 cfg = dict(type=ResNet, depth=50) model = mmcv.build_from_cfg(cfg, BACKBONES) assert isinstance(model, ResNet) assert model.depth == 50 
and model.stages == 4 # type defined using default_args cfg = dict(depth=50) model = mmcv.build_from_cfg( cfg, BACKBONES, default_args=dict(type='ResNet')) assert isinstance(model, ResNet) assert model.depth == 50 and model.stages == 4 cfg = dict(depth=50) model = mmcv.build_from_cfg(cfg, BACKBONES, default_args=dict(type=ResNet)) assert isinstance(model, ResNet) assert model.depth == 50 and model.stages == 4 # not a registry with pytest.raises(TypeError): cfg = dict(type='VGG') model = mmcv.build_from_cfg(cfg, 'BACKBONES') # non-registered class with pytest.raises(KeyError): cfg = dict(type='VGG') model = mmcv.build_from_cfg(cfg, BACKBONES) # default_args must be a dict or None with pytest.raises(TypeError): cfg = dict(type='ResNet', depth=50) model = mmcv.build_from_cfg(cfg, BACKBONES, default_args=1) # cfg['type'] should be a str or class with pytest.raises(TypeError): cfg = dict(type=1000) model = mmcv.build_from_cfg(cfg, BACKBONES) # cfg should contain the key "type" with pytest.raises(KeyError, match='must contain the key "type"'): cfg = dict(depth=50, stages=4) model = mmcv.build_from_cfg(cfg, BACKBONES) # cfg or default_args should contain the key "type" with pytest.raises(KeyError, match='must contain the key "type"'): cfg = dict(depth=50) model = mmcv.build_from_cfg( cfg, BACKBONES, default_args=dict(stages=4)) # incorrect registry type with pytest.raises(TypeError): cfg = dict(type='ResNet', depth=50) model = mmcv.build_from_cfg(cfg, 'BACKBONES') # incorrect default_args type with pytest.raises(TypeError): cfg = dict(type='ResNet', depth=50) model = mmcv.build_from_cfg(cfg, BACKBONES, default_args=0) # incorrect arguments with pytest.raises(TypeError): cfg = dict(type='ResNet', non_existing_arg=50) model = mmcv.build_from_cfg(cfg, BACKBONES) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_testing.py ================================================ import numpy as np import pytest import mmcv 
# torch is optional: tensor-based cases are skipped when it is not installed.
try:
    import torch
except ImportError:
    torch = None
else:
    import torch.nn as nn


def test_assert_dict_contains_subset():
    """Check ``mmcv.assert_dict_contains_subset`` on tuple, ndarray and
    (when torch is importable) tensor values."""
    dict_obj = {'a': 'test1', 'b': 2, 'c': (4, 6)}

    # case 1
    expected_subset = {'a': 'test1', 'b': 2, 'c': (4, 6)}
    assert mmcv.assert_dict_contains_subset(dict_obj, expected_subset)

    # case 2
    expected_subset = {'a': 'test1', 'b': 2, 'c': (6, 4)}
    assert not mmcv.assert_dict_contains_subset(dict_obj, expected_subset)

    # case 3
    expected_subset = {'a': 'test1', 'b': 2, 'c': None}
    assert not mmcv.assert_dict_contains_subset(dict_obj, expected_subset)

    # case 4
    expected_subset = {'a': 'test1', 'b': 2, 'd': (4, 6)}
    assert not mmcv.assert_dict_contains_subset(dict_obj, expected_subset)

    # case 5
    dict_obj = {
        'a': 'test1',
        'b': 2,
        'c': (4, 6),
        'd': np.array([[5, 3, 5], [1, 2, 3]])
    }
    expected_subset = {
        'a': 'test1',
        'b': 2,
        'c': (4, 6),
        'd': np.array([[5, 3, 5], [6, 2, 3]])
    }
    assert not mmcv.assert_dict_contains_subset(dict_obj, expected_subset)

    # case 6
    dict_obj = {'a': 'test1', 'b': 2, 'c': (4, 6), 'd': np.array([[1]])}
    expected_subset = {'a': 'test1', 'b': 2, 'c': (4, 6), 'd': np.array([[1]])}
    assert mmcv.assert_dict_contains_subset(dict_obj, expected_subset)

    if torch is not None:
        dict_obj = {
            'a': 'test1',
            'b': 2,
            'c': (4, 6),
            'd': torch.tensor([5, 3, 5])
        }

        # case 7
        expected_subset = {'d': torch.tensor([5, 5, 5])}
        assert not mmcv.assert_dict_contains_subset(dict_obj, expected_subset)

        # case 8
        expected_subset = {'d': torch.tensor([[5, 3, 5], [4, 1, 2]])}
        assert not mmcv.assert_dict_contains_subset(dict_obj, expected_subset)


def test_assert_attrs_equal():
    """Check ``mmcv.assert_attrs_equal`` against class attributes, a method
    and (when torch is importable) tensor attributes."""

    class TestExample(object):
        a, b, c = 1, ('wvi', 3), [4.5, 3.14]

        def test_func(self):
            return self.b

    # case 1
    assert mmcv.assert_attrs_equal(TestExample, {
        'a': 1,
        'b': ('wvi', 3),
        'c': [4.5, 3.14]
    })

    # case 2
    assert not mmcv.assert_attrs_equal(TestExample, {
        'a': 1,
        'b': ('wvi', 3),
        'c': [4.5, 3.14, 2]
    })

    # case 3
    assert not mmcv.assert_attrs_equal(TestExample, {
        'bc': 54,
        'c': [4.5, 3.14]
    })

    # case 4
    assert mmcv.assert_attrs_equal(TestExample, {
        'b': ('wvi', 3),
        'test_func': TestExample.test_func
    })

    if torch is not None:

        # redefined on purpose: cases 5 and 6 use tensor-valued attributes
        class TestExample(object):
            a, b = torch.tensor([1]), torch.tensor([4, 5])

        # case 5
        assert mmcv.assert_attrs_equal(TestExample, {
            'a': torch.tensor([1]),
            'b': torch.tensor([4, 5])
        })

        # case 6
        assert not mmcv.assert_attrs_equal(TestExample, {
            'a': torch.tensor([1]),
            'b': torch.tensor([4, 6])
        })


# Parametrization data for test_assert_dict_has_keys: one dict under test,
# and (expected_keys, expected return value) pairs.
assert_dict_has_keys_data_1 = [({
    'res_layer': 1,
    'norm_layer': 2,
    'dense_layer': 3
})]
assert_dict_has_keys_data_2 = [(['res_layer', 'dense_layer'], True),
                               (['res_layer', 'conv_layer'], False)]


@pytest.mark.parametrize('obj', assert_dict_has_keys_data_1)
@pytest.mark.parametrize('expected_keys, ret_value',
                         assert_dict_has_keys_data_2)
def test_assert_dict_has_keys(obj, expected_keys, ret_value):
    """``assert_dict_has_keys`` must return ``ret_value`` for each key list."""
    assert mmcv.assert_dict_has_keys(obj, expected_keys) == ret_value


# Parametrization data for test_assert_keys_equal: one result key list, and
# (target_keys, expected return value) pairs.
assert_keys_equal_data_1 = [(['res_layer', 'norm_layer', 'dense_layer'])]
assert_keys_equal_data_2 = [(['res_layer', 'norm_layer', 'dense_layer'], True),
                            (['res_layer', 'dense_layer', 'norm_layer'], True),
                            (['res_layer', 'norm_layer'], False),
                            (['res_layer', 'conv_layer', 'norm_layer'], False)]


@pytest.mark.parametrize('result_keys', assert_keys_equal_data_1)
@pytest.mark.parametrize('target_keys, ret_value', assert_keys_equal_data_2)
def test_assert_keys_equal(result_keys, target_keys, ret_value):
    """``assert_keys_equal`` must return ``ret_value`` for each target list
    (the second data row expects True for a reordered list)."""
    assert mmcv.assert_keys_equal(result_keys, target_keys) == ret_value


@pytest.mark.skipif(torch is None, reason='requires torch library')
def test_assert_is_norm_layer():
    """BatchNorm3d and GroupNorm are expected to pass; Conv3d and Sigmoid
    are expected to be rejected."""
    # case 1
    assert not mmcv.assert_is_norm_layer(nn.Conv3d(3, 64, 3))

    # case 2
    assert mmcv.assert_is_norm_layer(nn.BatchNorm3d(128))

    # case 3
    assert mmcv.assert_is_norm_layer(nn.GroupNorm(8, 64))

    # case 4
    assert not mmcv.assert_is_norm_layer(nn.Sigmoid())


@pytest.mark.skipif(torch is None, reason='requires torch library')
def test_assert_params_all_zeros():
    """Check ``assert_params_all_zeros`` before and after re-initialising the
    weights of a Conv2d (with bias) and a Linear (without bias)."""
    demo_module = nn.Conv2d(3, 64, 3)
    nn.init.constant_(demo_module.weight, 0)
    nn.init.constant_(demo_module.bias, 0)
    assert mmcv.assert_params_all_zeros(demo_module)

    nn.init.xavier_normal_(demo_module.weight)
    nn.init.constant_(demo_module.bias, 0)
    assert not mmcv.assert_params_all_zeros(demo_module)

    demo_module = nn.Linear(2048, 400, bias=False)
    nn.init.constant_(demo_module.weight, 0)
    assert mmcv.assert_params_all_zeros(demo_module)

    nn.init.normal_(demo_module.weight, mean=0, std=0.01)
    assert not mmcv.assert_params_all_zeros(demo_module)


def test_check_python_script(capsys):
    """Run the ``hello.py`` fixture through ``check_python_script`` and check
    its captured stdout; the paths are relative to the working directory."""
    mmcv.utils.check_python_script('./tests/data/scripts/hello.py zz')
    captured = capsys.readouterr().out
    assert captured == 'hello zz!\n'
    mmcv.utils.check_python_script('./tests/data/scripts/hello.py agent')
    captured = capsys.readouterr().out
    assert captured == 'hello agent!\n'
    # Make sure that wrong cmd raises an error
    with pytest.raises(SystemExit):
        mmcv.utils.check_python_script('./tests/data/scripts/hello.py li zz')

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_timer.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import time import pytest import mmcv def test_timer_init(): timer = mmcv.Timer(start=False) assert not timer.is_running timer.start() assert timer.is_running timer = mmcv.Timer() assert timer.is_running def test_timer_run(): timer = mmcv.Timer() time.sleep(1) assert abs(timer.since_start() - 1) < 1e-2 time.sleep(1) assert abs(timer.since_last_check() - 1) < 1e-2 assert abs(timer.since_start() - 2) < 1e-2 timer = mmcv.Timer(False) with pytest.raises(mmcv.TimerError): timer.since_start() with pytest.raises(mmcv.TimerError): timer.since_last_check() def test_timer_context(capsys): with mmcv.Timer(): time.sleep(1) out, _ = capsys.readouterr() assert abs(float(out) - 1) < 1e-2 with mmcv.Timer(print_tmpl='time: {:.1f}s'): time.sleep(1) out, _ = capsys.readouterr() assert out == 'time: 1.0s\n' ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_trace.py ================================================ import pytest import torch from mmcv.utils import digit_version, is_jit_tracing @pytest.mark.skipif( digit_version(torch.__version__) < digit_version('1.6.0'), reason='torch.jit.is_tracing is not available before 1.6.0') def test_is_jit_tracing(): def foo(x): if is_jit_tracing(): return x else: return x.tolist() x = torch.rand(3) # test without trace assert isinstance(foo(x), list) # test with trace traced_foo = torch.jit.trace(foo, (torch.rand(1), )) assert isinstance(traced_foo(x), torch.Tensor) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_version_utils.py ================================================ from unittest.mock import patch import pytest from mmcv import get_git_hash, parse_version_info from mmcv.utils import digit_version def test_digit_version(): assert digit_version('0.2.16') == (0, 2, 16, 0, 0, 0) assert digit_version('1.2.3') == (1, 2, 3, 0, 0, 0) assert digit_version('1.2.3rc0') == (1, 2, 3, 0, -1, 0) assert digit_version('1.2.3rc1') 
== (1, 2, 3, 0, -1, 1) assert digit_version('1.0rc0') == (1, 0, 0, 0, -1, 0) assert digit_version('1.0') == digit_version('1.0.0') assert digit_version('1.5.0+cuda90_cudnn7.6.3_lms') == digit_version('1.5') assert digit_version('1.0.0dev') < digit_version('1.0.0a') assert digit_version('1.0.0a') < digit_version('1.0.0a1') assert digit_version('1.0.0a') < digit_version('1.0.0b') assert digit_version('1.0.0b') < digit_version('1.0.0rc') assert digit_version('1.0.0rc1') < digit_version('1.0.0') assert digit_version('1.0.0') < digit_version('1.0.0post') assert digit_version('1.0.0post') < digit_version('1.0.0post1') assert digit_version('v1') == (1, 0, 0, 0, 0, 0) assert digit_version('v1.1.5') == (1, 1, 5, 0, 0, 0) with pytest.raises(AssertionError): digit_version('a') with pytest.raises(AssertionError): digit_version('1x') with pytest.raises(AssertionError): digit_version('1.x') def test_parse_version_info(): assert parse_version_info('0.2.16') == (0, 2, 16, 0, 0, 0) assert parse_version_info('1.2.3') == (1, 2, 3, 0, 0, 0) assert parse_version_info('1.2.3rc0') == (1, 2, 3, 0, 'rc', 0) assert parse_version_info('1.2.3rc1') == (1, 2, 3, 0, 'rc', 1) assert parse_version_info('1.0rc0') == (1, 0, 0, 0, 'rc', 0) def _mock_cmd_success(cmd): return '3b46d33e90c397869ad5103075838fdfc9812aa0'.encode('ascii') def _mock_cmd_fail(cmd): raise OSError def test_get_git_hash(): with patch('mmcv.utils.version_utils._minimal_ext_cmd', _mock_cmd_success): assert get_git_hash() == '3b46d33e90c397869ad5103075838fdfc9812aa0' assert get_git_hash(digits=6) == '3b46d3' assert get_git_hash(digits=100) == get_git_hash() with patch('mmcv.utils.version_utils._minimal_ext_cmd', _mock_cmd_fail): assert get_git_hash() == 'unknown' assert get_git_hash(fallback='n/a') == 'n/a' ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_video/test_optflow.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
import os
import os.path as osp
import tempfile

import cv2
import numpy as np
import pytest
from numpy.testing import assert_array_almost_equal, assert_array_equal

import mmcv


def test_flowread():
    """Read flow from a ``.flo`` file, from an array, and from quantized
    ``.jpg`` files concatenated on either axis; then check error cases."""
    data_dir = osp.join(osp.dirname(__file__), '../data')
    flow_shape = (60, 80, 2)

    # read .flo file
    flow = mmcv.flowread(osp.join(data_dir, 'optflow.flo'))
    assert flow.shape == flow_shape

    # pseudo read
    flow_same = mmcv.flowread(flow)
    assert_array_equal(flow, flow_same)

    # read quantized flow concatenated vertically
    flow = mmcv.flowread(
        osp.join(data_dir, 'optflow_concat0.jpg'), quantize=True, denorm=True)
    assert flow.shape == flow_shape

    # read quantized flow concatenated horizontally
    flow = mmcv.flowread(
        osp.join(data_dir, 'optflow_concat1.jpg'),
        quantize=True,
        concat_axis=1,
        denorm=True)
    assert flow.shape == flow_shape

    # test exceptions
    notflow_file = osp.join(data_dir, 'color.jpg')
    with pytest.raises(TypeError):
        mmcv.flowread(1)
    with pytest.raises(IOError):
        mmcv.flowread(notflow_file)
    with pytest.raises(IOError):
        mmcv.flowread(notflow_file, quantize=True)
    with pytest.raises(ValueError):
        mmcv.flowread(np.zeros((100, 100, 1)))


def test_flowwrite():
    """Round-trip a random flow through a ``.flo`` file and check the image
    shape produced by quantized writes on each concat axis."""
    flow = np.random.rand(100, 100, 2).astype(np.float32)

    # write to a .flo file
    tmp_filehandler, filename = tempfile.mkstemp()
    mmcv.flowwrite(flow, filename)
    flow_from_file = mmcv.flowread(filename)
    assert_array_equal(flow, flow_from_file)
    os.close(tmp_filehandler)
    os.remove(filename)

    # write to two .jpg files
    tmp_filename = osp.join(tempfile.gettempdir(), 'mmcv_test_flow.jpg')
    for concat_axis in range(2):
        mmcv.flowwrite(
            flow, tmp_filename, quantize=True, concat_axis=concat_axis)
        # dx and dy are stacked along concat_axis, doubling that dimension
        shape = (200, 100) if concat_axis == 0 else (100, 200)
        assert osp.isfile(tmp_filename)
        assert mmcv.imread(tmp_filename, flag='unchanged').shape == shape
        os.remove(tmp_filename)

    # test exceptions
    with pytest.raises(AssertionError):
        mmcv.flowwrite(flow, tmp_filename, quantize=True, concat_axis=2)


def test_quantize_flow():
    """Compare ``mmcv.quantize_flow`` with an element-wise reference, with
    and without normalisation by the image size."""
    flow = (np.random.rand(10, 8, 2).astype(np.float32) - 0.5) * 15
    max_val = 5.0
    dx, dy = mmcv.quantize_flow(flow, max_val=max_val, norm=False)
    ref = np.zeros_like(flow, dtype=np.uint8)
    for i in range(ref.shape[0]):
        for j in range(ref.shape[1]):
            for k in range(ref.shape[2]):
                # shift into [0, 2 * max_val], clip, scale to 0..254
                val = flow[i, j, k] + max_val
                val = min(max(val, 0), 2 * max_val)
                ref[i, j, k] = min(np.floor(255 * val / (2 * max_val)), 254)
    assert_array_equal(dx, ref[..., 0])
    assert_array_equal(dy, ref[..., 1])
    max_val = 0.5
    dx, dy = mmcv.quantize_flow(flow, max_val=max_val, norm=True)
    ref = np.zeros_like(flow, dtype=np.uint8)
    for i in range(ref.shape[0]):
        for j in range(ref.shape[1]):
            for k in range(ref.shape[2]):
                # channel 0 is divided by shape[1], channel 1 by shape[0]
                scale = flow.shape[1] if k == 0 else flow.shape[0]
                val = flow[i, j, k] / scale + max_val
                val = min(max(val, 0), 2 * max_val)
                ref[i, j, k] = min(np.floor(255 * val / (2 * max_val)), 254)
    assert_array_equal(dx, ref[..., 0])
    assert_array_equal(dy, ref[..., 1])


def test_dequantize_flow():
    """Compare ``mmcv.dequantize_flow`` with an element-wise reference, with
    and without denormalisation by the image size."""
    dx = np.random.randint(256, size=(10, 8), dtype=np.uint8)
    dy = np.random.randint(256, size=(10, 8), dtype=np.uint8)
    max_val = 5.0
    flow = mmcv.dequantize_flow(dx, dy, max_val=max_val, denorm=False)
    ref = np.zeros_like(flow, dtype=np.float32)
    for i in range(ref.shape[0]):
        for j in range(ref.shape[1]):
            ref[i, j, 0] = float(dx[i, j] + 0.5) * 2 * max_val / 255 - max_val
            ref[i, j, 1] = float(dy[i, j] + 0.5) * 2 * max_val / 255 - max_val
    assert_array_almost_equal(flow, ref)
    max_val = 0.5
    flow = mmcv.dequantize_flow(dx, dy, max_val=max_val, denorm=True)
    h, w = dx.shape
    ref = np.zeros_like(flow, dtype=np.float32)
    for i in range(ref.shape[0]):
        for j in range(ref.shape[1]):
            # with denorm=True, dx is scaled back by w and dy by h
            ref[i, j,
                0] = (float(dx[i, j] + 0.5) * 2 * max_val / 255 - max_val) * w
            ref[i, j,
                1] = (float(dy[i, j] + 0.5) * 2 * max_val / 255 - max_val) * h
    assert_array_almost_equal(flow, ref)


def test_flow2rgb():
    """Check ``mmcv.flow2rgb`` against fixed RGB values for five flow
    vectors, the last of which contains ``np.inf``."""
    flow = np.array([[[0, 0], [0.5, 0.5], [1, 1], [2, 1], [3, np.inf]]],
                    dtype=np.float32)
    flow_img = mmcv.flow2rgb(flow)
    # yapf: disable
    assert_array_almost_equal(
        flow_img,
        np.array([[[1., 1., 1.],
                   [1., 0.826074731, 0.683772236],
                   [1., 0.652149462, 0.367544472],
                   [1., 0.265650552, 5.96046448e-08],
                   [0., 0., 0.]]],
                 dtype=np.float32))
    # yapf: enable


def test_flow_warp():
    """Check that nearest and bilinear warps agree for a unit integer flow,
    check one hand-computed bilinear value, and check error cases."""
    img = np.zeros((5, 5, 3))
    img[2, 2, 0] = 1
    flow = np.ones((5, 5, 2))

    res_nn = mmcv.flow_warp(img, flow, interpolate_mode='nearest')
    res_bi = mmcv.flow_warp(img, flow, interpolate_mode='bilinear')
    assert_array_almost_equal(res_nn, res_bi, decimal=5)

    img = np.zeros((5, 5, 1))
    img[2, 2, 0] = 1
    img[2, 3, 0] = 0.75
    flow = np.zeros((5, 5, 2))
    flow[2, 2, :] = [0.5, 0.7]
    res_ = np.copy(img)
    # expected bilinear mix of the two non-zero pixels at (2, 2) and (2, 3)
    res_[2, 2] = 0.5 * 0.3 + 0.75 * 0.5 * 0.3
    res_bi = mmcv.flow_warp(img, flow, interpolate_mode='bilinear')
    assert_array_almost_equal(res_, res_bi, decimal=5)

    with pytest.raises(NotImplementedError):
        _ = mmcv.flow_warp(img, flow, interpolate_mode='xxx')

    with pytest.raises(AssertionError):
        _ = mmcv.flow_warp(img, flow[:, :, 0], interpolate_mode='xxx')


def test_make_color_wheel():
    """Compare the default colour wheel (55 rows) and a wheel built from
    ``[2, 2, 2, 2, 2, 2]`` bins (12 rows) with fixed reference tables."""
    default_color_wheel = mmcv.make_color_wheel()
    color_wheel = mmcv.make_color_wheel([2, 2, 2, 2, 2, 2])

    # yapf: disable
    assert_array_equal(default_color_wheel, np.array(
        [[1.       , 0.        , 0.        ],  # noqa
        [1.        , 0.06666667, 0.        ],  # noqa
        [1.        , 0.13333334, 0.        ],  # noqa
        [1.        , 0.2       , 0.        ],  # noqa
        [1.        , 0.26666668, 0.        ],  # noqa
        [1.        , 0.33333334, 0.        ],  # noqa
        [1.        , 0.4       , 0.        ],  # noqa
        [1.        , 0.46666667, 0.        ],  # noqa
        [1.        , 0.53333336, 0.        ],  # noqa
        [1.        , 0.6       , 0.        ],  # noqa
        [1.        , 0.6666667 , 0.        ],  # noqa
        [1.        , 0.73333335, 0.        ],  # noqa
        [1.        , 0.8       , 0.        ],  # noqa
        [1.        , 0.8666667 , 0.        ],  # noqa
        [1.        , 0.93333334, 0.        ],  # noqa
        [1.        , 1.        , 0.        ],  # noqa
        [0.8333333 , 1.        , 0.        ],  # noqa
        [0.6666667 , 1.        , 0.        ],  # noqa
        [0.5       , 1.        , 0.        ],  # noqa
        [0.33333334, 1.        , 0.        ],  # noqa
        [0.16666667, 1.        , 0.        ],  # noqa
        [0.        , 1.        , 0.        ],  # noqa
        [0.        , 1.        , 0.25      ],  # noqa
        [0.        , 1.        , 0.5       ],  # noqa
        [0.        , 1.        , 0.75      ],  # noqa
        [0.        , 1.        , 1.        ],  # noqa
        [0.        , 0.90909094, 1.        ],  # noqa
        [0.        , 0.8181818 , 1.        ],  # noqa
        [0.        , 0.72727275, 1.        ],  # noqa
        [0.        , 0.6363636 , 1.        ],  # noqa
        [0.        , 0.54545456, 1.        ],  # noqa
        [0.        , 0.45454547, 1.        ],  # noqa
        [0.        , 0.36363637, 1.        ],  # noqa
        [0.        , 0.27272728, 1.        ],  # noqa
        [0.        , 0.18181819, 1.        ],  # noqa
        [0.        , 0.09090909, 1.        ],  # noqa
        [0.        , 0.        , 1.        ],  # noqa
        [0.07692308, 0.        , 1.        ],  # noqa
        [0.15384616, 0.        , 1.        ],  # noqa
        [0.23076923, 0.        , 1.        ],  # noqa
        [0.30769232, 0.        , 1.        ],  # noqa
        [0.3846154 , 0.        , 1.        ],  # noqa
        [0.46153846, 0.        , 1.        ],  # noqa
        [0.53846157, 0.        , 1.        ],  # noqa
        [0.61538464, 0.        , 1.        ],  # noqa
        [0.6923077 , 0.        , 1.        ],  # noqa
        [0.7692308 , 0.        , 1.        ],  # noqa
        [0.84615386, 0.        , 1.        ],  # noqa
        [0.9230769 , 0.        , 1.        ],  # noqa
        [1.        , 0.        , 1.        ],  # noqa
        [1.        , 0.        , 0.8333333 ],  # noqa
        [1.        , 0.        , 0.6666667 ],  # noqa
        [1.        , 0.        , 0.5       ],  # noqa
        [1.        , 0.        , 0.33333334],  # noqa
        [1.        , 0.        , 0.16666667]], dtype=np.float32))  # noqa

    assert_array_equal(
        color_wheel,
        np.array([[1., 0. , 0. ],  # noqa
                 [1. , 0.5, 0. ],  # noqa
                 [1. , 1. , 0. ],  # noqa
                 [0.5, 1. , 0. ],  # noqa
                 [0. , 1. , 0. ],  # noqa
                 [0. , 1. , 0.5],  # noqa
                 [0. , 1. , 1. ],  # noqa
                 [0. , 0.5, 1. ],  # noqa
                 [0. , 0. , 1. ],  # noqa
                 [0.5, 0. , 1. ],  # noqa
                 [1. , 0. , 1. ],  # noqa
                 [1. , 0. , 0.5]], dtype=np.float32))  # noqa
    # yapf: enable


def test_flow_from_bytes():
    """Decoding the raw bytes of a ``.flo`` file must match ``flowread``."""
    data_dir = osp.join(osp.dirname(__file__), '../data')
    flow_shape = (60, 80, 2)
    flow_file = osp.join(data_dir, 'optflow.flo')

    # read .flo file
    flow_fromfile = mmcv.flowread(flow_file)

    with open(flow_file, 'rb') as f:
        flow_bytes = f.read()
    flow_frombytes = mmcv.flow_from_bytes(flow_bytes)

    assert flow_frombytes.shape == flow_shape
    assert np.all(flow_frombytes == flow_fromfile)


def test_sparse_flow_from_bytes():
    """Decoding a sparse-flow PNG from bytes must match a reference decode
    done directly with OpenCV."""
    data_dir = osp.join(osp.dirname(__file__), '../data')
    flow_file = osp.join(data_dir, 'sparse_flow.png')

    with open(flow_file, 'rb') as f:
        flow_bytes = f.read()
    # read flow from bytes
    flow_frombytes, valid_frombytes = mmcv.sparse_flow_from_bytes(flow_bytes)

    # test flow shape is [H, W, 2] and valid shape is [H, W]
    assert flow_frombytes.shape[:2] == valid_frombytes.shape
    assert flow_frombytes.shape[2] == 2

    def read_sparse_flow_from_file():
        # reference decode: reverse the channel order, take the first two
        # channels as flow and the third as the valid mask, then undo the
        # 2**15 offset and the factor of 64
        flow = cv2.imread(flow_file, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_COLOR)
        flow = flow[:, :, ::-1].astype(np.float32)
        flow, valid = flow[:, :, :2], flow[:, :, 2]
        flow = (flow - 2**15) / 64.0
        return flow, valid

    # read flow from file
    flow_flowfile, valid_fromfile = read_sparse_flow_from_file()

    assert np.all(flow_frombytes == flow_flowfile)
    assert np.all(valid_frombytes == valid_fromfile)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_video/test_processing.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os import os.path as osp import platform import tempfile import pytest import mmcv class TestVideoEditor: @classmethod def setup_class(cls): cls.video_path = osp.join(osp.dirname(__file__), '../data/test.mp4') cls.num_frames = 168 @pytest.mark.skipif(platform.system() == 'Windows', reason='skip windows') def test_cut_concat_video(self): part1_file = osp.join(tempfile.gettempdir(), '.mmcv_test1.mp4') part2_file = osp.join(tempfile.gettempdir(), '.mmcv_test2.mp4') mmcv.cut_video(self.video_path, part1_file, end=3, vcodec='h264') mmcv.cut_video(self.video_path, part2_file, start=3, vcodec='h264') v1 = mmcv.VideoReader(part1_file) v2 = mmcv.VideoReader(part2_file) assert len(v1) == 75 assert len(v2) == self.num_frames - 75 out_file = osp.join(tempfile.gettempdir(), '.mmcv_test.mp4') mmcv.concat_video([part1_file, part2_file], out_file) v = mmcv.VideoReader(out_file) assert len(v) == self.num_frames os.remove(part1_file) os.remove(part2_file) os.remove(out_file) @pytest.mark.skipif(platform.system() == 'Windows', reason='skip windows') def test_resize_video(self): out_file = osp.join(tempfile.gettempdir(), '.mmcv_test.mp4') mmcv.resize_video( self.video_path, out_file, (200, 100), log_level='panic') v = mmcv.VideoReader(out_file) assert v.resolution == (200, 100) os.remove(out_file) mmcv.resize_video(self.video_path, out_file, ratio=2) v = mmcv.VideoReader(out_file) assert v.resolution == (294 * 2, 240 * 2) os.remove(out_file) mmcv.resize_video(self.video_path, out_file, (1000, 480), keep_ar=True) v = mmcv.VideoReader(out_file) assert v.resolution == (294 * 2, 240 * 2) os.remove(out_file) mmcv.resize_video( self.video_path, out_file, ratio=(2, 1.5), keep_ar=True) v = mmcv.VideoReader(out_file) assert v.resolution == (294 * 2, 360) os.remove(out_file) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_video/test_reader.py ================================================ # Copyright (c) OpenMMLab. All rights reserved. 
import os
import os.path as osp
import shutil
import tempfile
from collections import OrderedDict

import pytest

import mmcv


class TestCache:
    """Tests for ``mmcv.Cache``."""

    def test_init(self):
        """A capacity of 0 is rejected; a new cache starts empty."""
        with pytest.raises(ValueError):
            mmcv.Cache(0)
        cache = mmcv.Cache(100)
        assert cache.capacity == 100
        assert cache.size == 0

    def test_put(self):
        """Filling past capacity drops the oldest key; re-putting an
        existing key leaves the order unchanged."""
        cache = mmcv.Cache(3)
        for i in range(1, 4):
            cache.put(f'k{i}', i)
            assert cache.size == i
        assert cache._cache == OrderedDict([('k1', 1), ('k2', 2), ('k3', 3)])
        cache.put('k4', 4)
        assert cache.size == 3
        assert cache._cache == OrderedDict([('k2', 2), ('k3', 3), ('k4', 4)])
        cache.put('k2', 2)
        assert cache._cache == OrderedDict([('k2', 2), ('k3', 3), ('k4', 4)])

    def test_get(self):
        """Missing keys return ``None`` or the supplied default."""
        cache = mmcv.Cache(3)
        assert cache.get('key_none') is None
        assert cache.get('key_none', 0) == 0
        cache.put('k1', 1)
        assert cache.get('k1') == 1


class TestVideoReader:
    """Tests for ``mmcv.VideoReader`` against the bundled ``test.mp4``."""

    @classmethod
    def setup_class(cls):
        cls.video_path = osp.join(osp.dirname(__file__), '../data/test.mp4')
        cls.num_frames = 168
        # remote clip used by test_load; that test needs network access
        cls.video_url = 'https://www.learningcontainer.com/wp-content/uploads/2020/05/sample-mp4-file.mp4'  # noqa: E501

    def test_load(self):
        """Check the metadata reported for the local file and the URL."""
        # read from video file
        v = mmcv.VideoReader(self.video_path)
        assert v.width == 294
        assert v.height == 240
        assert v.fps == 25
        assert v.frame_cnt == self.num_frames
        assert len(v) == self.num_frames
        assert v.opened
        import cv2
        assert isinstance(v.vcap, type(cv2.VideoCapture()))

        # read from video url
        v = mmcv.VideoReader(self.video_url)
        assert v.width == 320
        assert v.height == 240
        assert v.fps == 15
        assert v.frame_cnt == 1889
        assert len(v) == 1889
        assert v.opened
        assert isinstance(v.vcap, type(cv2.VideoCapture()))

    def test_read(self):
        """Sequential reads and positive/negative indexing must return the
        frames with the expected mean intensity (94 up to frame 63, 205 at
        frame 64)."""
        v = mmcv.VideoReader(self.video_path)
        img = v.read()
        assert int(round(img.mean())) == 94
        img = v.get_frame(63)
        assert int(round(img.mean())) == 94
        img = v[64]
        assert int(round(img.mean())) == 205
        img = v[-104]
        assert int(round(img.mean())) == 205
        img = v[63]
        assert int(round(img.mean())) == 94
        img = v[-105]
        assert int(round(img.mean())) == 94
        # the cursor follows the last indexed frame, so read() yields frame 64
        img = v.read()
        assert int(round(img.mean())) == 205
        with pytest.raises(IndexError):
            v.get_frame(self.num_frames + 1)
        with pytest.raises(IndexError):
            v[-self.num_frames - 1]

    def test_slice(self):
        """Slices with negative bounds, a negative step and open ends."""
        v = mmcv.VideoReader(self.video_path)
        imgs = v[-105:-103]
        assert int(round(imgs[0].mean())) == 94
        assert int(round(imgs[1].mean())) == 205
        assert len(imgs) == 2
        imgs = v[63:65]
        assert int(round(imgs[0].mean())) == 94
        assert int(round(imgs[1].mean())) == 205
        assert len(imgs) == 2
        imgs = v[64:62:-1]
        assert int(round(imgs[0].mean())) == 205
        assert int(round(imgs[1].mean())) == 94
        assert len(imgs) == 2
        imgs = v[:5]
        assert len(imgs) == 5
        for img in imgs:
            assert int(round(img.mean())) == 94
        imgs = v[165:]
        assert len(imgs) == 3
        for img in imgs:
            assert int(round(img.mean())) == 0
        imgs = v[-3:]
        assert len(imgs) == 3
        for img in imgs:
            assert int(round(img.mean())) == 0

    def test_current_frame(self):
        """``current_frame()`` is ``None`` until something has been read."""
        v = mmcv.VideoReader(self.video_path)
        assert v.current_frame() is None
        v.read()
        img = v.current_frame()
        assert int(round(img.mean())) == 94

    def test_position(self):
        """``position`` is one past the index of the last frame returned."""
        v = mmcv.VideoReader(self.video_path)
        assert v.position == 0
        for _ in range(10):
            v.read()
        assert v.position == 10
        v.get_frame(99)
        assert v.position == 100

    def test_iterator(self):
        """Iterating yields every frame with shape (240, 294, 3)."""
        cnt = 0
        for img in mmcv.VideoReader(self.video_path):
            cnt += 1
            assert img.shape == (240, 294, 3)
        assert cnt == self.num_frames

    def test_with(self):
        """The reader is closed on leaving the ``with`` block."""
        with mmcv.VideoReader(self.video_path) as v:
            assert v.opened
        assert not v.opened

    def test_cvt2frames(self):
        """Dump frames with default settings, without a progress bar, and
        with a custom template / start index / frame limit."""
        v = mmcv.VideoReader(self.video_path)
        frame_dir = tempfile.mkdtemp()
        v.cvt2frames(frame_dir)
        assert osp.isdir(frame_dir)
        for i in range(self.num_frames):
            filename = f'{frame_dir}/{i:06d}.jpg'
            assert osp.isfile(filename)
            os.remove(filename)

        v = mmcv.VideoReader(self.video_path)
        v.cvt2frames(frame_dir, show_progress=False)
        assert osp.isdir(frame_dir)
        for i in range(self.num_frames):
            filename = f'{frame_dir}/{i:06d}.jpg'
            assert osp.isfile(filename)
            os.remove(filename)

        v = mmcv.VideoReader(self.video_path)
        v.cvt2frames(
            frame_dir,
            file_start=100,
            filename_tmpl='{:03d}.JPEG',
            start=100,
            max_num=20)
        assert osp.isdir(frame_dir)
        for i in range(100, 120):
            filename = f'{frame_dir}/{i:03d}.JPEG'
            assert osp.isfile(filename)
            os.remove(filename)
        shutil.rmtree(frame_dir)

    def test_frames2video(self):
        """Rebuild a video from dumped frames, first with default arguments
        and then with an explicit fps and frame range."""
        v = mmcv.VideoReader(self.video_path)
        frame_dir = tempfile.mkdtemp()
        v.cvt2frames(frame_dir)
        assert osp.isdir(frame_dir)
        for i in range(self.num_frames):
            filename = f'{frame_dir}/{i:06d}.jpg'
            assert osp.isfile(filename)

        out_filename = osp.join(tempfile.gettempdir(), 'mmcv_test.avi')
        mmcv.frames2video(frame_dir, out_filename)
        v = mmcv.VideoReader(out_filename)
        assert v.fps == 30
        assert len(v) == self.num_frames

        mmcv.frames2video(
            frame_dir,
            out_filename,
            fps=25,
            start=10,
            end=50,
            show_progress=False)

        with mmcv.VideoReader(out_filename) as v:
            assert v.fps == 25
            assert len(v) == 40

        # clean up the dumped frames; out_filename itself is not removed here
        for i in range(self.num_frames):
            filename = f'{frame_dir}/{i:06d}.jpg'
            os.remove(filename)
        shutil.rmtree(frame_dir)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_visualization.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
class Dataset_Pro(data.Dataset):
    """Pansharpening dataset loaded entirely into memory from one HDF5 file.

    The file must provide the datasets ``gt``, ``ms``, ``lms`` and ``pan``.
    Each one is converted to ``float32``, divided by ``img_scale`` and kept as
    a ``torch.Tensor`` attribute of the same name.

    Args:
        file_path (str): path of the ``.h5`` file. When the path contains the
            substring ``'valid'`` the ``gt`` tensor is permuted with
            ``[0, 2, 3, 1]`` (per the original comments: NxCxHxW -> NxHxWxC).
        img_scale (float): divisor applied to every array (e.g. the sensor's
            maximum digital number).
    """

    def __init__(self, file_path, img_scale):
        super(Dataset_Pro, self).__init__()
        # Open read-only and close the handle as soon as the arrays have been
        # copied into memory; the original left the file open for the whole
        # life of the process.
        with h5py.File(file_path, 'r') as h5_file:
            print(f"loading Dataset_Pro: {file_path} with {img_scale}")
            gt1 = np.array(h5_file["gt"][...], dtype=np.float32) / img_scale
            ms1 = np.array(h5_file["ms"][...], dtype=np.float32) / img_scale
            lms1 = np.array(h5_file["lms"][...], dtype=np.float32) / img_scale
            pan1 = np.array(h5_file['pan'][...], dtype=np.float32) / img_scale

        self.gt = torch.from_numpy(gt1)
        self.ms = torch.from_numpy(ms1)
        self.lms = torch.from_numpy(lms1)
        self.pan = torch.from_numpy(pan1)

        if 'valid' in file_path:
            # Validation files: move axis 1 of the ground truth to the end.
            self.gt = self.gt.permute([0, 2, 3, 1])

        print(pan1.shape, lms1.shape, gt1.shape, ms1.shape)

    # Required by torch.utils.data.Dataset
    def __getitem__(self, index):
        """Return sample ``index`` as a dict of float32 tensors."""
        return {'gt': self.gt[index, :, :, :].float(),
                'lms': self.lms[index, :, :, :].float(),
                'ms': self.ms[index, :, :, :].float(),
                'pan': self.pan[index, :, :, :].float()}

    # Required by torch.utils.data.Dataset
    def __len__(self):
        """Number of samples (first dimension of ``gt``)."""
        return self.gt.shape[0]
# Nx1xHxW pan1 = np.array(pan1.transpose(0, 2, 3, 1), dtype=np.float32) / img_scale # NxHxWx1 pan1 = np.squeeze(pan1, axis=3) # NxHxW pan_hp_tmp = get_edge(pan1) # NxHxW pan_hp_tmp = np.expand_dims(pan_hp_tmp, axis=3) # NxHxWx1 self.pan_hp = torch.from_numpy(pan_hp_tmp).permute(0, 3, 1, 2) # Nx1xHxW: print( f"gt: {self.gt.size()}, lms: {self.lms.size()}, pan_hp: {self.pan_hp.size()}, ms_hp: {self.ms_hp.size()} with {img_scale}") #####必要函数 def __getitem__(self, index): return {'gt': self.gt[index, :, :, :].float(), 'lms': self.lms[index, :, :, :].float(), 'ms_hp': self.ms_hp[index, :, :, :].float(), 'pan_hp': self.pan_hp[index, :, :, :].float()} #####必要函数 def __len__(self): return self.gt.shape[0] ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/common/evaluate.py ================================================ # GPL License # Copyright (C) 2021 , UESTC # All Rights Reserved # @Author : Xiao Wu, LiangJian Deng # @reference: import math import torch import torch.nn.functional as F import numpy as np def q2n(gt, x, q_blocks_size, q_shift): ''' ''' if isinstance(gt, torch.Tensor): gt = gt.cpu().numpy() x = x.cpu().numpy() N, N1, N2, N3 = gt.shape # 255 255 8 size2 = q_blocks_size # 32 stepx = math.ceil(N1 / q_shift) # 8 stepy = math.ceil(N2 / q_shift) # 8 if stepy <= 0: stepy = 1 stepx = 1 est1 = (stepx - 1) * q_shift + q_blocks_size - N1 # 1 est2 = (stepy - 1) * q_shift + q_blocks_size - N2 # 1 # if np.sum(np.array([est1 != 0, est2 != 0])) > 0: # refref = np.zeros(shape=[N1+1, N2+1]) # fusfus = refref.copy() for i in range(N3): a1 = gt[..., 0] ia1 = np.zeros(shape=[N, N1 + est1, N2 + est2]) ia1[:, : N1, : N2] = a1 ia1[:, :, N2:N2 + est2] = ia1[:, :, N2 - 1:-1:N2 - est2 + 1] ia1[:, N1:N1 + est1, ...] = ia1[:, N1 - 1:-1:N1 - est1 + 1, ...] 
if i == 0: refref = ia1[..., np.newaxis] # np.concatenate(refref, ia1, axis=3) else: refref = np.concatenate([refref, ia1[..., np.newaxis]], axis=-1) if i < N3: gt = gt[..., 1:] gt = refref for i in range(N3): a2 = x[..., 0] ia2 = np.zeros(shape=[N, N1 + est1, N2 + est2]) ia2[:, : N1, : N2] = a2 ia2[:, :, N2:N2 + est2] = ia2[:, :, N2 - 1:-1:N2 - est2 + 1] ia2[:, N1:N1 + est1, ...] = ia2[:, N1 - 1:-1:N1 - est1 + 1, ...] if i == 0: fusfus = ia2[..., np.newaxis] # np.concatenate(refref, ia1, axis=3) else: fusfus = np.concatenate([fusfus, ia2[..., np.newaxis]], axis=-1) if i < N3: x = x[..., 1:] x = fusfus x = np.array(x, dtype=np.uint16) gt = np.array(gt, dtype=np.uint16) _, N1, N2, N3 = gt.shape if math.ceil(math.log2(N3)) - math.log2(N3) != 0: Ndif = pow(2, math.ceil(math.log2(N3))) - N3 dif = np.zeros(shape=[N, N1, N2, Ndif], dtype=np.uint16) gt = np.concatenate(gt, dif, axis=-1) x = np.concatenate(x, dif, axis=-1) _, _, _, N3 = gt.shape valori = np.zeros(shape=[N, stepx, stepy, N3]) for j in range(stepx): for i in range(stepy): o = onions_quality(gt[:, j * q_shift:j * q_shift + q_blocks_size, i * q_shift: i * q_shift + size2, :], x[:, j * q_shift:j * q_shift + q_blocks_size, i * q_shift: i * q_shift + size2, :], q_blocks_size) valori[:, j, i, :] = o q2n_idx_map = np.sqrt(np.sum(valori ** 2, axis=-1)) # q2n_index = np.mean(q2n_idx_map) return q2n_idx_map def norm_blocco(x, eps=1e-8): a = x.mean() c = x.std() if c == 0: c = eps return (x - a) / c + 1, a, c def onions_quality(dat1, dat2, size1): dat1 = np.float64(dat1) dat2 = np.float64(dat2) dat2 = np.concatenate([dat2[..., 0, np.newaxis], -dat2[..., 1:]], axis=-1) N, _, _, N3 = dat1.shape size2 = size1 for i in range(N3): a1, s, t = norm_blocco(np.squeeze(dat1[..., i])) # print(s,t) dat1[..., i] = a1 if s == 0: if i == 0: dat2[..., i] = dat2[..., i] - s + 1 else: dat2[..., i] = -(-dat2[..., i] - s + 1) else: if i == 0: dat2[..., i] = ((dat2[..., i] - s) / t) + 1 else: dat2[..., i] = -(((-dat2[..., i] - s) / t) + 1) 
m1 = np.zeros(shape=[N, N3]) m2 = m1.copy() mod_q1m = 0 mod_q2m = 0 mod_q1 = np.zeros(shape=[size1, size2]) mod_q2 = np.zeros(shape=[size1, size2]) for i in range(N3): m1[..., i] = np.mean(np.squeeze(dat1[..., i])) m2[..., i] = np.mean(np.squeeze(dat2[..., i])) mod_q1m += m1[..., i] ** 2 mod_q2m += m2[..., i] ** 2 mod_q1 += np.squeeze(dat1[..., i]) ** 2 mod_q2 += np.squeeze(dat2[..., i]) ** 2 mod_q1m = np.sqrt(mod_q1m) mod_q2m = np.sqrt(mod_q2m) mod_q1 = np.sqrt(mod_q1) mod_q2 = np.sqrt(mod_q2) termine2 = mod_q1m * mod_q2m # 7.97 termine4 = mod_q1m ** 2 + mod_q2m ** 2 # int1 = (size1 * size2) / (size1 * size2 - 1) * np.mean(mod_q1 ** 2) int2 = (size1 * size2) / (size1 * size2 - 1) * np.mean(mod_q2 ** 2) termine3 = int1 + int2 - (size1 * size2) / ((size1 * size2 - 1)) * (mod_q1m ** 2 + mod_q2m ** 2) # 17.8988 ** 2 mean_bias = 2 * termine2 / termine4 # 1 if termine3 == 0: q = np.zeros(shape=[N, 1, N3]) q[:, :, N3 - 1] = mean_bias else: cbm = 2 / termine3 # 32 32 8 qu = onion_mult2D(dat1, dat2) qm = onion_mult(m1.reshape(-1), m2.reshape(-1)) qv = np.zeros(shape=[N, N3]) for i in range(N3): qv[..., i] = (size1 * size2) / ((size1 * size2) - 1) * np.mean(np.squeeze(qu[:, :, i])) q = qv - (size1 * size2) / ((size1 * size2) - 1) * qm q = q * mean_bias * cbm return q def onion_mult2D(onion1, onion2): _, _, _, N3 = onion1.shape if N3 > 1: L = N3 // 2 a = onion1[..., : L] b = onion1[..., L:] b = np.concatenate([b[..., 0, np.newaxis], -b[..., 1:]], axis=-1) c = onion2[..., : L] d = onion2[..., L:] d = np.concatenate([d[..., 0, np.newaxis], -d[..., 1:]], axis=-1) if N3 == 2: ris = np.concatenate([a * c - d * b, a * d + c * b], axis=-1) else: ris1 = onion_mult2D(a, c) ris2 = onion_mult2D(d, np.concatenate([b[..., 0, np.newaxis], -b[..., 1:]], axis=-1)) ris3 = onion_mult2D(np.concatenate([a[..., 0, np.newaxis], -a[..., 1:]], axis=-1), d) ris4 = onion_mult2D(c, b) aux1 = ris1 - ris2 aux2 = ris3 + ris4 ris = np.concatenate([aux1, aux2], axis=-1) else: ris = onion1 * onion2 return 
ris def onion_mult(onion1, onion2): # _, N = onion1.shape N = len(onion1) if N > 1: L = N // 2 a = onion1[:L] b = onion1[L:] # b[1:] = -b[1:] b = np.append(np.array(b[0]), -b[1:]) c = onion2[:L] d = onion2[L:] # d[1:] = -d[1:] d = np.append(np.array(d[0]), -d[1:]) if N == 2: ris = np.append(a * c - d * b, a * d + c * b) else: ris1 = onion_mult(a, c) # b[1:] = -b[1:] ris2 = onion_mult(d, np.append(np.array(b[0]), -b[1:])) # a[1:] = -a[1:] ris3 = onion_mult(np.append(np.array(a[0]), -a[1:]), d) ris4 = onion_mult(c, b) aux1 = ris1 - ris2 aux2 = ris3 + ris4 ris = np.append(aux1, aux2) else: ris = np.array(onion1).reshape(-1) * np.array(onion2).reshape(-1) return ris def compute_index(img_base, img_out, ratio): h = img_out.shape[0] w = img_out.shape[1] chanel = img_out.shape[2] # 计算SAM sum1 = torch.sum(img_base * img_out, 2) sum2 = torch.sum(img_base * img_base, 2) sum3 = torch.sum(img_out * img_out, 2) t = (sum2 * sum3) ** 0.5 numlocal = torch.gt(t, 0) num = torch.sum(numlocal) t = sum1 / t angle = torch.acos(t) sumangle = torch.where(torch.isnan(angle), torch.full_like(angle, 0), angle).sum() if num == 0: averangle = sumangle else: averangle = sumangle / num SAM = averangle * 180 / 3.14159256 # 计算ERGAS summ = 0 for i in range(chanel): a1 = torch.mean((img_base[:, :, i] - img_out[:, :, i]) ** 2) m1 = torch.mean(img_base[:, :, i]) a2 = m1 * m1 summ = summ + a1 / a2 ERGAS = 100 * (1 / ratio) * ((summ / chanel) ** 0.5) return SAM, ERGAS import decimal decimal.getcontext().rounding = "ROUND_HALF_UP" n_digits = 6 def analysis_accu(img_base, img_out, ratio, flag_cut_bounds=True, dim_cut=21, choices=4): if flag_cut_bounds: img_base = img_base[dim_cut - 1:-dim_cut, dim_cut - 1:-dim_cut, :] #: img_out = img_out[dim_cut - 1:-dim_cut, dim_cut - 1:-dim_cut, :] #: # q2n # q2n_index = q2n(img_base, img_out, q_blocks_size=32, q_shift=32) h = img_out.shape[0] w = img_out.shape[1] chanel = img_out.shape[2] # 计算SAM sum1 = torch.sum(img_base * img_out, 2) sum2 = torch.sum(img_base * 
img_base, 2) sum3 = torch.sum(img_out * img_out, 2) t = (sum2 * sum3) ** 0.5 numlocal = torch.gt(t, 0) num = torch.sum(numlocal) t = sum1 / t angle = torch.acos(t) sumangle = torch.where(torch.isnan(angle), torch.full_like(angle, 0), angle).sum() if num == 0: averangle = sumangle else: averangle = sumangle / num # you can adopt https://segmentfault.com/a/1190000018929994 to compute, too. # averangle = math.ceil(averangle * 1000000) / 1000000 averangle = (averangle * 10 ** n_digits).round() / (10 ** n_digits) # SAM = decimal.Decimal(averangle.cpu().numpy() * 180 / 3.14159256).quantize(decimal.Decimal("0.00000")) SAM = averangle * 180 / 3.14159256 # 计算ERGAS summ = 0 for i in range(chanel): a1 = torch.mean((img_base[:, :, i] - img_out[:, :, i]) ** 2) m1 = torch.mean(img_base[:, :, i]) a2 = m1 * m1 summ = summ + a1 / a2 ERGAS = 100 * (1 / ratio) * ((summ / chanel) ** 0.5) # 计算PSNR # mse = torch.mean((img_base - img_out) ** 2, 0) # mse = torch.mean(mse, 0) # rmse = mse ** 0.5 # temp = torch.log(1 / rmse) / math.log(10) PSNR = 10 * torch.log10(math.pow(1.0, 2) / torch.mean((img_out-img_base)**2, [0, 1])) # SSIM # img_base = img_base.permute(2, 0, 1) # img_out = img_out.permute(2, 0, 1) # img_base = img_base.unsqueeze(0) # img_out = img_out.unsqueeze(0) # SSIM = _ssim(img_base.permute(2, 0, 1).unsqueeze(0), img_out.permute(2, 0, 1).unsqueeze(0)) # index = torch.zeros((5, chanel + 1)) # index[0, 1:chanel + 1] = CC # index[1, 1:chanel + 1] = PSNR # index[2, 1:chanel + 1] = SSIM # index[0, 0] = torch.mean(CC) # index[1, 0] = torch.mean(PSNR) # index[2, 0] = torch.mean(SSIM) # index[3, 0] = SAM # index[4, 0] = ERGAS PSNR = torch.mean(PSNR) # SSIM = torch.mean(SSIM) # q2n_index = np.mean(q2n_index) if choices == 5: # 计算CC C1 = torch.sum(torch.sum(img_base * img_out, 0), 0) - h * w * ( torch.mean(torch.mean(img_base, 0), 0) * torch.mean(torch.mean(img_out, 0), 0)) C2 = torch.sum(torch.sum(img_out ** 2, 0), 0) - h * w * (torch.mean(torch.mean(img_out, 0), 0) ** 2) C3 = 
torch.sum(torch.sum(img_base ** 2, 0), 0) - h * w * (torch.mean(torch.mean(img_base, 0), 0) ** 2) CC = C1 / ((C2 * C3) ** 0.5) CC = torch.mean(CC) return {'SAM': SAM, 'ERGAS': ERGAS, 'PSNR': PSNR, 'CC': CC} # , q2n_index return {'SAM': SAM, 'ERGAS': ERGAS, 'PSNR': PSNR, } def _ssim(img1, img2): img1 = img1.float() img2 = img2.float() channel = img1.shape[1] max_val = 1 _, c, w, h = img1.size() window_size = min(w, h, 11) sigma = 1.5 * window_size / 11 window = create_window(window_size, sigma, channel).cuda() mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel) mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel) mu1_sq = mu1.pow(2) mu2_sq = mu2.pow(2) mu1_mu2 = mu1 * mu2 sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2 C1 = (0.01 * max_val) ** 2 C2 = (0.03 * max_val) ** 2 V1 = 2.0 * sigma12 + C2 V2 = sigma1_sq + sigma2_sq + C2 ssim_map = ((2 * mu1_mu2 + C1) * V1) / ((mu1_sq + mu2_sq + C1) * V2) t = ssim_map.shape return ssim_map.mean(2).mean(2) from torch.autograd import Variable def gaussian(window_size, sigma): gauss = torch.Tensor([math.exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)]) return gauss / gauss.sum() def create_window(window_size, sigma, channel): _1D_window = gaussian(window_size, sigma).unsqueeze(1) _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous()) return window def compare_index(A): A_size = A.shape ite_n = A_size[2] band_n = A_size[1] C_better = A[:, 0, 0] ind = 0 for i in range(ite_n): score_b = 0 score_c = 0 C_compare = A[:, 0, i] if (C_better[0] > C_compare[0]): score_b = score_b + 1 else: score_c = score_c + 1 if (C_better[1] > 
C_compare[1]): score_b = score_b + 1 else: score_c = score_c + 1 if (C_better[2] > C_compare[2]): score_b = score_b + 1 else: score_c = score_c + 1 if (C_better[3] < C_compare[3]): score_b = score_b + 1 else: score_c = score_c + 1 if (C_better[4] < C_compare[4]): score_b = score_b + 1 else: score_c = score_c + 1 if (score_c > score_b): C_better = A[:, 0, i] ind = i C_best = A[:, :, ind] best_ind = ind + 1 return C_best, best_ind if __name__ == "__main__": # a = np.zeros(shape=[256, 256]) # print(a[:255, :255].shape) from scipy import io as sio ms = sio.loadmat('../../tests/I_MS.mat')['I_MS'] / 2047.0 gt = sio.loadmat('../../tests/I_GT.mat')['I_GT'] / 2047.0 ms = torch.from_numpy(ms).float() #* 2047.0 gt = torch.from_numpy(gt).float() #* 2047.0 print(analysis_accu(ms, gt, ratio=4, dim_cut=21)) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/common/psdata.py ================================================ # GPL License # Copyright (C) 2021 , UESTC # All Rights Reserved # @Author : Xiao Wu, LiangJian Deng # @reference: import glob import torch from torch.utils.data import DataLoader class PansharpeningSession(): def __init__(self, args): self.dataloaders = {} self.samples_per_gpu = args.samples_per_gpu self.workers_per_gpu = args.workers_per_gpu # self.patch_size = args.patch_size self.writers = {} self.args = args def get_dataloader(self, dataset_name, distributed): if any(list(map(lambda x: x in dataset_name, ['wv2', 'wv3', 'wv4', 'qb']))): if "hp" in dataset_name: # high-pass filter from UDL.pansharpening.common.dataset_hp import Dataset_Pro dataset_name = dataset_name.split('_')[0] #'wv2_hp' dataset = Dataset_Pro('/'.join([self.args.data_dir, 'training_data', f'train_{dataset_name}_10000.h5']), img_scale=self.args.img_range) else: from UDL.pansharpening.common.dataset import Dataset_Pro dataset = Dataset_Pro('/'.join([self.args.data_dir, 'training_data', f'train_{dataset_name}_10000.h5']), 
img_scale=self.args.img_range) else: print(f"train_{dataset_name} is not supported.") raise NotImplementedError sampler = None if distributed: sampler = torch.utils.data.distributed.DistributedSampler(dataset) # if not dataset_name in self.dataloaders: dataloaders = \ DataLoader(dataset, batch_size=self.samples_per_gpu, persistent_workers=(True if self.workers_per_gpu > 0 else False), pin_memory=True, shuffle=(sampler is None), num_workers=self.workers_per_gpu, drop_last=True, sampler=sampler) return dataloaders, sampler def get_eval_dataloader(self, dataset_name, distributed): if 'valid' in dataset_name: if "hp" in dataset_name: from UDL.pansharpening.common.dataset_hp import Dataset_Pro dataset = Dataset_Pro( '/'.join([self.args.data_dir, 'validation_data', f'{dataset_name}.h5']), img_scale=self.args.img_range) else: from UDL.pansharpening.common.dataset import Dataset_Pro dataset = Dataset_Pro('/'.join([self.args.data_dir, 'validation_data', f'{dataset_name}.h5']), img_scale=self.args.img_range) elif 'TestData' in dataset_name: if 'hp' in dataset_name: satellite = dataset_name.split('_')[-2] else: satellite = dataset_name.split('_')[-1] from UDL.pansharpening.evaluation.ps_evaluate import MultiExmTest_h5 dataset = MultiExmTest_h5('/'.join([self.args.data_dir, 'test_data', satellite.lower(), f"{dataset_name.replace('_hp', '')}.h5"]), dataset_name, img_scale=self.args.img_range) elif 'RR' in dataset_name or 'FR' in dataset_name: splits = dataset_name.split('_') if 'hp' in dataset_name: satellite = splits[-3] else: satellite = splits[-2] from UDL.pansharpening.evaluation.ps_evaluate import SingleDataset dataset = SingleDataset(['/'.join([self.args.data_dir, 'test_data', satellite.lower(), dataset_name.replace('_hp', '')+".mat"])], dataset_name, img_scale=self.args.img_range) else: print(f"{dataset_name} is not supported.") raise NotImplementedError sampler = None if distributed: sampler = torch.utils.data.distributed.DistributedSampler(dataset) # if not 
dataset_name in self.dataloaders: dataloaders = \ DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1, drop_last=False, sampler=sampler) return dataloaders, sampler if __name__ == '__main__': # from option import args import argparse parser = argparse.ArgumentParser() args = parser.parse_args() args.samples_per_gpu = 8 args.workers_per_gpu = 0 args.data_dir = "C:/Datasets/pansharpening_2" args.dataset = 'gf2' # survey # wv3 9714 16-64 # wv2 15084 16-64 # gf2 19809 16-64 # qb 17139 16-64 sess = PansharpeningSession(args) train_loader, _ = sess.get_test_dataloader(args.dataset, False) print(len(train_loader)) # import scipy.io as sio # # x = sio.loadmat("D:/Datasets/pansharpening/training_data/train1.mat") # print(x.keys()) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/configs/__init__.py ================================================ # from .models.builder import PANSHARPENING_MODELS, build_model from importlib import import_module import os join = os.path.join dirname = os.path.dirname pkg_list = [import_module('.' 
# Checkpoint saving: keep the checkpoint selected by the 'loss' indicator.
# checkpoint_config = dict(interval=1)
checkpoint_config = {'type': 'ModelCheckpoint', 'indicator': 'loss'}

# Logging: emit a text log entry every 100 iterations.
# yapf:disable
log_config = {
    'interval': 100,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # {'type': 'TensorboardLoggerHook'}
    ],
}
# yapf:enable

# dist_params = dict(backend='nccl')
log_level = 'INFO'

# Checkpoint paths; both point at the same machine-specific file.
load_from = "D:/ProjectSets/NDA/UDL/UDL/results/pansharpening/wv3/FusionNet/Test/model_2022-04-02-12-02-55/275.pth.tar"
resume_from = "D:/ProjectSets/NDA/UDL/UDL/results/pansharpening/wv3/FusionNet/Test/model_2022-04-02-12-02-55/275.pth.tar"
workflow = [('train', 1)]

# Optimizer
optimizer = {'type': 'Adam', 'lr': 3e-4}
optimizer_config = {'grad_clip': None}
lr_config = None

# Learning policy
runner = {'type': 'EpochBasedRunner', 'max_epochs': 275}
parser.add_argument('--lr_scheduler', default=True, type=bool) parser.add_argument('--samples_per_gpu', default=8, type=int, # 8 metavar='N', help='mini-batch size (default: 256)') parser.add_argument('--print-freq', '-p', default=50, type=int, metavar='N', help='print frequency (default: 10)') parser.add_argument('--epochs', default=1000, type=int) parser.add_argument('--workers_per_gpu', default=0, type=int) parser.add_argument('--resume_from', default=model_path, type=str, metavar='PATH', help='path to latest checkpoint (default: none)') # * Model and Dataset parser.add_argument('--arch', '-a', metavar='ARCH', default='BDPN', type=str, choices=['PanNet', 'DiCNN', 'PNN', 'FusionNet']) parser.add_argument('--dataset', default={'train': 'wv3', 'val': 'NY1_WV3_RR'}, type=str, choices=[None, 'wv2', 'wv3', 'wv4', 'qb', 'TestData_qb', 'TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'San_Francisco_QB_RR', 'San_Francisco_QB_FR', 'NY1_WV3_FR', 'NY1_WV3_RR', 'Alice_WV4_FR', 'Alice_WV4_RR', 'Rio_WV2_FR', 'Rio_WV2_RR'], help="training choices: ['wv2', 'wv3', 'wv4', 'qb']," "validation choices: ['valid_wv2_10000','valid_wv3_10000', 'valid_wv4_10000', 'valid_qb_10000']" "test choices is ['TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'TestData_qb'], and others with RR/FR") parser.add_argument('--eval', default=False, type=bool, help="performing evalution for patch2entire") args = parser.parse_args() args.start_epoch = args.best_epoch = 1 args.experimental_desc = "Test" # cfg.save_fmt = 'png' cfg.img_range = 2047.0 cfg.merge_args2cfg(args) print(cfg.pretty_text) # cfg.workflow = [('train', 50), ('val', 1)] # cfg.workflow = [('val', 1)] # only val workflow means perform test. 
class parser_args(TaskDispatcher, name='DiCNN1'):
    """Command-line and config options for the DiCNN1 pansharpening model.

    Parses ``sys.argv`` with argparse, merges the result into ``cfg`` and
    then into this object via ``merge_from_dict``.

    Args:
        cfg: base configuration; when ``None`` it is created with
            ``UDL.Basis.option.panshaprening_cfg()``.
    """

    def __init__(self, cfg=None):
        super(parser_args, self).__init__()

        def str2bool(value):
            # argparse's ``type=bool`` turns every non-empty string (even
            # "False") into True, so parse the text explicitly.
            if isinstance(value, bool):
                return value
            text = str(value).strip().lower()
            if text in ('true', 't', 'yes', 'y', 'on', '1'):
                return True
            if text in ('false', 'f', 'no', 'n', 'off', '0', ''):
                return False
            raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')

        if cfg is None:
            from UDL.Basis.option import panshaprening_cfg
            cfg = panshaprening_cfg()

        script_path = os.path.dirname(os.path.dirname(__file__))
        root_dir = script_path.split(cfg.task)[0]

        # model_path = f'{root_dir}/results/{cfg.task}/gf2/DiCNN1/Test/.pth.tar'
        # model_path = f'{root_dir}/results/{cfg.task}/qb/DiCNN1/Test/m.pth.tar'
        model_path = f'{root_dir}/results/{cfg.task}/wv3/DiCNN1/Test/.pth.tar'

        parser = argparse.ArgumentParser(description='PyTorch Pansharpening Training')
        # * Logger
        parser.add_argument('--out_dir', metavar='DIR', default=f'{root_dir}/results/{cfg.task}',
                            help='path to save model')
        # * Training
        parser.add_argument('--lr', default=2e-4, type=float)  # 1e-4 2e-4 8
        parser.add_argument('--lr_scheduler', default=True, type=str2bool)
        parser.add_argument('--samples_per_gpu', default=64, type=int,  # 8
                            metavar='N', help='mini-batch size (default: %(default)s)')
        parser.add_argument('--print-freq', '-p', default=1, type=int,
                            metavar='N', help='print frequency (default: %(default)s)')
        parser.add_argument('--epochs', default=5000, type=int)
        parser.add_argument('--workers_per_gpu', default=0, type=int)
        parser.add_argument('--resume_from',
                            default=model_path,
                            type=str, metavar='PATH',
                            help='path to latest checkpoint (default: none)')
        # * Model and Dataset
        # 'DiCNN1' is the default and therefore must also be an accepted choice.
        parser.add_argument('--arch', '-a', metavar='ARCH', default='DiCNN1', type=str,
                            choices=['PanNet', 'DiCNN', 'DiCNN1', 'PNN', 'FusionNet'])
        # NOTE(review): the default is a dict while a value given on the
        # command line is a plain string - confirm how consumers handle both.
        parser.add_argument('--dataset', default={'train': 'wv3', 'val': 'NY1_WV3_RR'}, type=str,
                            choices=[None, 'wv2', 'wv3', 'wv4', 'qb',
                                     'TestData_qb', 'TestData_wv2', 'TestData_wv3', 'TestData_wv4',
                                     'San_Francisco_QB_RR', 'San_Francisco_QB_FR',
                                     'NY1_WV3_FR', 'NY1_WV3_RR',
                                     'Alice_WV4_FR', 'Alice_WV4_RR',
                                     'Rio_WV2_FR', 'Rio_WV2_RR'],
                            help="training choices: ['wv2', 'wv3', 'wv4', 'qb'],"
                                 "validation choices: ['valid_wv2_10000','valid_wv3_10000', 'valid_wv4_10000', 'valid_qb_10000']"
                                 "test choices is ['TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'TestData_qb'], and others with RR/FR")
        parser.add_argument('--eval', default=False, type=str2bool,
                            help="performing evalution for patch2entire")

        args = parser.parse_args()
        args.start_epoch = args.best_epoch = 1
        args.experimental_desc = "Test"
        # cfg.save_fmt = 'png'
        cfg.img_range = 2047.0  # 1023.0
        cfg.merge_args2cfg(args)
        print(cfg.pretty_text)
        cfg.workflow = [('train', 1)]
        self.merge_from_dict(cfg)
class parser_args(TaskDispatcher, name='FusionNet'):
    """Command-line and config options for the FusionNet pansharpening model.

    Parses ``sys.argv`` with argparse, merges the result into ``cfg`` and
    then into this object via ``merge_from_dict``.

    Args:
        cfg: base configuration; when ``None`` it is created with
            ``UDL.Basis.option.panshaprening_cfg()``.
    """

    def __init__(self, cfg=None):
        super(parser_args, self).__init__()

        def str2bool(value):
            # argparse's ``type=bool`` turns every non-empty string (even
            # "False") into True, so parse the text explicitly.
            if isinstance(value, bool):
                return value
            text = str(value).strip().lower()
            if text in ('true', 't', 'yes', 'y', 'on', '1'):
                return True
            if text in ('false', 'f', 'no', 'n', 'off', '0', ''):
                return False
            raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')

        if cfg is None:
            from UDL.Basis.option import panshaprening_cfg
            cfg = panshaprening_cfg()

        script_path = os.path.dirname(os.path.dirname(__file__))
        root_dir = script_path.split(cfg.task)[0]

        # Placeholder checkpoint path (no substitution needed, so not an f-string).
        model_path = './.pth.tar'

        parser = argparse.ArgumentParser(description='PyTorch Pansharpening Training')
        # * Logger
        parser.add_argument('--out_dir', metavar='DIR', default=f'{root_dir}/results/{cfg.task}',
                            help='path to save model')
        parser.add_argument('--mode', default=argparse.SUPPRESS,
                            help='protective declare, please ignore it')

        # * Training
        parser.add_argument('--lr', default=3e-4, type=float)
        # parser.add_argument('--lr_scheduler', default=True, type=bool)
        parser.add_argument('--samples_per_gpu', default=32, type=int,
                            metavar='N', help='mini-batch size (default: %(default)s)')
        parser.add_argument('--print-freq', '-p', default=50, type=int,
                            metavar='N', help='print frequency (default: %(default)s)')
        parser.add_argument('--seed', default=1, type=int,
                            help='seed for initializing training. ')
        parser.add_argument('--epochs', default=400, type=int)
        parser.add_argument('--workers_per_gpu', default=0, type=int)
        parser.add_argument('--resume_from',
                            default=model_path,
                            type=str, metavar='PATH',
                            help='path to latest checkpoint (default: none)')
        # * Model and Dataset
        parser.add_argument('--arch', '-a', metavar='ARCH', default='FusionNet', type=str,
                            choices=['PanNet', 'DiCNN', 'PNN', 'FusionNet'])
        # NOTE(review): the default is a dict while a value given on the
        # command line is a plain string - confirm how consumers handle both.
        parser.add_argument('--dataset', default={'train': 'wv3', 'val': 'NY1_WV3_RR'}, type=str,
                            choices=[None, 'wv2', 'wv3', 'wv4', 'qb',
                                     'TestData_qb', 'TestData_wv2', 'TestData_wv3', 'TestData_wv4',
                                     'San_Francisco_QB_RR', 'San_Francisco_QB_FR',
                                     'NY1_WV3_FR', 'NY1_WV3_RR',
                                     'Alice_WV4_FR', 'Alice_WV4_RR',
                                     'Rio_WV2_FR', 'Rio_WV2_RR'],
                            help="training choices: ['wv2', 'wv3', 'wv4', 'qb'],"
                                 "validation choices: ['valid_wv2_10000','valid_wv3_10000', 'valid_wv4_10000', 'valid_qb_10000']"
                                 "test choices is ['TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'TestData_qb'], and others with RR/FR")
        parser.add_argument('--eval', default=False, type=str2bool,
                            help="performing evalution for patch2entire")

        args = parser.parse_args()
        args.start_epoch = args.best_epoch = 1
        args.experimental_desc = 'Test'

        cfg.merge_args2cfg(args)
        cfg.save_fmt = "mat"
        # cfg.workflow = [('train', 10), ('val', 1)]
        cfg.workflow = [('val', 1), ('train', 1)]
        # cfg.config = f"{script_path}/configs/hook_configs.py"
        cfg.use_tfb = False
        cfg.img_range = 2047.0  # 1023.0

        self.merge_from_dict(cfg)
default={'train': 'wv3', 'val': 'NY1_WV3_RR'}, type=str, choices=[None, 'wv2', 'wv3', 'wv4', 'qb', 'TestData_qb', 'TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'San_Francisco_QB_RR', 'San_Francisco_QB_FR', 'NY1_WV3_FR', 'NY1_WV3_RR', 'Alice_WV4_FR', 'Alice_WV4_RR', 'Rio_WV2_FR', 'Rio_WV2_RR'], help="training choices: ['wv2', 'wv3', 'wv4', 'qb']," "validation choices: ['valid_wv2_10000','valid_wv3_10000', 'valid_wv4_10000', 'valid_qb_10000']" "test choices is ['TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'TestData_qb'], and others with RR/FR") parser.add_argument('--eval', default=False, type=bool, help="performing evalution for patch2entire") args = parser.parse_args() args.start_epoch = args.best_epoch = 1 args.experimental_desc = "Test" # cfg.save_fmt = 'png' cfg.img_range = 2047.0 cfg.merge_args2cfg(args) print(cfg.pretty_text) # cfg.workflow = [('train', 50), ('val', 1)] # cfg.workflow = [('val', 1)] cfg.workflow = [('train', 50)] self.merge_from_dict(cfg) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/configs/option_pannet.py ================================================ import argparse # from UDL.Basis.option import panshaprening_cfg, Config, os from UDL.AutoDL import TaskDispatcher import os class parser_args(TaskDispatcher, name='PanNet'): def __init__(self, cfg=None): super(parser_args, self).__init__() if cfg is None: from UDL.Basis.option import panshaprening_cfg cfg = panshaprening_cfg() script_path = os.path.dirname(os.path.dirname(__file__)) root_dir = script_path.split(cfg.task)[0] # model_path = f'{root_dir}/results/{cfg.task}/qb_hp/PanNet/Test/.pth.tar' model_path = f'{root_dir}/results/{cfg.task}/wv3_hp/PanNet/Test/.pth.tar' # model_path = f'' parser = argparse.ArgumentParser(description='PyTorch Pansharpening Training') # * Logger parser.add_argument('--out_dir', metavar='DIR', default=f'{root_dir}/results/{cfg.task}', help='path to save model') # * Training parser.add_argument('--lr', 
default=1e-3, type=float) # 1e-4 2e-4 8 parser.add_argument('--lr_scheduler', default=True, type=bool) parser.add_argument('--samples_per_gpu', default=32, type=int, # 8 metavar='N', help='mini-batch size (default: 256)') parser.add_argument('--print-freq', '-p', default=50, type=int, metavar='N', help='print frequency (default: 10)') parser.add_argument('--seed', default=1, type=int, help='seed for initializing training. ') parser.add_argument('--epochs', default=450, type=int) parser.add_argument('--workers_per_gpu', default=0, type=int) parser.add_argument('--resume_from', default=model_path, type=str, metavar='PATH', help='path to latest checkpoint (default: none)') # * Model and Dataset parser.add_argument('--arch', '-a', metavar='ARCH', default='PanNet', type=str, choices=['PanNet', 'DiCNN', 'PNN', 'FusionNet']) parser.add_argument('--dataset', default={'train': 'wv3_hp', 'val': 'NY1_WV3_RR_hp'}, type=str, choices=[None, 'wv2_hp', 'wv3_hp', 'wv4_hp', 'qb_hp', 'TestData_qb_hp', 'TestData_wv2_hp', 'TestData_wv3_hp', 'TestData_wv4_hp', 'San_Francisco_QB_RR_hp', 'San_Francisco_QB_FR_hp', 'NY1_WV3_FR_hp', 'NY1_WV3_RR_hp', 'Alice_WV4_FR', 'Alice_WV4_RR_hp', 'Rio_WV2_FR_hp', 'Rio_WV2_RR_hp'], help="training choices: ['wv2', 'wv3', 'wv4', 'qb']," "validation choices: ['valid_wv2_10000','valid_wv3_10000', 'valid_wv4_10000', 'valid_qb_10000']" "test choices is ['TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'TestData_qb'], and others with RR/FR") parser.add_argument('--eval', default=False, type=bool, help="performing evalution for patch2entire") args = parser.parse_args() args.start_epoch = args.best_epoch = 1 args.experimental_desc = "Test" cfg.merge_args2cfg(args) cfg.img_range = 2047.0 cfg.reg = True cfg.workflow = [('train', 1)] print(cfg.pretty_text) cfg.workflow = [('train', 1)] self.merge_from_dict(cfg) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/configs/option_pnn.py 
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    argparse passes every command-line value to ``type`` as a string, and
    ``bool('False')`` is ``True`` because any non-empty string is truthy.
    This converter maps the usual spellings explicitly instead.

    Args:
        value: a ``bool`` (returned unchanged) or a string token.

    Returns:
        bool: the parsed flag.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


class parser_args(TaskDispatcher, name='PNN'):
    """Command-line / config options for the PNN pansharpening model.

    The class is declared with ``name='PNN'``.
    NOTE(review): presumably this is the key TaskDispatcher uses to look the
    class up -- confirm in UDL.AutoDL.
    """

    def __init__(self, cfg=None):
        """Parse ``sys.argv`` and merge the result into ``cfg`` and into ``self``.

        Args:
            cfg: base configuration object; when ``None`` a fresh
                ``panshaprening_cfg()`` is created.
        """
        super(parser_args, self).__init__()

        if cfg is None:
            from UDL.Basis.option import panshaprening_cfg
            cfg = panshaprening_cfg()

        # Two levels above this file; everything before the task name is
        # treated as the repository root (backslashes normalised for Windows).
        script_path = os.path.dirname(os.path.dirname(__file__))
        root_dir = script_path.split(cfg.task)[0].replace('\\', '/')

        model_path = '.pth.tar'

        parser = argparse.ArgumentParser(description='PyTorch Pansharpening Training')
        # * Logger
        parser.add_argument('--out_dir', metavar='DIR', default=f'{root_dir}/results/{cfg.task}',
                            help='path to save model')
        # * Training
        parser.add_argument('--lr', default=1e-3, type=float)  # 1e-4 2e-4 8
        # _str2bool instead of bool: `--lr_scheduler False` must really disable it.
        parser.add_argument('--lr_scheduler', default=True, type=_str2bool)
        parser.add_argument('--samples_per_gpu', default=64, type=int,  # 8
                            metavar='N', help='mini-batch size (default: %(default)s)')
        parser.add_argument('--print-freq', '-p', default=500, type=int,
                            metavar='N', help='print frequency (default: %(default)s)')
        parser.add_argument('--seed', default=1, type=int,
                            help='seed for initializing training. ')
        parser.add_argument('--epochs', default=12000, type=int)
        parser.add_argument('--workers_per_gpu', default=0, type=int)
        parser.add_argument('--resume_from', default=model_path, type=str, metavar='PATH',
                            help='path to latest checkpoint (default: none)')
        # * Model and Dataset
        parser.add_argument('--arch', '-a', metavar='ARCH', default='PNN', type=str,
                            choices=['PanNet', 'DiCNN', 'PNN', 'FusionNet'])
        # The default is a dict; argparse only applies `type`/`choices` to
        # values given on the command line, so the dict passes through as is.
        # NOTE(review): a command-line value arrives as a plain string --
        # confirm that downstream code accepts both forms.
        parser.add_argument('--dataset', default={'train': 'wv3', 'val': 'NY1_WV3_RR'}, type=str,
                            choices=[None, 'wv2', 'wv3', 'wv4', 'qb',
                                     'TestData_qb', 'TestData_wv2', 'TestData_wv3', 'TestData_wv4',
                                     'San_Francisco_QB_RR', 'San_Francisco_QB_FR',
                                     'NY1_WV3_FR', 'NY1_WV3_RR',
                                     'Alice_WV4_FR', 'Alice_WV4_RR',
                                     'Rio_WV2_FR', 'Rio_WV2_RR'],
                            help="training choices: ['wv2', 'wv3', 'wv4', 'qb'],"
                                 "validation choices: ['valid_wv2_10000','valid_wv3_10000', 'valid_wv4_10000', 'valid_qb_10000']"
                                 "test choices is ['TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'TestData_qb'], and others with RR/FR")
        parser.add_argument('--eval', default=False, type=_str2bool,
                            help="performing evalution for patch2entire")

        args = parser.parse_args()
        args.start_epoch = args.best_epoch = 1
        args.experimental_desc = 'Test'
        cfg.merge_args2cfg(args)
        cfg.workflow = [('train', 1)]
        cfg.img_range = 2047.0
        print(cfg.pretty_text)
        self.merge_from_dict(cfg)
def load_gt_compared(file_path_gt, file_path_compared):
    """Load a ground-truth image and a result to compare against it from two .mat files.

    Args:
        file_path_gt: path of a .mat file holding the variable ``gt``.
        file_path_compared: path of a .mat file holding the variable
            ``output_dmdnet_newdata6``.

    Returns:
        tuple: ``(gt, compared_data)`` as torch tensors, where ``gt`` is the
        stored array divided by 2047.0 and ``compared_data`` is the stored
        array multiplied by 2047.0.
        NOTE(review): the two scalings go in opposite directions -- confirm
        this is intended before comparing the tensors directly.

    Raises:
        KeyError: if either file lacks its expected variable. The message
            lists the variables that are present.
    """
    data1 = sio.loadmat(file_path_gt)  # HxWxC
    data2 = sio.loadmat(file_path_compared)

    # Fail loudly up front. The previous version printed the keys and carried
    # on, which then crashed with UnboundLocalError on the missing ``gt``.
    for mat, key, path in ((data1, 'gt', file_path_gt),
                           (data2, 'output_dmdnet_newdata6', file_path_compared)):
        if key not in mat:
            available = sorted(k for k in mat.keys() if not k.startswith('__'))
            raise KeyError(f"variable '{key}' not found in {path}; available: {available}")

    gt = torch.from_numpy(data1['gt'] / 2047.0)
    compared_data = torch.from_numpy(data2['output_dmdnet_newdata6'] * 2047.0)
    return gt, compared_data
def load_dataset_H5(file_path, scale, use_cuda=True):
    """Read a whole HDF5 dataset into float32 tensors.

    Args:
        file_path: path of the HDF5 file; it must hold ``lms``, ``ms`` and
            ``pan`` and may hold ``gt``.
        scale: divisor applied to ``lms``, ``ms`` and ``pan`` (``gt`` is left
            unscaled).
        use_cuda: when True every returned tensor is moved to the current
            CUDA device.

    Returns:
        dict: ``{'lms', 'ms', 'pan', 'gt'}``. ``gt`` is permuted with
        ``[0, 2, 3, 1]``, which requires it to be 4-D.
        NOTE(review): presumably NxCxHxW -> NxHxWxC -- confirm against the
        data files. When the file has no ``gt`` the unscaled ``lms`` array is
        used in its place, on both the CPU and the CUDA path.
    """
    # Read-only, and closed as soon as the arrays are in memory. The handle
    # used to stay open for the life of the process.
    with h5py.File(file_path, 'r') as data:
        print(data.keys())
        arrays = {
            'lms': data['lms'][...] / scale,
            'ms': data['ms'][...] / scale,
            'pan': data['pan'][...] / scale,
        }
        # Fall back to the (unscaled) lms when no ground truth is stored.
        gt_key = 'lms' if data.get('gt', None) is None else 'gt'
        arrays['gt'] = data[gt_key][...]

    tensors = {}
    for key, array in arrays.items():
        tensor = torch.from_numpy(array)
        if use_cuda:
            tensor = tensor.cuda()
        tensors[key] = tensor.float()

    tensors['gt'] = tensors['gt'].permute([0, 2, 3, 1])
    return tensors
class SingleDataset(Dataset):
    """Dataset that loads one sample per file through a single-file loader.

    The loader is chosen from the dataset name: names containing ``'hp'`` use
    ``load_dataset_singlemat_hp``, all others use ``load_dataset_singlemat``.
    """

    def __init__(self, file_lists, dataset_name, img_scale, dataset=None):
        """
        Args:
            file_lists: sequence of file paths, one sample each.
            dataset_name: name used only to pick the loader (see class doc).
            img_scale: value forwarded to the loader as its ``scale`` argument.
            dataset: unused; kept so existing call sites keep working.
        """
        self.img_scale = img_scale
        self.file_lists = file_lists
        print(f"loading SingleDataset: {file_lists} with {img_scale}")
        self.file_nums = len(file_lists)
        self.dataset_name = dataset_name
        # A name either contains 'hp' or it does not, so the former third
        # branch (raise NotImplementedError) could never run and was removed.
        if 'hp' in dataset_name:
            self.dataset = load_dataset_singlemat_hp
        else:
            self.dataset = load_dataset_singlemat

    def __getitem__(self, idx):
        """Load the sample for ``idx`` (wrapped modulo the number of files).

        Returns:
            dict: ``gt``, ``lms``, ``ms``, ``pan`` (with a leading dimension
            added by ``unsqueeze(dim=0)``) and ``filename``.
        """
        file_path = self.file_lists[idx % self.file_nums]
        test_lms, test_ms, test_pan, gt = self.dataset(file_path, self.img_scale)
        # Both dataset flavours returned exactly this dict, so one return is enough.
        return {'gt': gt,
                'lms': test_lms,
                'ms': test_ms,
                'pan': test_pan.unsqueeze(dim=0),
                'filename': file_path}

    def __len__(self):
        """Number of files in the dataset."""
        return self.file_nums
class Dataset_Pro(data.Dataset):
    """In-memory dataset read from one HDF5 file.

    The file must hold the arrays ``gt``, ``lms``, ``ms`` and ``pan``; all
    four are loaded completely at construction time, converted to float32 and
    divided by ``img_scale``.
    NOTE(review): the original comments describe the arrays as NxCxHxW with a
    single-channel ``pan`` -- confirm against the data files.
    """

    def __init__(self, file_path, img_scale=2047.0):
        """
        Args:
            file_path: path of the HDF5 file.
            img_scale: divisor applied to every array (default 2047.0, the
                value that used to be hard-coded).

        Raises:
            ValueError: if ``pan`` is not 4-D with exactly one channel on
                axis 1 (the old squeeze-based code rejected this too).
        """
        super(Dataset_Pro, self).__init__()

        def scaled(array):
            return torch.from_numpy(np.array(array, dtype=np.float32) / img_scale)

        # Read-only and closed as soon as the arrays are copied into memory.
        # The handle used to stay open for the life of the object.
        with h5py.File(file_path, 'r') as h5:
            self.gt = scaled(h5["gt"][...])
            print(self.gt.size())
            self.lms = scaled(h5["lms"][...])
            # The former transpose/permute round trips were no-ops on the
            # layout, so the arrays are converted directly.
            self.ms = scaled(h5["ms"][...])
            pan = h5['pan'][...]

        if pan.ndim != 4 or pan.shape[1] != 1:
            raise ValueError(f"'pan' must have shape Nx1xHxW, got {pan.shape}")
        self.pan = scaled(pan)

    # Required by torch.utils.data.Dataset
    def __getitem__(self, index):
        """Return ``(gt, lms, ms, pan)`` for one sample, each as a float tensor."""
        return self.gt[index, :, :, :].float(), \
               self.lms[index, :, :, :].float(), \
               self.ms[index, :, :, :].float(), \
               self.pan[index, :, :, :].float()

    # Required by torch.utils.data.Dataset
    def __len__(self):
        """Number of samples (first dimension of ``gt``)."""
        return self.gt.shape[0]
class Dataset_Pro(data.Dataset):
    """In-memory dataset read from one HDF5 file.

    The file must hold the arrays ``gt``, ``lms``, ``ms`` and ``pan``; all
    four are loaded completely at construction time, converted to float32 and
    divided by ``img_scale``.
    NOTE(review): the original comments describe the arrays as NxCxHxW with a
    single-channel ``pan`` -- confirm against the data files.
    """

    def __init__(self, file_path, img_scale=2047.0):
        """
        Args:
            file_path: path of the HDF5 file.
            img_scale: divisor applied to every array (default 2047.0, the
                value that used to be hard-coded).

        Raises:
            ValueError: if ``pan`` is not 4-D with exactly one channel on
                axis 1 (the old squeeze-based code rejected this too).
        """
        super(Dataset_Pro, self).__init__()

        def scaled(array):
            return torch.from_numpy(np.array(array, dtype=np.float32) / img_scale)

        # Read-only and closed as soon as the arrays are copied into memory.
        # The handle used to stay open for the life of the object.
        with h5py.File(file_path, 'r') as h5:
            self.gt = scaled(h5["gt"][...])
            print(self.gt.size())
            self.lms = scaled(h5["lms"][...])
            # The former transpose/permute round trips were no-ops on the
            # layout, so the arrays are converted directly.
            self.ms = scaled(h5["ms"][...])
            pan = h5['pan'][...]

        if pan.ndim != 4 or pan.shape[1] != 1:
            raise ValueError(f"'pan' must have shape Nx1xHxW, got {pan.shape}")
        self.pan = scaled(pan)

    # Required by torch.utils.data.Dataset
    def __getitem__(self, index):
        """Return ``(gt, lms, ms, pan)`` for one sample, each as a float tensor."""
        return self.gt[index, :, :, :].float(), \
               self.lms[index, :, :, :].float(), \
               self.ms[index, :, :, :].float(), \
               self.pan[index, :, :, :].float()

    # Required by torch.utils.data.Dataset
    def __len__(self):
        """Number of samples (first dimension of ``gt``)."""
        return self.gt.shape[0]
class Dataset_Pro(data.Dataset):
    """In-memory dataset read from one HDF5 file.

    The file must hold the arrays ``gt``, ``lms``, ``ms`` and ``pan``; all
    four are loaded completely at construction time, converted to float32 and
    divided by ``img_scale``.
    NOTE(review): the original comments describe the arrays as NxCxHxW with a
    single-channel ``pan`` -- confirm against the data files.
    """

    def __init__(self, file_path, img_scale=2047.0):
        """
        Args:
            file_path: path of the HDF5 file.
            img_scale: divisor applied to every array (default 2047.0, the
                value that used to be hard-coded).

        Raises:
            ValueError: if ``pan`` is not 4-D with exactly one channel on
                axis 1 (the old squeeze-based code rejected this too).
        """
        super(Dataset_Pro, self).__init__()

        def scaled(array):
            return torch.from_numpy(np.array(array, dtype=np.float32) / img_scale)

        # Read-only and closed as soon as the arrays are copied into memory.
        # The handle used to stay open for the life of the object.
        with h5py.File(file_path, 'r') as h5:
            self.gt = scaled(h5["gt"][...])
            print(self.gt.size())
            self.lms = scaled(h5["lms"][...])
            # The former transpose/permute round trips were no-ops on the
            # layout, so the arrays are converted directly.
            self.ms = scaled(h5["ms"][...])
            pan = h5['pan'][...]

        if pan.ndim != 4 or pan.shape[1] != 1:
            raise ValueError(f"'pan' must have shape Nx1xHxW, got {pan.shape}")
        self.pan = scaled(pan)

    # Required by torch.utils.data.Dataset
    def __getitem__(self, index):
        """Return ``(gt, lms, ms, pan)`` for one sample, each as a float tensor."""
        return self.gt[index, :, :, :].float(), \
               self.lms[index, :, :, :].float(), \
               self.ms[index, :, :, :].float(), \
               self.pan[index, :, :, :].float()

    # Required by torch.utils.data.Dataset
    def __len__(self):
        """Number of samples (first dimension of ``gt``)."""
        return self.gt.shape[0]
def norm_blocco(x, eps=1e-8):
    """Standardise a block and shift it by one.

    Args:
        x: numpy array to normalise.
        eps: replacement for the standard deviation when it is exactly zero,
            so the division stays defined.

    Returns:
        tuple: ``((x - mean) / std + 1, mean, std)`` where ``std`` is the
        value actually used as divisor (``eps`` for a constant block).
    """
    mean_val = x.mean()
    std_val = x.std()
    divisor = eps if std_val == 0 else std_val
    shifted = (x - mean_val) / divisor + 1
    return shifted, mean_val, divisor
def onion_mult2D(onion1, onion2):
    """Recursive product of two 4-D arrays along their last axis.

    The last axis is split in half at every level; the first element of a
    second half is kept and the remaining ones are negated before the halves
    are combined. With a single component the result is the plain
    element-wise product.

    Args:
        onion1: 4-D numpy array (the shape is unpacked into four values).
        onion2: numpy array combined with ``onion1``.
            NOTE(review): presumably the same shape as ``onion1`` -- confirm
            at the call sites.

    Returns:
        numpy array with the same number of components on the last axis.
    """
    def flip_tail(block):
        # Keep the first component, negate all the following ones.
        return np.concatenate([block[..., 0, np.newaxis], -block[..., 1:]], axis=-1)

    _, _, _, n_comp = onion1.shape
    if n_comp <= 1:
        return onion1 * onion2

    half = n_comp // 2
    head1, tail1 = onion1[..., :half], flip_tail(onion1[..., half:])
    head2, tail2 = onion2[..., :half], flip_tail(onion2[..., half:])

    if n_comp == 2:
        first = head1 * head2 - tail2 * tail1
        second = head1 * tail2 + head2 * tail1
        return np.concatenate([first, second], axis=-1)

    first = onion_mult2D(head1, head2) - onion_mult2D(tail2, flip_tail(tail1))
    second = onion_mult2D(flip_tail(head1), tail2) + onion_mult2D(head2, tail1)
    return np.concatenate([first, second], axis=-1)
def compute_index(img_base, img_out, ratio):
    """Compute the SAM and ERGAS metrics between a reference and a result image.

    Args:
        img_base: reference tensor, indexed as ``[row, col, band]``.
        img_out: result tensor with the same layout as ``img_base``.
        ratio: value whose reciprocal scales ERGAS.
            NOTE(review): presumably the resolution ratio between PAN and MS
            -- confirm with the callers.

    Returns:
        tuple: ``(SAM, ERGAS)`` as tensors. SAM is the mean spectral angle in
        degrees over pixels whose spectral norms are non-zero; angles that
        come out as NaN are counted as 0.
    """
    chanel = img_out.shape[2]

    # SAM: angle between the per-pixel spectral vectors.
    sum1 = torch.sum(img_base * img_out, 2)
    sum2 = torch.sum(img_base * img_base, 2)
    sum3 = torch.sum(img_out * img_out, 2)
    t = (sum2 * sum3) ** 0.5
    numlocal = torch.gt(t, 0)
    num = torch.sum(numlocal)
    t = sum1 / t
    angle = torch.acos(t)
    # NaN angles (e.g. 0/0 for all-zero pixels) contribute nothing to the sum.
    sumangle = torch.where(torch.isnan(angle), torch.full_like(angle, 0), angle).sum()
    if num == 0:
        averangle = sumangle
    else:
        averangle = sumangle / num
    # math.pi replaces the mistyped literal 3.14159256 (digits transposed).
    SAM = averangle * 180 / math.pi

    # ERGAS: per-band MSE normalised by the squared band mean of the reference.
    summ = 0
    for i in range(chanel):
        a1 = torch.mean((img_base[:, :, i] - img_out[:, :, i]) ** 2)
        m1 = torch.mean(img_base[:, :, i])
        a2 = m1 * m1
        summ = summ + a1 / a2
    ERGAS = 100 * (1 / ratio) * ((summ / chanel) ** 0.5)

    return SAM, ERGAS
def _ssim(img1, img2):
    """Per-image, per-channel SSIM between two batches.

    Args:
        img1: 4-D tensor; dimension 1 is used as the channel count and the
            last two as the spatial size.
        img2: tensor compared against ``img1``.
            NOTE(review): presumably the same shape and device as ``img1``,
            with values in [0, 1] since ``max_val`` is 1 -- confirm with the
            callers.

    Returns:
        Tensor of shape ``(batch, channel)``: the SSIM map averaged over both
        spatial dimensions.
    """
    img1 = img1.float()
    img2 = img2.float()

    channel = img1.shape[1]
    max_val = 1
    _, _, w, h = img1.size()
    # The window never exceeds the image; sigma shrinks with it.
    window_size = min(w, h, 11)
    sigma = 1.5 * window_size / 11
    # Follow the input's device instead of forcing CUDA, so the metric also
    # runs on CPU-only hosts and never mixes devices with ``img1``.
    window = create_window(window_size, sigma, channel).to(img1.device)
    pad = window_size // 2

    mu1 = F.conv2d(img1, window, padding=pad, groups=channel)
    mu2 = F.conv2d(img2, window, padding=pad, groups=channel)
    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1 * mu2

    sigma1_sq = F.conv2d(img1 * img1, window, padding=pad, groups=channel) - mu1_sq
    sigma2_sq = F.conv2d(img2 * img2, window, padding=pad, groups=channel) - mu2_sq
    sigma12 = F.conv2d(img1 * img2, window, padding=pad, groups=channel) - mu1_mu2

    C1 = (0.01 * max_val) ** 2
    C2 = (0.03 * max_val) ** 2
    V1 = 2.0 * sigma12 + C2
    V2 = sigma1_sq + sigma2_sq + C2
    ssim_map = ((2 * mu1_mu2 + C1) * V1) / ((mu1_sq + mu2_sq + C1) * V2)
    return ssim_map.mean(2).mean(2)


from torch.autograd import Variable


def gaussian(window_size, sigma):
    """Return a 1-D Gaussian of length ``window_size`` normalised to sum to 1."""
    gauss = torch.Tensor([math.exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)])
    return gauss / gauss.sum()


def create_window(window_size, sigma, channel):
    """Build a ``(channel, 1, window_size, window_size)`` Gaussian kernel.

    The 2-D kernel is the outer product of the 1-D Gaussian with itself,
    repeated once per channel for use with a grouped convolution.
    """
    _1D_window = gaussian(window_size, sigma).unsqueeze(1)
    _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
    window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous())
    return window
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.autograd import Variable
from torch.utils.data import DataLoader
from data_qb import Dataset_Pro
from model_qb import APNN, summaries, loss_with_l2_regularization, weights_init
from logger import create_logger, log_string
import numpy as np
import argparse

# Command-line options; the parsed namespace is handed to create_logger below.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--out_dir', metavar='DIR', default='../results', help='path to save model')
parser.add_argument('--log_dir', metavar='DIR', default='logs', help='path to save log')
parser.add_argument('--tfb_dir', metavar='DIR', default=None, help='useless in this script.')
parser.add_argument('--arch', '-a', metavar='ARCH', default='APNN')
args = parser.parse_args()
args.experimental_desc = "APNN"
args.dataset = "QB"
# model_save_dir is where train() writes its checkpoints.
out_dir, model_save_dir, tfb_dir = create_logger(args, args.experimental_desc)

###################################################################
# ------------------- Pre-Define Part----------------------
###################################################################
# ================== Pre-Define =================== #
# Seed the CPU and all CUDA generators with the same value.
SEED = 10
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cudnn.benchmark = True  ### automatically search for the fastest algorithm
cudnn.deterministic = True

# ============= 2) HYPER PARAMS(Pre-Defined) ==========#
sensor = 'QB'
nr_bands = 4 #selected by user or taken from data?
lr = 0.0001*17*17*nr_bands
epochs = 15000
ckpt = 50  # checkpoint period in epochs, used by train()
batch_size = 128
model_path = "Weights/qb/.pth"  # weights are loaded only if this file exists

# ============= 3) Load Model + Loss + Optimizer + Learn_rate_update ==========#
model = APNN().cuda()
model.apply(weights_init)
if os.path.isfile(model_path):
    # Here the file is expected to hold a bare state_dict.
    model.load_state_dict(torch.load(model_path))  ## Load the pretrained Encoder
    log_string('APNN is Successfully Loaded from %s' % (model_path))

# summaries(model, grad=True) ## Summary the Network
criterion = nn.L1Loss(reduction='mean').cuda()
regularization = loss_with_l2_regularization().cuda()

# Two parameter groups: conv3 is trained at lr/10, every other parameter
# falls back to the optimizer-wide lr.
target_layerParam = list(map(id, model.conv3.parameters()))
base_layerParam = filter(lambda p: id(p) not in target_layerParam, model.parameters())
training_parameters = [{'params': model.conv3.parameters(), 'lr': lr/10}, {'params': base_layerParam}]
optimizer = optim.SGD(training_parameters, lr=lr, momentum=0.9)
log_string("inspect optimizer setting: {}\n".format(optimizer.state_dict()))
print("target id: {}".format(target_layerParam))
# (input_size - kernel_size + 1) // 2 = 2* pad = 2 * blk = net_scope
# net_scope adds up (last dimension - 1) over every non-bias parameter whose
# name contains 'conv', plus one; blk is half of it and is the border that
# train() crops from the target.
net_scope = 0
for name, layer in model.named_parameters():
    if 'conv' in name and 'bias' not in name:
        net_scope += layer.shape[-1]-1
net_scope = np.sum(net_scope) + 1
blk = net_scope//2 #8
save_best_file = './results/PNN/PNN_model.pth.tar'
# Hyper-parameters stored alongside every checkpoint written by train().
PNN_model = {'sensor': sensor, 'lr': lr, 'epochs': epochs, 'model_sampling_period': ckpt, 'net_scope': net_scope, 'batch_size': batch_size}
writer = SummaryWriter('./train_logs') ## Tensorboard_show: case 2


def save_checkpoint(model, epoch): # save model function
    """Write ``model.state_dict()`` to ``Weights/<epoch>.pth``.

    NOTE(review): the only visible call site is commented out, and the
    ``Weights`` directory is not created here.
    """
    model_out_path = 'Weights' + '/' + "{}.pth".format(epoch)
    torch.save(model.state_dict(), model_out_path)
###################################################################
# ------------------- Main Train (Run second)----------------------------------
###################################################################
def train(training_data_loader, validate_data_loader, start_epoch=0):
    """Train the module-level ``model`` and validate it after every epoch.

    Each batch is indexed as ``(gt, lms, ms, pan)``. The regression target is
    the residual ``gt - lms`` with a border of ``blk`` pixels cropped on every
    side, and the network input is ``lms`` concatenated with ``pan`` along
    dim 1. The L1 ``criterion`` (plus ``regularization``) drives the
    optimizer; MSE is tracked for logging only.

    Args:
        training_data_loader: iterable of training batches.
        validate_data_loader: iterable of validation batches.
        start_epoch: number of epochs already done; epochs are numbered
            ``start_epoch + 1 .. epochs`` (1-based).

    Side effects:
        Logs through ``log_string``, writes scalars to ``writer``, saves a
        periodic checkpoint every ``ckpt`` epochs, saves a "best" checkpoint
        whenever the validation L1 loss improves, and closes ``writer`` when
        finished.
    """
    log_string('Start training...')
    vmin = 10000
    mse_criterion = nn.MSELoss()  # stateless, so build it once instead of per batch

    # torch.save does not create missing folders.
    pretrained_dir = './pretrained_models/'
    for folder in (model_save_dir, pretrained_dir):
        os.makedirs(folder, exist_ok=True)

    for epoch in range(start_epoch + 1, epochs + 1):
        epoch_train_mae, epoch_train_mse, epoch_val_mae, epoch_val_mse = [], [], [], []

        # ============Epoch Train=============== #
        model.train()
        for iteration, batch in enumerate(training_data_loader, 1):
            # batch[2] (ms) is never used, so it is not copied to the GPU.
            gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
            gt = gt - lms
            lms = torch.cat([lms, pan], dim=1)

            optimizer.zero_grad()
            sr = model(lms)
            gt = gt[:, :, blk:-blk, blk:-blk]  # match the border-cropped network output
            loss = criterion(sr, gt)
            new_loss = regularization(loss, model, flag=False)
            epoch_train_mae.append(loss.item())  # log the un-regularised L1 loss

            new_loss.backward()
            optimizer.step()

            with torch.no_grad():
                epoch_train_mse.append(mse_criterion(sr, gt).item())

        t_loss1 = np.nanmean(np.array(epoch_train_mae))  # mean L1 loss of the epoch
        t_loss2 = np.nanmean(np.array(epoch_train_mse))  # mean MSE of the epoch
        writer.add_scalar('mae_loss/t_mae', t_loss1, epoch)
        writer.add_scalar('mae_loss/t_mse', t_loss2, epoch)
        # Report current/total (the arguments used to be swapped).
        log_string('Epoch: {}/{} training L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epoch, epochs, t_loss1, t_loss2))

        # ============Epoch Validate=============== #
        model.eval()
        with torch.no_grad():
            for iteration, batch in enumerate(validate_data_loader, 1):
                gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
                gt = gt - lms
                lms = torch.cat([lms, pan], dim=1)

                sr = model(lms)
                gt = gt[:, :, blk:-blk, blk:-blk]
                epoch_val_mae.append(criterion(sr, gt).item())
                epoch_val_mse.append(mse_criterion(sr, gt).item())

        v_loss1 = np.nanmean(np.array(epoch_val_mae))
        v_loss2 = np.nanmean(np.array(epoch_val_mse))
        writer.add_scalar('val/v_mae', v_loss1, epoch)
        writer.add_scalar('val/v_mse', v_loss2, epoch)
        log_string('Epoch: {}/{} validate L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epoch, epochs, v_loss1, v_loss2))

        ### periodic save and simple best save ###
        # `epoch` is already 1-based, so the file named N holds the weights
        # after N epochs (it used to hold those of epoch N - 1).
        if epoch % ckpt == 0:
            torch.save(dict(model=model, model_state=model.state_dict(), loss=v_loss1, train_params=PNN_model),
                       '{}/PNN_model_{}.pth.tar'.format(model_save_dir, epoch))

        if v_loss1 < vmin:
            if os.path.isfile(save_best_file):
                os.remove(save_best_file)
            best_state = dict(model=model, model_state=model.state_dict(), loss1=v_loss1, train_params=PNN_model)
            torch.save(best_state, '{}/best_PNN_model_{}.pth.tar'.format(model_save_dir, epoch))
            torch.save(best_state, pretrained_dir + sensor + '_PNNplus_model.pth.tar')
            vmin = v_loss1

    writer.close()  # close tensorboard


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################
if __name__ == "__main__":
    train_set = Dataset_Pro('../training_data/train_qb_10000.h5')  # create data for training
    training_data_loader = DataLoader(dataset=train_set, num_workers=0, batch_size=batch_size, shuffle=True,
                                      pin_memory=True, drop_last=True)  # batch the training data

    validate_set = Dataset_Pro('../training_data/valid_qb_10000.h5')  # create data for validation
    validate_data_loader = DataLoader(dataset=validate_set, num_workers=0, batch_size=batch_size, shuffle=False,
                                      pin_memory=True, drop_last=True)  # batch the validation data

    train(training_data_loader, validate_data_loader)  # call train function
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.autograd import Variable
from torch.utils.data import DataLoader
from data_wv2 import Dataset_Pro
from model_wv2 import APNN, summaries, loss_with_l2_regularization, weights_init
from logger import create_logger, log_string
import numpy as np
import argparse

# Command-line options; the parsed namespace is handed to create_logger below.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--out_dir', metavar='DIR', default='../results', help='path to save model')
parser.add_argument('--log_dir', metavar='DIR', default='logs', help='path to save log')
parser.add_argument('--tfb_dir', metavar='DIR', default=None, help='useless in this script.')
parser.add_argument('--arch', '-a', metavar='ARCH', default='APNN')
args = parser.parse_args()
args.experimental_desc = "APNN"
args.dataset = "WV2"
# model_save_dir is where train() writes its checkpoints.
out_dir, model_save_dir, tfb_dir = create_logger(args, args.experimental_desc)
print(model_save_dir)
# import shutil
# from torch.utils.tensorboard import SummaryWriter

###################################################################
# ------------------- Pre-Define Part----------------------
###################################################################
# ================== Pre-Define =================== #
# Seed the CPU and all CUDA generators with the same value.
SEED = 10
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cudnn.benchmark = True  ### automatically search for the fastest algorithm
cudnn.deterministic = True

# ============= 2) HYPER PARAMS(Pre-Defined) ==========#
"""
CHANGES:
1. row 48: in APNN the L1 loss is averaged only on the minibatch size, for the learning rate in case the loss is averaged on minibatches, patches size, and bands is lr=0.0001*17*17*nr_bands
    a. nr_bands takes into account the number of bands
2. row 46: should be indicated by user here, or taken from data
3. row 46: depends on the
4. the dataset is already normalized, so we do not need anymore and
5. row 49: in APNN epochs=10000
6. row 71: in APNN weight_decay=0
7. rows 182-195: in pretrained_models the best PNN model is saved
"""
sensor = 'WV2'
nr_bands = 8  # selected by user or taken from data?
lr = 0.1  # overrides the formula 0.0001 * 17 * 17 * nr_bands, which would give 0.2312
epochs = 15000
ckpt = 50  # checkpoint period in epochs, used by train()
batch_size = 128
model_path = "../results/WV2/best_PNN_model_4765.pth.tar"  # checkpoint to resume from, if present
v_min = 10000  # best validation L1 loss so far; replaced below when a checkpoint is loaded
'''
- Epoch: 15000/4765 training L1-loss: 0.0145116, L2-loss: 0.0005868
- Epoch: 15000/4765 validate L1-loss: 0.0145294, L2-loss: 0.000590
- Epoch: 15000/4766 training L1-loss: 0.0145040, L2-loss: 0.0005864
- Epoch: 15000/4766 validate L1-loss: 0.0145357, L2-loss: 0.000590
- Epoch: 15000/4767 training L1-loss: 0.0145072, L2-loss: 0.0005866
- Epoch: 15000/4767 validate L1-loss: 0.0145413, L2-loss: 0.000591
'''
# TODO L2 norm to do where
# ============= 3) Load Model + Loss + Optimizer + Learn_rate_update ==========#
model = APNN().cuda()
model.apply(weights_init)
if os.path.isfile(model_path):
    log_string("loading")
    # The checkpoint is a dict in the layout written by train():
    # "model_state" holds the weights and "loss1" the validation L1 loss.
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint["model_state"])  ## Load the pretrained Encoder
    log_string('APNN is Successfully Loaded from %s' % (model_path))
    v_min = checkpoint["loss1"]

# summaries(model, grad=True) ## Summary the Network
criterion = nn.L1Loss(reduction='mean').cuda()
regularization = loss_with_l2_regularization().cuda()
# Use the ids of parameters that live in the model (conv3) to single out the
# remaining parameters, instead of iterating over model.parameters() directly.
# conv3 is trained at lr / 10; every other parameter uses the optimizer-wide lr.
target_layerParam = list(map(id, model.conv3.parameters()))
base_layerParam = filter(lambda p: id(p) not in target_layerParam, model.parameters())
training_parameters = [{'params': model.conv3.parameters(), 'lr': lr / 10}, {'params': base_layerParam}]
optimizer = optim.SGD(training_parameters, lr=lr, momentum=0.9)
log_string("inspect optimizer setting: {}\n".format(optimizer.state_dict()))
log_string("target id: {}".format(target_layerParam))
# Zero-padding extent of the model's convolution layers (wide convolution)
# (input_size - kernel_size + 1) // 2 = 2* pad = 2 * blk = net_scope
# net_scope adds up (last dimension - 1) over every non-bias parameter whose
# name contains 'conv', plus one; blk is half of it and is the border that
# train() crops from the target.
net_scope = 0
for name, layer in model.named_parameters():
    if 'conv' in name and 'bias' not in name:
        net_scope += layer.shape[-1] - 1
net_scope = np.sum(net_scope) + 1
blk = net_scope // 2  # 8
save_best_file = './results/WV2/PNN_model.pth.tar'
# Hyper-parameters stored alongside every checkpoint written by train().
PNN_model = {'sensor': sensor, 'lr': lr, 'epochs': epochs, 'model_sampling_period': ckpt, 'net_scope': net_scope, 'batch_size': batch_size}
writer = SummaryWriter('../train_logs')  ## Tensorboard_show: case 2


def save_checkpoint(model, epoch):  # save model function
    """Write ``model.state_dict()`` to ``Weights/<epoch>.pth``.

    NOTE(review): the only visible call site is commented out, and the
    ``Weights`` directory is not created here.
    """
    model_out_path = 'Weights' + '/' + "{}.pth".format(epoch)
    torch.save(model.state_dict(), model_out_path)
###################################################################
# ------------------- Main Train (Run second)----------------------------------
###################################################################
def train(training_data_loader, validate_data_loader, start_epoch=0, v_min=10000):
    """Train the module-level ``model`` and validate it after every epoch.

    Each batch is indexed as ``(gt, lms, ms, pan)``. The regression target is
    the residual ``gt - lms`` with a border of ``blk`` pixels cropped on every
    side, and the network input is ``lms`` concatenated with ``pan`` along
    dim 1. The L1 ``criterion`` (plus ``regularization``) drives the
    optimizer; MSE is tracked for logging only.

    Args:
        training_data_loader: iterable of training batches.
        validate_data_loader: iterable of validation batches.
        start_epoch: number of epochs already done; epochs are numbered
            ``start_epoch + 1 .. epochs`` (1-based).
        v_min: best validation L1 loss seen so far; a "best" checkpoint is
            written only when an epoch beats it.

    Side effects:
        Logs through ``log_string``, writes scalars to ``writer``, saves a
        periodic checkpoint every ``ckpt`` epochs, saves a "best" checkpoint
        on improvement, and closes ``writer`` when finished.
    """
    log_string('Start training...')
    mse_criterion = nn.MSELoss()  # stateless, so build it once instead of per batch

    # torch.save does not create missing folders.
    pretrained_dir = '../pretrained_models/'
    for folder in (model_save_dir, pretrained_dir):
        os.makedirs(folder, exist_ok=True)

    for epoch in range(start_epoch + 1, epochs + 1):
        epoch_train_mae, epoch_train_mse, epoch_val_mae, epoch_val_mse = [], [], [], []

        # ============Epoch Train=============== #
        model.train()
        for iteration, batch in enumerate(training_data_loader, 1):
            # batch[2] (ms) is never used, so it is not copied to the GPU.
            gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
            gt = gt - lms
            lms = torch.cat([lms, pan], dim=1)

            optimizer.zero_grad()
            sr = model(lms)
            gt = gt[:, :, blk:-blk, blk:-blk]  # match the border-cropped network output
            loss = criterion(sr, gt)
            new_loss = regularization(loss, model, flag=False)
            epoch_train_mae.append(loss.item())  # log the un-regularised L1 loss

            new_loss.backward()
            optimizer.step()

            with torch.no_grad():
                epoch_train_mse.append(mse_criterion(sr, gt).item())

        t_loss1 = np.nanmean(np.array(epoch_train_mae))  # mean L1 loss of the epoch
        t_loss2 = np.nanmean(np.array(epoch_train_mse))  # mean MSE of the epoch
        writer.add_scalar('mae_loss/t_mae', t_loss1, epoch)
        writer.add_scalar('mae_loss/t_mse', t_loss2, epoch)
        # Report current/total (the arguments used to be swapped).
        log_string('Epoch: {}/{} training L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epoch, epochs, t_loss1, t_loss2))

        # ============Epoch Validate=============== #
        model.eval()
        with torch.no_grad():
            for iteration, batch in enumerate(validate_data_loader, 1):
                gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
                gt = gt - lms
                lms = torch.cat([lms, pan], dim=1)

                sr = model(lms)
                gt = gt[:, :, blk:-blk, blk:-blk]
                epoch_val_mae.append(criterion(sr, gt).item())
                epoch_val_mse.append(mse_criterion(sr, gt).item())

        v_loss1 = np.nanmean(np.array(epoch_val_mae))
        v_loss2 = np.nanmean(np.array(epoch_val_mse))
        writer.add_scalar('val/v_mae', v_loss1, epoch)
        writer.add_scalar('val/v_mse', v_loss2, epoch)
        log_string('Epoch: {}/{} validate L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epoch, epochs, v_loss1, v_loss2))

        ### periodic save and simple best save ###
        # `epoch` is already 1-based, so the file named N holds the weights
        # after N epochs (it used to hold those of epoch N - 1).
        if epoch % ckpt == 0:
            torch.save(dict(model=model, model_state=model.state_dict(), loss=v_loss1, train_params=PNN_model),
                       '{}/PNN_model_{}.pth.tar'.format(model_save_dir, epoch))

        if v_loss1 < v_min:
            if os.path.isfile(save_best_file):
                os.remove(save_best_file)
            best_state = dict(model=model, model_state=model.state_dict(), loss1=v_loss1, train_params=PNN_model)
            torch.save(best_state, '{}/best_PNN_model_{}.pth.tar'.format(model_save_dir, epoch))
            torch.save(best_state, pretrained_dir + sensor + '_PNNplus_model.pth.tar')
            v_min = v_loss1

    writer.close()  # close tensorboard


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################
if __name__ == "__main__":
    train_set = Dataset_Pro('../training_data/train_wv2_10000.h5')  # create data for training
    training_data_loader = DataLoader(dataset=train_set, num_workers=0, batch_size=batch_size, shuffle=True,
                                      pin_memory=True, drop_last=True)  # batch the training data

    validate_set = Dataset_Pro('../training_data/valid_wv2_10000.h5')  # create data for validation
    validate_data_loader = DataLoader(dataset=validate_set, num_workers=0, batch_size=batch_size, shuffle=False,
                                      pin_memory=True, drop_last=True)  # batch the validation data

    # NOTE(review): the resume epoch (6700) is hard-coded and does not match
    # the epoch in model_path (4765) -- confirm before re-running.
    train(training_data_loader, validate_data_loader, 6700, v_min=v_min)  # call train function

'''
- Epoch: 15000/1942 validate L1-loss: 0.0152082, L2-loss: 0.000656
- Epoch: 15000/1943 training L1-loss: 0.0151794, L2-loss: 0.0006520
- Epoch: 15000/1943 validate L1-loss: 0.0152087, L2-loss: 0.000656
- Epoch: 15000/1944 training L1-loss: 0.0151762, L2-loss: 0.0006518
- Epoch: 15000/1944 validate L1-loss: 0.0152076, L2-loss: 0.000656
- Epoch: 15000/1945 training L1-loss: 0.0151769, L2-loss: 0.0006519
- Epoch: 15000/1945 validate L1-loss: 0.0152089, L2-loss: 0.000656
- Epoch: 15000/1946 training L1-loss: 0.0151799, L2-loss: 0.0006520
- Epoch: 15000/1946 validate L1-loss: 0.0152077, L2-loss: 0.000656
'''
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.autograd import Variable
from torch.utils.data import DataLoader
from data_wv3 import Dataset_Pro
from model_wv3 import APNN, loss_with_l2_regularization, weights_init
import numpy as np
import shutil
from torch.utils.tensorboard import SummaryWriter
from torchstat import stat

###################################################################
# ------------------- Pre-Define Part----------------------
###################################################################
# ================== Pre-Define =================== #
# Seed the CPU and all CUDA generators with the same value.
SEED = 10
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cudnn.benchmark = True  ### automatically search for the fastest algorithm
cudnn.deterministic = True

# ============= 2) HYPER PARAMS(Pre-Defined) ==========#
sensor = 'WV3'
nr_bands = 8  # selected by user or taken from data?
lr = 0.0001 * 17 * 17 * nr_bands
epochs = 10000
ckpt = 50  # checkpoint period in epochs, used by train()
batch_size = 128
# 0.010094023841832365
model_path = "results/PNN/.pth.tar"  # checkpoint to resume from, if present

# ============= 3) Load Model + Loss + Optimizer + Learn_rate_update ==========#
model = APNN().cuda()
model.apply(weights_init)
stat(model, input_size=[(9, 64, 64)])
if os.path.isfile(model_path):
    # The checkpoint is a dict in the layout written by train():
    # "model_state" holds the weights and "loss1" the validation L1 loss.
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint["model_state"])  ## Load the pretrained Encoder
    print('APNN is Successfully Loaded from %s' % (model_path))
    if "loss1" in dict(checkpoint).keys():
        print("loss: {}".format(checkpoint["loss1"]))

criterion = nn.L1Loss(reduction='mean').cuda()
regularization = loss_with_l2_regularization().cuda()

# Two parameter groups: conv3 is trained at lr / 10, every other parameter
# falls back to the optimizer-wide lr.
target_layerParam = list(map(id, model.conv3.parameters()))
base_layerParam = filter(lambda p: id(p) not in target_layerParam, model.parameters())
training_parameters = [{'params': model.conv3.parameters(), 'lr': lr / 10}, {'params': base_layerParam}]
optimizer = optim.SGD(training_parameters, lr=lr, momentum=0.9, weight_decay=0)
print("inspect optimizer setting:\n", optimizer.state_dict())
print("target id:", target_layerParam)
# (input_size - kernel_size + 1) // 2 = 2* pad = 2 * blk = net_scope
# net_scope adds up (last dimension - 1) over every non-bias parameter whose
# name contains 'conv', plus one; blk is half of it and is the border that
# train() and fine_tune_test() crop from the target.
net_scope = 0
for name, layer in model.named_parameters():
    if 'conv' in name and 'bias' not in name:
        net_scope += layer.shape[-1] - 1
net_scope = np.sum(net_scope) + 1
blk = net_scope // 2  # 8
save_best_file = './results/PNN/PNN_model.pth.tar'
# Hyper-parameters stored alongside every checkpoint written by train().
PNN_model = {'sensor': sensor, 'lr': lr, 'epochs': epochs, 'model_sampling_period': ckpt, 'net_scope': net_scope, 'batch_size': batch_size}
writer = SummaryWriter('./train_logs')  ## Tensorboard_show: case 2


def save_checkpoint(model, epoch):  # save model function
    """Write ``model.state_dict()`` to ``Weights/wv3/<epoch>.pth``.

    NOTE(review): the only visible call site is commented out, and the
    ``Weights/wv3`` directory is not created here.
    """
    model_out_path = 'Weights' + '/wv3/' + "{}.pth".format(epoch)
    torch.save(model.state_dict(), model_out_path)
###################################################################
# ------------------- Main Train (Run second)----------------------------------
###################################################################
def train(training_data_loader, validate_data_loader, start_epoch=0):
    """Train the module-level ``model`` and validate it after every epoch.

    Each batch is indexed as ``(gt, lms, ms, pan)``. The regression target is
    the residual ``gt - lms`` with a border of ``blk`` pixels cropped on every
    side, and the network input is ``lms`` concatenated with ``pan`` along
    dim 1. The L1 ``criterion`` (plus ``regularization``) drives the
    optimizer; MSE is tracked for logging only.

    Args:
        training_data_loader: iterable of training batches.
        validate_data_loader: iterable of validation batches.
        start_epoch: number of epochs already done; epochs are numbered
            ``start_epoch + 1 .. epochs`` (1-based).

    Side effects:
        Prints progress, writes scalars to ``writer``, saves a periodic
        checkpoint every ``ckpt`` epochs and a "best" checkpoint whenever
        the validation L1 loss improves (both include the optimizer state),
        and closes ``writer`` when finished.
    """
    print('Start training...')
    print(model.conv1.weight.data[0, 0, 0, 0])
    vmin = 10000
    mse_criterion = nn.MSELoss()  # stateless, so build it once instead of per batch

    # torch.save does not create missing folders.
    results_dir = './results/PNN/'
    pretrained_dir = './pretrained_models/'
    for folder in (results_dir, pretrained_dir):
        os.makedirs(folder, exist_ok=True)

    for epoch in range(start_epoch + 1, epochs + 1):
        # True on the first and last epoch of the schedule (1-based here; same
        # epochs as the former 0-based test). The former extra term
        # `iteration == 0` was dropped: enumerate() starts at 1, so it could
        # never be true. NOTE(review): what `flag` does inside
        # `regularization` is not visible in this file.
        flag = epoch in (1, epochs)
        epoch_train_mae, epoch_train_mse, epoch_val_mae, epoch_val_mse = [], [], [], []

        # ============Epoch Train=============== #
        model.train()
        for iteration, batch in enumerate(training_data_loader, 1):
            # batch[2] (ms) is never used, so it is not copied to the GPU.
            gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
            gt = gt - lms
            lms = torch.cat([lms, pan], dim=1)

            optimizer.zero_grad()
            sr = model(lms)
            gt = gt[:, :, blk:-blk, blk:-blk]  # match the border-cropped network output
            loss = criterion(sr, gt)
            new_loss = regularization(loss, model, flag=flag)
            epoch_train_mae.append(loss.item())  # log the un-regularised L1 loss

            new_loss.backward()
            optimizer.step()

            with torch.no_grad():
                epoch_train_mse.append(mse_criterion(sr, gt).item())

        t_loss1 = np.nanmean(np.array(epoch_train_mae))  # mean L1 loss of the epoch
        t_loss2 = np.nanmean(np.array(epoch_train_mse))  # mean MSE of the epoch
        writer.add_scalar('mae_loss/t_mae', t_loss1, epoch)
        writer.add_scalar('mae_loss/t_mse', t_loss2, epoch)
        # Report current/total (the arguments used to be swapped).
        print('Epoch: {}/{} training L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epoch, epochs, t_loss1, t_loss2))

        # ============Epoch Validate=============== #
        model.eval()
        with torch.no_grad():
            for iteration, batch in enumerate(validate_data_loader, 1):
                gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
                gt = gt - lms
                lms = torch.cat([lms, pan], dim=1)

                sr = model(lms)
                gt = gt[:, :, blk:-blk, blk:-blk]
                epoch_val_mae.append(criterion(sr, gt).item())
                epoch_val_mse.append(mse_criterion(sr, gt).item())

        v_loss1 = np.nanmean(np.array(epoch_val_mae))
        v_loss2 = np.nanmean(np.array(epoch_val_mse))
        writer.add_scalar('val/v_mae', v_loss1, epoch)
        writer.add_scalar('val/v_mse', v_loss2, epoch)
        print('Epoch: {}/{} validate L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epoch, epochs, v_loss1, v_loss2))

        ### periodic save and simple best save ###
        # `epoch` is already 1-based, so the file named N holds the weights
        # after N epochs (it used to hold those of epoch N - 1).
        if epoch % ckpt == 0:
            torch.save(dict(model=model, model_state=model.state_dict(), optim_state=optimizer.state_dict(),
                            loss=v_loss1, train_params=PNN_model),
                       './results/PNN/PNN_model_{}.pth.tar'.format(epoch))

        if v_loss1 < vmin:
            if os.path.isfile(save_best_file):
                os.remove(save_best_file)
            best_state = dict(model=model, model_state=model.state_dict(), optim_state=optimizer.state_dict(),
                              loss1=v_loss1, train_params=PNN_model)
            torch.save(best_state, './results/PNN/best_PNN_model_{}.pth.tar'.format(epoch))
            torch.save(best_state, pretrained_dir + sensor + '_PNNplus_model.pth.tar')
            vmin = v_loss1

    writer.close()  # close tensorboard
def fine_tune_test(file_path, training_data_loader):
    """Fine-tune a network on ``training_data_loader`` and evaluate it on ``file_path``.

    If ``./results/PNN/.pth.tar`` exists, the pickled network, its weights and
    its learning rate are taken from that checkpoint and 1000 epochs are run.
    Otherwise a fresh ``APNN`` is trained for ``epochs`` epochs with the
    default learning rate. The network is evaluated once before fine-tuning
    and then during the loop through ``eval_test``. The weights with the
    lowest mean training L1 loss are written to ``./ft_network/net.pth.tar``.

    Args:
        file_path: test data file handed to ``Tester``.
        training_data_loader: iterable of batches indexed as
            ``(gt, lms, ms, pan)``.
    """
    from main_test_wv3 import Tester, eval_test
    # tester = Tester(file_path, mode='ft')  # call initial model
    evaluator = Tester(file_path)
    criterion = nn.L1Loss(reduction='mean').cuda()

    # LOAD PRETRAINED MODEL
    model_path = "./results/PNN/.pth.tar"
    checkpoint = None  # stays None when there is nothing to resume from
    if os.path.isfile(model_path):
        print("loading model")
        # NOTE: torch.load unpickles a whole model object; only load
        # checkpoints from a trusted source.
        checkpoint = torch.load(model_path)
        # checkpoint = torch.load('./pretrained_models/' + sensor_model)
        print(checkpoint.keys())
        net = checkpoint['model']
        print(net.conv1.weight.data[0, 0, 0, 0])
        net.load_state_dict(checkpoint['model_state'])
        train_params = checkpoint['train_params']
        lr = train_params['lr']  # learning rate
        print("lr", lr)
        FT_epochs = 1000  # number of fine tuning epochs
    else:
        lr = 0.0001 * 17 * 17 * nr_bands
        FT_epochs = epochs
        net = APNN().cuda()
        print(net.conv1.weight.data[0, 0, 0, 0])
    # Sample output of the two prints around this point:
    #   tensor(-0.0003, device='cuda:0')
    #   tensor(-0.0204, device='cuda:0')
    print(net.conv1.weight.data[0, 0, 0, 0])
    print(net)
    # print("pretrain loss: ", checkpoint["loss1"])
    print(dict(net.named_parameters()).keys())

    # conv3 is trained at lr/10; every other parameter uses the optimizer-wide lr.
    target_layerParam = list(map(id, net.conv3.parameters()))
    base_layerParam = filter(lambda p: id(p) not in target_layerParam, net.parameters())
    training_parameters = [{'params': net.conv3.parameters(), 'lr': lr/10}, {'params': base_layerParam}]
    optimizer = optim.SGD(training_parameters, lr=lr, momentum=0.9, weight_decay=0)

    # Restore the optimizer state when a checkpoint provides one. This used
    # to be a bare `except:` that also hid the NameError raised when no
    # checkpoint had been loaded at all.
    optim_restored = False
    if checkpoint is not None and "optim_state" in checkpoint:
        try:
            optimizer.load_state_dict(checkpoint["optim_state"])
            optim_restored = True
        except (ValueError, KeyError) as err:
            print("could not restore optim_state:", err)
    if not optim_restored:
        print("default optim_state")

    v_min = 10000
    ft_loss = np.zeros(FT_epochs)
    save_dir = './ft_network/'
    eval_test(net, evaluator, mode="eval", mode2="pre")  # 0.0114576

    for epoch in range(FT_epochs):
        net.train()
        epoch_train_mae = []
        for iteration, batch in enumerate(training_data_loader, 1):
            # batch[2] (ms) is never used, so it is not copied to the GPU.
            gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
            gt = gt - lms
            lms = torch.cat([lms, pan], dim=1)

            optimizer.zero_grad()
            sr = net(lms)
            gt = gt[:, :, blk:-blk, blk:-blk]  # match the border-cropped network output
            loss = criterion(sr, gt)
            new_loss = regularization(loss, net, flag=False)
            epoch_train_mae.append(loss.item())  # log the un-regularised L1 loss

            new_loss.backward()
            optimizer.step()

        running_loss = np.nanmean(epoch_train_mae)
        ft_loss[epoch] = running_loss

        if running_loss < v_min:
            v_min = running_loss
            os.makedirs(save_dir, exist_ok=True)
            torch.save(dict(model=net, model_state=net.state_dict(), loss=ft_loss),
                       os.path.join(save_dir, 'net.pth.tar'))

        # NOTE(review): the source this was recovered from had lost its
        # indentation; evaluating and printing once per epoch (rather than
        # only on improvement) is the assumed nesting -- confirm.
        net.eval()
        eval_test(net, evaluator, mode="eval", mode2="ft")
        print('[%d] loss: %.20f' % (epoch + 1, running_loss))


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################
if __name__ == "__main__":
    train_set = Dataset_Pro('./training_data/train_wv3_10000.h5')  # create data for training
    training_data_loader = DataLoader(dataset=train_set, num_workers=0, batch_size=batch_size, shuffle=True,
                                      pin_memory=True, drop_last=True)  # batch the training data

    validate_set = Dataset_Pro('./training_data/valid_wv3_10000.h5')  # create data for validation
    validate_data_loader = DataLoader(dataset=validate_set, num_workers=0, batch_size=batch_size, shuffle=False,
                                      pin_memory=True, drop_last=True)  # batch the validation data

    train(training_data_loader, validate_data_loader)  # call train function

    # file_path = "./test_data/TestData_wv3.h5"
    # fine_tune_test(file_path, training_data_loader)
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.autograd import Variable
from torch.utils.data import DataLoader
from data_wv4 import Dataset_Pro
from model_wv4 import APNN, summaries, loss_with_l2_regularization, weights_init
from logger import create_logger, log_string
import numpy as np
import argparse

# Command-line options; the parsed namespace is handed to create_logger below.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--out_dir', metavar='DIR', default='../results', help='path to save model')
parser.add_argument('--log_dir', metavar='DIR', default='logs', help='path to save log')
parser.add_argument('--tfb_dir', metavar='DIR', default=None, help='useless in this script.')
parser.add_argument('--arch', '-a', metavar='ARCH', default='APNN')
args = parser.parse_args()
args.experimental_desc = "APNN"
args.dataset = "WV4"
# model_save_dir is where train() writes its checkpoints.
out_dir, model_save_dir, tfb_dir = create_logger(args, args.experimental_desc)
print(model_save_dir)
#import shutil
#from torch.utils.tensorboard import SummaryWriter

###################################################################
# ------------------- Pre-Define Part----------------------
###################################################################
# ================== Pre-Define =================== #
# Seed the CPU and all CUDA generators with the same value.
SEED = 10
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cudnn.benchmark = True  ### automatically search for the fastest algorithm
cudnn.deterministic = True

# ============= 2) HYPER PARAMS(Pre-Defined) ==========#
"""
CHANGES:
1. row 48: in APNN the L1 loss is averaged only on the minibatch size, for the learning rate in case the loss is averaged on minibatches, patches size, and bands is lr=0.0001*17*17*nr_bands
    a. nr_bands takes into account the number of bands
2. row 46: should be indicated by user here, or taken from data
3. row 46: depends on the
4. the dataset is already normalized, so we do not need anymore and
5. row 49: in APNN epochs=10000
6. row 71: in APNN weight_decay=0
7. rows 182-195: in pretrained_models the best PNN model is saved
"""
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"
sensor = 'WV4'
nr_bands = 4 #selected by user or taken from data?
lr = 0.0001*17*17*nr_bands
epochs = 15000
ckpt = 50  # checkpoint period in epochs, used by train()
batch_size = 128
# NOTE(review): the doubled ".pth.tar" suffix looks unintended -- confirm the file name.
model_path = "../results/wv4/best_PNN_model_1706.pth.tar.pth.tar"
v_min = 10000  # best validation L1 loss so far; replaced below when a checkpoint is loaded
#TODO L2 norm to do where
# ============= 3) Load Model + Loss + Optimizer + Learn_rate_update ==========#
model = APNN().cuda()
model.apply(weights_init)
if os.path.isfile(model_path):
    log_string("loading")
    # The checkpoint is a dict in the layout written by train():
    # "model_state" holds the weights and "loss1" the validation L1 loss.
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint["model_state"])  ## Load the pretrained Encoder
    v_min = checkpoint["loss1"]
    print("best_loss {:.7f}".format(v_min))
    log_string('APNN is Successfully Loaded from %s' % (model_path))

# summaries(model, grad=True) ## Summary the Network
criterion = nn.L1Loss(reduction='mean').cuda()
regularization = loss_with_l2_regularization().cuda()
# Use the ids of parameters that live in the model (conv3) to single out the
# remaining parameters, instead of iterating over model.parameters() directly.
# conv3 is trained at lr/10; every other parameter uses the optimizer-wide lr.
target_layerParam = list(map(id, model.conv3.parameters()))
base_layerParam = filter(lambda p: id(p) not in target_layerParam, model.parameters())
training_parameters = [{'params': model.conv3.parameters(), 'lr': lr/10}, {'params': base_layerParam}]
optimizer = optim.SGD(training_parameters, lr=lr, momentum=0.9)
log_string("inspect optimizer setting: {}\n".format(optimizer.state_dict()))
log_string("target id: {}".format(target_layerParam))
# Zero-padding extent of the model's convolution layers (wide convolution)
# (input_size - kernel_size + 1) // 2 = 2* pad = 2 * blk = net_scope
# net_scope adds up (last dimension - 1) over every non-bias parameter whose
# name contains 'conv', plus one; blk is half of it and is the border that
# train() crops from the target.
net_scope = 0
for name, layer in model.named_parameters():
    if 'conv' in name and 'bias' not in name:
        net_scope += layer.shape[-1]-1
net_scope = np.sum(net_scope) + 1
blk = net_scope//2 #8
save_best_file = '../results/PNN/PNN_model.pth.tar'
# Hyper-parameters stored alongside every checkpoint written by train().
PNN_model = {'sensor': sensor, 'lr': lr, 'epochs': epochs, 'model_sampling_period': ckpt, 'net_scope': net_scope, 'batch_size': batch_size}
writer = SummaryWriter('../train_logs') ## Tensorboard_show: case 2


def save_checkpoint(model, epoch): # save model function
    """Write ``model.state_dict()`` to ``Weights/<epoch>.pth``.

    NOTE(review): the only visible call site is commented out, and the
    ``Weights`` directory is not created here.
    """
    model_out_path = 'Weights' + '/' + "{}.pth".format(epoch)
    torch.save(model.state_dict(), model_out_path)
###################################################################
# ------------------- Main Train (Run second)----------------------------------
###################################################################
def train(training_data_loader, validate_data_loader, start_epoch=0, v_min=10000):
    """Train the module-level ``model`` and validate it after every epoch.

    Each batch is indexed as ``(gt, lms, ms, pan)``. The regression target is
    the residual ``gt - lms`` with a border of ``blk`` pixels cropped on every
    side, and the network input is ``lms`` concatenated with ``pan`` along
    dim 1. The L1 ``criterion`` (plus ``regularization``) drives the
    optimizer; MSE is tracked for logging only.

    Args:
        training_data_loader: iterable of training batches.
        validate_data_loader: iterable of validation batches.
        start_epoch: number of epochs already done; epochs are numbered
            ``start_epoch + 1 .. epochs`` (1-based).
        v_min: best validation L1 loss seen so far; a "best" checkpoint is
            written only when an epoch beats it.

    Side effects:
        Logs through ``log_string``, writes scalars to ``writer``, saves a
        periodic checkpoint every ``ckpt`` epochs, saves a "best" checkpoint
        on improvement, and closes ``writer`` when finished.
    """
    log_string('Start training...')
    mse_criterion = nn.MSELoss()  # stateless, so build it once instead of per batch

    # torch.save does not create missing folders.
    pretrained_dir = '../pretrained_models/'
    for folder in (model_save_dir, pretrained_dir):
        os.makedirs(folder, exist_ok=True)

    for epoch in range(start_epoch + 1, epochs + 1):
        epoch_train_mae, epoch_train_mse, epoch_val_mae, epoch_val_mse = [], [], [], []

        # ============Epoch Train=============== #
        model.train()
        for iteration, batch in enumerate(training_data_loader, 1):
            # batch[2] (ms) is never used, so it is not copied to the GPU.
            gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
            gt = gt - lms
            lms = torch.cat([lms, pan], dim=1)

            optimizer.zero_grad()
            sr = model(lms)
            gt = gt[:, :, blk:-blk, blk:-blk]  # match the border-cropped network output
            loss = criterion(sr, gt)
            new_loss = regularization(loss, model, flag=False)
            epoch_train_mae.append(loss.item())  # log the un-regularised L1 loss

            new_loss.backward()
            optimizer.step()

            with torch.no_grad():
                epoch_train_mse.append(mse_criterion(sr, gt).item())

        t_loss1 = np.nanmean(np.array(epoch_train_mae))  # mean L1 loss of the epoch
        t_loss2 = np.nanmean(np.array(epoch_train_mse))  # mean MSE of the epoch
        writer.add_scalar('mae_loss/t_mae', t_loss1, epoch)
        writer.add_scalar('mae_loss/t_mse', t_loss2, epoch)
        # Report current/total (the arguments used to be swapped).
        log_string('Epoch: {}/{} training L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epoch, epochs, t_loss1, t_loss2))

        # ============Epoch Validate=============== #
        model.eval()
        with torch.no_grad():
            for iteration, batch in enumerate(validate_data_loader, 1):
                gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
                gt = gt - lms
                lms = torch.cat([lms, pan], dim=1)

                sr = model(lms)
                gt = gt[:, :, blk:-blk, blk:-blk]
                epoch_val_mae.append(criterion(sr, gt).item())
                epoch_val_mse.append(mse_criterion(sr, gt).item())

        v_loss1 = np.nanmean(np.array(epoch_val_mae))
        v_loss2 = np.nanmean(np.array(epoch_val_mse))
        writer.add_scalar('val/v_mae', v_loss1, epoch)
        writer.add_scalar('val/v_mse', v_loss2, epoch)
        log_string('Epoch: {}/{} validate L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epoch, epochs, v_loss1, v_loss2))

        ### periodic save and simple best save ###
        # `epoch` is already 1-based, so the file named N holds the weights
        # after N epochs (it used to hold those of epoch N - 1).
        if epoch % ckpt == 0:
            torch.save(dict(model=model, model_state=model.state_dict(), loss=v_loss1, train_params=PNN_model),
                       '{}/PNN_model_{}.pth.tar'.format(model_save_dir, epoch))

        if v_loss1 < v_min:
            if os.path.isfile(save_best_file):
                os.remove(save_best_file)
            best_state = dict(model=model, model_state=model.state_dict(), loss1=v_loss1, train_params=PNN_model)
            torch.save(best_state, '{}/best_PNN_model_{}.pth.tar'.format(model_save_dir, epoch))
            torch.save(best_state, pretrained_dir + sensor + '_PNNplus_model.pth.tar')
            v_min = v_loss1

    writer.close()  # close tensorboard


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################
if __name__ == "__main__":
    train_set = Dataset_Pro('../training_data/train_wv4_10000.h5')  # create data for training
    training_data_loader = DataLoader(dataset=train_set, num_workers=0, batch_size=batch_size, shuffle=True,
                                      pin_memory=True, drop_last=True)  # batch the training data

    validate_set = Dataset_Pro('../training_data/valid_wv4_10000.h5')  # create data for validation
    validate_data_loader = DataLoader(dataset=validate_set, num_workers=0, batch_size=batch_size, shuffle=False,
                                      pin_memory=True, drop_last=True)  # batch the validation data

    # The resume epoch (1707) is hard-coded; it follows the epoch in model_path (1706).
    train(training_data_loader, validate_data_loader, 1707, v_min)  # call train function
def load_set(file_path, blk):
    """Load one pansharpening test file and build the edge-padded network input.

    Two on-disk layouts are supported; the layout is chosen from the rank of
    the low-resolution MS array (``ms`` for ``.h5``, ``I_MS_LR`` for ``.mat``):

    * rank 4 (N x C x H x W): keys ``lms``, ``pan``, ``ms``, ``gt``
    * rank 3 (H x W x C):     keys ``I_MS``, ``I_PAN``, ``I_MS_LR``, ``I_GT``

    Every array is converted to float32 and divided by 2047.0.

    Args:
        file_path: Path ending in ``.h5`` or ``.mat``. When the part of the
            name just before the extension ends with ``FR``, ``I_MS_LR`` is
            reused as ``I_GT``.
        blk: Number of pixels of ``edge`` padding added on each side of the
            two spatial axes of the stacked (MS, PAN) input.

    Returns:
        ``(test_I_in, ms, pan, gt)`` as torch tensors in N x C x H x W order,
        where ``test_I_in`` is the channel-wise concatenation of the
        up-sampled MS and the PAN image, padded by ``blk``.

    Raises:
        NotImplementedError: the file extension is neither ``h5`` nor ``mat``.
        ValueError: the low-resolution MS array is neither rank 3 nor rank 4.
    """
    def _scaled(array):
        # Same normalisation the rest of the script relies on.
        return np.array(array, dtype=np.float32) / 2047.0

    suffix = file_path.split('.')
    extension = suffix[-1]

    if extension == 'h5':
        # Read the datasets into memory and close the handle right away
        # instead of leaking an open HDF5 file for the process lifetime.
        with h5py.File(file_path, 'r') as h5_file:
            data = {key: h5_file[key][...] for key in h5_file.keys()
                    if isinstance(h5_file[key], h5py.Dataset)}
        shape_size = data["ms"].ndim
    elif extension == 'mat':
        data = sio.loadmat(file_path)
        shape_size = data["I_MS_LR"].ndim
    else:
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # calling or raising it only produces an unrelated TypeError.
        raise NotImplementedError(
            "file format is not supported: {}".format(file_path))

    if len(suffix) >= 2 and suffix[-2][-2:] == 'FR':
        data['I_GT'] = data['I_MS_LR']  # this case reuses the LR MS as GT

    if shape_size == 4:  # N x C x H x W
        lms1 = _scaled(data['lms'])
        pan1 = _scaled(data['pan'])
        stacked = np.concatenate([lms1, pan1], axis=1)  # N x (C+1) x H x W
        stacked = np.pad(stacked, ((0, 0), (0, 0), (blk, blk), (blk, blk)),
                         mode='edge')
        test_I_in = torch.from_numpy(stacked)
        ms = torch.from_numpy(_scaled(data['ms']))
        pan = torch.from_numpy(pan1)
        gt = torch.from_numpy(_scaled(data['gt']))
        return test_I_in, ms, pan, gt

    if shape_size == 3:  # H x W x C
        lms1 = _scaled(np.expand_dims(data['I_MS'], axis=0))  # 1 x H x W x C
        pan1 = np.expand_dims(data['I_PAN'], axis=0)          # 1 x H x W
        pan1 = _scaled(np.expand_dims(pan1, axis=3))          # 1 x H x W x 1
        stacked = np.concatenate([lms1, pan1], axis=3)        # 1 x H x W x (C+1)
        stacked = np.transpose(stacked, (0, 3, 1, 2))         # 1 x (C+1) x H x W
        stacked = np.pad(stacked, ((0, 0), (0, 0), (blk, blk), (blk, blk)),
                         mode='edge')
        test_I_in = torch.from_numpy(stacked)
        ms1 = _scaled(np.expand_dims(data['I_MS_LR'], axis=0))
        gt1 = _scaled(np.expand_dims(data['I_GT'], axis=0))
        ms = torch.from_numpy(ms1).permute(0, 3, 1, 2)
        pan = torch.from_numpy(pan1).permute(0, 3, 1, 2)
        gt = torch.from_numpy(gt1).permute(0, 3, 1, 2)
        return test_I_in, ms, pan, gt

    # Previously fell through and returned None, which only failed later
    # when the caller tried to unpack four values.
    raise ValueError(
        "expected a rank-3 or rank-4 MS array, got rank {}".format(shape_size))
x = x.cuda().float() # convert to tensor type: out2 = model(x) if self.mode == 'test' or self.mode == 'RR': sr = out2 + self.test_I_in[:, :-1, blk:-blk, blk:-blk].cuda() # NxCxHxW sr = sr.permute(0, 2, 3, 1) gt = self.test_gt.permute(0, 2, 3, 1).cuda() our_CC, our_PSNR, our_SSIM, our_SAM, our_ERGAS = analysis_accu(gt[0, ...], sr[0, ...], 4) print(f'[{self.file_path}]: our_CC: {our_CC}, our_PSNR: {our_PSNR}, ' f'our_SSIM: {our_SSIM},\n' f'our_SAM: {our_SAM} our_ERGAS: {our_ERGAS}') return out2 ## 2) target-adative's fine_tune_training, i.e., PNNplus## def test(file_path, sensor_model): suffix = file_path.split('.')[-2][-2:] if suffix == 'RR' or suffix == 'FR': simulated = suffix else: simulated = 'test' tester = Tester(file_path, mode='ft') # call initial model evaluator = Tester(file_path, mode=simulated) criterion = nn.L1Loss(reduction='mean').cuda() regularization = loss_with_l2_regularization().cuda() " LOAD PRETRAINED MODEL" init_loss = 0 model_path = "../pretrained_models/1QB_PNNplus_model.pth.tar" if os.path.isfile(model_path): print("loading model") checkpoint = torch.load(model_path) # checkpoint = torch.load('./pretrained_models/' + sensor_model) print(checkpoint.keys()) net = checkpoint['model'] print(net.conv1.weight.data[0, 0, 0, 0]) net.load_state_dict(checkpoint['model_state']) ''' ft: lr setting 1-SF 2e-4 epoch 150 1-ik 2e-4 7-h5 1e-5 ''' lr_ = 2e-4#0.0001 * 17 * 17 * 8 # 1e-3#2e-4#0.0001 * 17 * 17 * 8 FT_epochs = 50 # number of fine tuning epochs # init_loss = checkpoint["loss1"] else: nr_bands = 4 # selected by user or taken from data? 
lr_ = 0.0001 * 17 * 17 * nr_bands FT_epochs = 5000 net = APNN().cuda() net.apply(weights_init) print(net.conv1.weight.data[0, 0, 0, 0]) print(net.conv1.weight.data[0, 0, 0, 0]) print(net) test_gt = tester.test_gt - tester.test_I_in[:, :-1, blk:-blk, blk:-blk] pretrain_inIt_loss = criterion(net(tester.test_I_in.cuda()), test_gt.cuda()).item() print("init loss: {:.20f} pretrain_inIt loss: {:.20f}".format(init_loss, pretrain_inIt_loss)) eval_test(net, evaluator, mode="eval", mode2="pre") print('-'*100) "scaling learning rate on last layer" target_layerParam = list(map(id, net.conv3.parameters())) base_layerParam = filter(lambda p: id(p) not in target_layerParam, net.parameters()) training_parameters = [{'params': net.conv3.parameters(), 'lr': lr_ / 10}, {'params': base_layerParam}] optimizer = optim.SGD(training_parameters, lr=lr_, momentum=0.9) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") v_min = 10000 ft_loss = np.zeros(FT_epochs) Train_time = time() ## 2.1) "FINE TUNING"--training for epoch in range(FT_epochs): # loop over the testing image multiple times net.train() # running_loss = 0.0 # loading testing image test_I_in = tester.test_I_in test_gt = tester.test_gt # residual test_gt = test_gt - test_I_in[:, :-1, blk:-blk, blk:-blk] # zero the parameter gradients optimizer.zero_grad() # forward + backward + optimize x1 = test_I_in # send to cuda, important! 
def eval_test(net, evaluator, mode="pre", mode2="pre"):
    """Run ``evaluator`` on ``net`` and save the fused image(s) to ``../results``.

    The evaluator returns the predicted residual; the up-sampled MS bands
    (all input channels except the last, with the ``blk`` border removed) are
    added back, the result is clipped to [0, 1] and stored as N x H x W x C.

    Args:
        net: Model handed to ``evaluator``.
        evaluator: Callable ``evaluator(net)`` returning an N x C x H x W
            tensor; must expose ``test_I_in`` (tensor) and ``mode`` (str).
        mode: Accepted for call compatibility; not used by this function.
        mode2: Tag inserted in the output file name (callers pass ``"pre"``
            or ``"ft"``).

    Side effects:
        Creates ``../results`` if needed and writes one ``.mat`` file per
        example.
    """
    with torch.no_grad():
        residual = evaluator(net)  # NxCxHxW

    # Skip connection: add the low-resolution MS back to the residual.
    base = evaluator.test_I_in[:, :-1, blk:-blk, blk:-blk]
    sr = residual.cpu().detach().numpy() + base.cpu().detach().numpy()
    sr = np.clip(np.transpose(sr, (0, 2, 3, 1)), 0, 1)  # NxHxWxC

    out_dir = "../results"
    # savemat does not create directories; a missing folder used to abort
    # the run only after inference had completed.
    os.makedirs(out_dir, exist_ok=True)

    num_exm = sr.shape[0]
    if num_exm == 1:
        # Only "FR" gets the full-resolution name. Any other mode ("RR", or
        # "test" for a single-sample set) uses the reduced-resolution name;
        # previously mode "test" left ``file_name`` unbound.
        prefix = "apnn_qb_os" if evaluator.mode == "FR" else "apnn_qb_rs"
        file_name = prefix + '_ik_' + mode2 + ".mat"
        sio.savemat(os.path.join(out_dir, file_name),
                    {'apnn_qb': sr[0, :, :, :]})
    else:
        for index in range(num_exm):
            file_name = "apnn_qb_rs" + str(index) + mode2 + ".mat"
            sio.savemat(os.path.join(out_dir, file_name),
                        {'apnn_qb_rs': sr[index, :, :, :]})
def load_set(file_path, blk):
    """Read a WV2 test file and return the padded input plus reference tensors.

    The layout is picked from the rank of the low-resolution MS array
    (``ms`` in ``.h5`` files, ``I_MS_LR`` in ``.mat`` files):

    * rank 4 -> N x C x H x W arrays under ``lms``, ``pan``, ``ms``, ``gt``
    * rank 3 -> H x W x C arrays under ``I_MS``, ``I_PAN``, ``I_MS_LR``,
      ``I_GT``

    All arrays are cast to float32 and divided by 2047.0.

    Args:
        file_path: Path with extension ``h5`` or ``mat``. If the name part
            right before the extension ends in ``FR``, ``I_MS_LR`` is copied
            into ``I_GT``.
        blk: Width, in pixels, of the ``edge`` padding applied to both
            spatial axes of the concatenated (MS, PAN) input.

    Returns:
        Tuple ``(test_I_in, ms, pan, gt)`` of N x C x H x W torch tensors.

    Raises:
        NotImplementedError: unsupported file extension.
        ValueError: the low-resolution MS array has a rank other than 3 or 4.
    """
    parts = file_path.split('.')
    extension = parts[-1]

    if extension == 'h5':
        # Copy the datasets out and release the file handle; the original
        # kept the HDF5 file open for good.
        with h5py.File(file_path, 'r') as h5_file:
            data = {name: h5_file[name][...] for name in h5_file.keys()
                    if isinstance(h5_file[name], h5py.Dataset)}
        rank = data["ms"].ndim
    elif extension == 'mat':
        data = sio.loadmat(file_path)
        rank = data["I_MS_LR"].ndim
    else:
        # ``raise NotImplemented`` fails with "exceptions must derive from
        # BaseException"; NotImplementedError is the real exception type.
        raise NotImplementedError(
            "file format is not supported: {}".format(file_path))

    if len(parts) >= 2 and parts[-2][-2:] == 'FR':
        data['I_GT'] = data['I_MS_LR']  # this case reuses the LR MS as GT

    spatial_pad = ((0, 0), (0, 0), (blk, blk), (blk, blk))

    if rank == 4:  # N x C x H x W
        lms1 = np.array(data['lms'], dtype=np.float32) / 2047.0
        pan1 = np.array(data['pan'], dtype=np.float32) / 2047.0
        ms1 = np.array(data['ms'], dtype=np.float32) / 2047.0
        gt1 = np.array(data['gt'], dtype=np.float32) / 2047.0
        net_in = np.pad(np.concatenate([lms1, pan1], axis=1), spatial_pad,
                        mode='edge')
        return (torch.from_numpy(net_in), torch.from_numpy(ms1),
                torch.from_numpy(pan1), torch.from_numpy(gt1))

    if rank == 3:  # H x W x C
        lms1 = np.array(np.expand_dims(data['I_MS'], axis=0),
                        dtype=np.float32) / 2047.0            # 1 x H x W x C
        pan1 = np.expand_dims(data['I_PAN'], axis=0)          # 1 x H x W
        pan1 = np.array(np.expand_dims(pan1, axis=3),
                        dtype=np.float32) / 2047.0            # 1 x H x W x 1
        ms1 = np.array(np.expand_dims(data['I_MS_LR'], axis=0),
                       dtype=np.float32) / 2047.0
        gt1 = np.array(np.expand_dims(data['I_GT'], axis=0),
                       dtype=np.float32) / 2047.0
        net_in = np.concatenate([lms1, pan1], axis=3)         # 1 x H x W x (C+1)
        net_in = np.transpose(net_in, (0, 3, 1, 2))           # 1 x (C+1) x H x W
        net_in = np.pad(net_in, spatial_pad, mode='edge')
        return (torch.from_numpy(net_in),
                torch.from_numpy(ms1).permute(0, 3, 1, 2),
                torch.from_numpy(pan1).permute(0, 3, 1, 2),
                torch.from_numpy(gt1).permute(0, 3, 1, 2))

    # The original returned None here, which surfaced later as an
    # unpacking error in the caller.
    raise ValueError(
        "expected a rank-3 or rank-4 MS array, got rank {}".format(rank))
x = x.cuda().float() # convert to tensor type: out2 = model(x) if self.mode == 'test' or self.mode == 'RR': sr = out2 + self.test_I_in[:, :-1, blk:-blk, blk:-blk].cuda() # NxCxHxW sr = sr.permute(0, 2, 3, 1) gt = self.test_gt.permute(0, 2, 3, 1).cuda() our_CC, our_PSNR, our_SSIM, our_SAM, our_ERGAS = analysis_accu(gt[0, ...], sr[0, ...], 4) print(f'[{self.file_path}]: our_CC: {our_CC}, our_PSNR: {our_PSNR}, ' f'our_SSIM: {our_SSIM},\n' f'our_SAM: {our_SAM} our_ERGAS: {our_ERGAS}') return out2 ## 2) target-adative's fine_tune_training, i.e., PNNplus## def test(file_path, sensor_model): suffix = file_path.split('.')[-2][-2:] if suffix== 'RR' or suffix == 'FR': simulated = suffix else: simulated = 'test' tester = Tester(file_path, mode='ft') # call initial model evaluator = Tester(file_path, mode=simulated) criterion = nn.L1Loss(reduction='mean').cuda() regularization = loss_with_l2_regularization().cuda() " LOAD PRETRAINED MODEL" init_loss = 0 model_path = "../pretrained_models/1WV2_PNNplus_model.pth.tar" if os.path.isfile(model_path): print("loading model") checkpoint = torch.load(model_path) # checkpoint = torch.load('./pretrained_models/' + sensor_model) print(checkpoint.keys()) net = checkpoint['model'] print(net.conv1.weight.data[0, 0, 0, 0]) net.load_state_dict(checkpoint['model_state']) lr_ = 1e-6 FT_epochs = 200 # number of fine tuning epochs # init_loss = checkpoint["loss1"] else: nr_bands = 8 # selected by user or taken from data? 
lr_ = 0.0001 * 17 * 17 * nr_bands FT_epochs = 5000 net = APNN().cuda() net.apply(weights_init) print(net.conv1.weight.data[0, 0, 0, 0]) print(net.conv1.weight.data[0, 0, 0, 0]) print(net) test_gt = tester.test_gt - tester.test_I_in[:, :-1, blk:-blk, blk:-blk] pretrain_inIt_loss = criterion(net(tester.test_I_in.cuda()), test_gt.cuda()).item() print("init loss: {:.20f} pretrain_inIt loss: {:.20f}".format(init_loss, pretrain_inIt_loss)) eval_test(net, evaluator, mode="eval", mode2="pre") print('-' * 100) "scaling learning rate on last layer" target_layerParam = list(map(id, net.conv3.parameters())) base_layerParam = filter(lambda p: id(p) not in target_layerParam, net.parameters()) training_parameters = [{'params': net.conv3.parameters(), 'lr': lr_ / 10}, {'params': base_layerParam}] optimizer = optim.SGD(training_parameters, lr=lr_, momentum=0.9) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") v_min = 10000 ft_loss = np.zeros(FT_epochs) Train_time = time() print(tester.test_gt.shape) ## 2.1) "FINE TUNING"--training for epoch in range(FT_epochs): # loop over the testing image multiple times net.train() # running_loss = 0.0 # loading testing image test_I_in = tester.test_I_in test_gt = tester.test_gt # residual test_gt = test_gt - test_I_in[:, :-1, blk:-blk, blk:-blk] # zero the parameter gradients optimizer.zero_grad() # forward + backward + optimize x1 = test_I_in # send to cuda, important! 
def eval_test(net, evaluator, mode="pre", mode2="pre"):
    """Evaluate ``net`` through ``evaluator`` and store the result as .mat.

    The network output is treated as a residual: the MS channels of
    ``evaluator.test_I_in`` (every channel but the last, cropped by ``blk``
    on each spatial side) are added to it, then the sum is clipped to
    [0, 1] and rearranged to N x H x W x C before saving.

    Args:
        net: Model passed through to ``evaluator``.
        evaluator: Callable ``evaluator(net)`` returning an N x C x H x W
            tensor, with attributes ``test_I_in`` and ``mode``.
        mode: Kept for signature compatibility; unused here.
        mode2: Tag embedded in the output file name.

    Side effects:
        Ensures ``../results`` exists and writes one ``.mat`` file per
        example into it.
    """
    with torch.no_grad():
        predicted = evaluator(net)  # NxCxHxW

    ms_part = evaluator.test_I_in[:, :-1, blk:-blk, blk:-blk]
    sr = predicted.cpu().detach().numpy() + ms_part.cpu().detach().numpy()
    sr = np.clip(np.transpose(sr, (0, 2, 3, 1)), 0, 1)  # NxHxWxC

    results_dir = "../results"
    # Create the target folder up front; savemat fails on a missing path.
    os.makedirs(results_dir, exist_ok=True)

    num_exm = sr.shape[0]
    if num_exm == 1:
        # "FR" -> full-resolution name, everything else -> reduced-resolution
        # name. The original left ``file_name`` undefined for mode "test".
        stem = "apnn_wv2_os" if evaluator.mode == "FR" else "apnn_wv2_rs"
        file_name = stem + '_rio_' + mode2 + ".mat"
        sio.savemat(os.path.join(results_dir, file_name),
                    {'apnn_wv2': sr[0, :, :, :]})
    else:
        for index in range(num_exm):
            file_name = "apnn_wv2_rs" + str(index) + mode2 + ".mat"
            sio.savemat(os.path.join(results_dir, file_name),
                        {'apnn_wv2_rs': sr[index, :, :, :]})
inter self.multiple = multiple self.criterion = nn.L1Loss(reduction='mean') def forward(self, x, target, lms): if self.clip_flag: # x = torch.clamp(x * self.multiple, 0, 2048) / self.multiple x = x - lms if self.inter: loss = torch.mean((x - target) ** 2)#self.criterion(x, target)#torch.mean(torch.abs(x - target)) return loss else: l1_loss = self.criterion(x, target) return l1_loss ################################################################### # ------------------- Sub-Functions (will be used) ------------------- ################################################################### def load_set(file_path, blk): suffix = file_path.split('.') if suffix[-1] == 'h5': ## ===== case1: NxCxHxW data = h5py.File(file_path) ms1 = data["ms"][...] # NxCxHxW=0,1,2,3 shape_size = len(ms1.shape) elif suffix[-1] == 'mat': # ===== case2: HxWxC data = sio.loadmat(file_path) # ms1 = data["I_MS_LR"][...] # NxCxHxW=0,1,2,3 shape_size = len(ms1.shape) else: raise NotImplemented("file format is not suppoted") """this is en exception to be addressed: RR data has four fields, FR has no GT For the finetuing the GT is necessary both in RR and FR testing case. the I_MS_LR has taken as GT in both cases """ if suffix[-2][-2:] == 'FR': data['I_GT'] = data['I_MS_LR'] #exception if shape_size == 4: # NxCxHxW # tensor type: lms1 = data['lms'][...] # NxCxHxW = 4x8x512x512 lms1 = np.array(lms1, dtype=np.float32) / 2047.0 lms = torch.from_numpy(lms1) pan1 = data['pan'][...] # NxCxHxW = 4x8x512x512 pan1 = np.array(pan1, dtype=np.float32) / 2047.0 pan = torch.from_numpy(pan1) test_I_in1 = np.concatenate([lms1, pan1], axis=1) # NxCxHxW = Nx9xHxW test_I_in1 = np.pad(test_I_in1, ((0, 0), (0, 0), (blk, blk), (blk, blk)), mode='edge') # pading test_I_in = torch.from_numpy(test_I_in1) # NxCxHxW = Nx9xHxW ms1 = data['ms'][...] # NxCxHxW = 4x8x512x512 ms1 = np.array(ms1, dtype=np.float32) / 2047.0 ms = torch.from_numpy(ms1) gt1 = data['gt'][...] 
# NxCxHxW = 4x8x512x512 gt1 = np.array(gt1, dtype=np.float32) / 2047.0 gt = torch.from_numpy(gt1) return test_I_in, ms, pan, gt if shape_size == 3: # HxWxC # tensor type: lms1 = data['I_MS'][...] # HxWxC=0,1,2 lms1 = np.expand_dims(lms1, axis=0) # 1xHxWxC lms1 = np.array(lms1, dtype=np.float32) / 2047.0 # 1xHxWxC lms = torch.from_numpy(lms1).permute(0, 3, 1, 2) # NxCxHxW or HxWxC pan1 = data['I_PAN'][...] # HxW pan1 = np.expand_dims(pan1, axis=0) # 1xHxW pan1 = np.expand_dims(pan1, axis=3) # 1xHxWx1 pan1 = np.array(pan1, dtype=np.float32) / 2047. # 1xHxWx1 pan = torch.from_numpy(pan1).permute(0, 3, 1, 2) # Nx1xHxW: test_I_in1 = np.concatenate([lms1, pan1], axis=3) # 1xHxWx(C+1) = Nx9xHxW test_I_in1 = np.transpose(test_I_in1, (0, 3, 1, 2)) # 1x(C+1)xHxW test_I_in1 = np.pad(test_I_in1, ((0, 0), (0, 0), (blk, blk), (blk, blk)), mode='edge') # NCHW test_I_in = torch.from_numpy(test_I_in1) # NxCxHxW = Nx9xHxW #预先wald仿真的 ms1 = data['I_MS_LR'][...] # HxWxC=0,1,2 ms1 = np.expand_dims(ms1, axis=0) # 1xHxWxC ms1 = np.array(ms1, dtype=np.float32) / 2047.0 # 1xHxWxC ms_lr = torch.from_numpy(ms1).permute(0, 3, 1, 2) # NxCxHxW or HxWxC 1,8,128,128 gt1 = data['I_GT'][...] 
class Tester():
    """Holds one test scene loaded by ``load_set`` and runs a model on it.

    In mode ``'ft'`` the stored network input and ground truth are replaced
    so that the instance can serve as the fine-tuning target; in any other
    mode the tensors returned by ``load_set`` are used unchanged.
    """

    def __init__(self, file_path, mode):
        """Load ``file_path`` and prepare the tensors for the given ``mode``.

        Args:
            file_path: Path forwarded to ``load_set``; also echoed in the
                metric printout.
            mode: ``'ft'`` rebuilds the input from ``wald_protocol`` output;
                ``'RR'`` / ``'test'`` additionally enable metric printing in
                ``__call__``.
        """
        test_I_in, test_ms_lr, test_pan, test_gt = load_set(file_path, blk)
        # The shapes in the comments below are the original author's example
        # values, not something this class enforces.
        self.test_I_in = test_I_in  # padded (MS, PAN) stack, e.g. 1,9,528,528
        # self.test_lms = test_lms #lms - wald 1,8,512,512
        self.test_pan = test_pan  # PAN image, e.g. 1,1,512,512
        self.test_gt = test_gt  # reference MS, e.g. 1,8,512,512
        self.test_ms = test_ms_lr  # low-resolution MS, e.g. 1,8,128,128
        self.mode = mode
        self.file_path = file_path
        # Created on the GPU but not referenced anywhere else in this class.
        self.loss_clip = L1_Loss_clip(inter=True, clip_flag=True).cuda()
        "the fine tuning phase requires downgraded input resolution"
        if mode == 'ft':
            from wald_utilities import wald_protocol
            # presumably degrades MS and PAN by the given ratio (4) for the
            # WV3 sensor — TODO confirm against wald_utilities
            ms_lr, pan_lr = wald_protocol(test_ms_lr, test_pan, 4., 'WV3')
            # NOTE(review): ``ms_lr`` is not used; the input is built from the
            # un-degraded ``test_ms_lr`` plus ``pan_lr`` — confirm intended.
            self.test_I_in = torch.cat((test_ms_lr, pan_lr), dim=1)
            # Reflect-pad 8 pixels on every spatial side (matches blk = 8).
            self.test_I_in = torch.nn.functional.pad(self.test_I_in, (8, 8, 8, 8), mode='reflect')  # NCHW
            # During fine tuning the low-resolution MS acts as the target.
            self.test_gt = self.test_ms
        # Original author's open questions (translated):
        # If the data were already simulated in MATLAB we have ms_lr: 512,
        # pan: 512, ms: 512, pan: 2048, so it should not need processing again.
        # lms is 128 but pan is 512; there is no simulated pan (i.e. a 128
        # pan_lr) and no 128 gt either.
        # So pan should already be simulated (originally 2048, now 512); does
        # ms still need to go through the Wald protocol?
        # from wald_utilities import wald_protocol_v2
        #
        # pan_lr = wald_protocol_v2(None, test_pan, 4., 'WV3')
        # self.test_I_in = torch.cat((test_lms, pan_lr), dim=1)
        # self.test_I_in = torch.nn.functional.pad(self.test_I_in, (8, 8, 8, 8), mode='reflect') # NCHW
        # self.test_ms = test_ms.unsqueeze(dim=0).float() # convert to tensor type: 1xCxHxW (unsqueeze(dim=0))
        # self.test_pan = test_pan.unsqueeze(dim=0).float() # convert to tensor type: 1x1xHxW
        # self.test_I_in = test_I_in.unsqueeze(dim=0).float() # 1xCxHxW
        # self.test_gt = (test_gt * 2047.0).cuda()

    def __call__(self, model, err=None):
        """Run ``model`` on the stored input and return its raw output.

        Args:
            model: Callable applied to the CUDA float copy of ``test_I_in``.
            err: Accepted but not used in this method.

        Returns:
            The model output ``out2``. The MS skip connection is added only
            for the metric printout, not to the returned value.
        """
        x = self.test_I_in  # send to cuda, important!
        x = x.cuda().float()  # convert to tensor type:
        out2 = model(x)  # original author's note: 1.4942e-03
        # result_our = out2 + x[:, :-1, blk:-blk, blk:-blk]
        # out2 = model(self.test_I_in)
        # result_our = out2 + self.test_I_in[:, :-1, blk:-blk, blk:-blk]
        # result_our = torch.squeeze(result_our).permute(1, 2, 0)
        # result_our = result_our* 2047
        # our_SAM, our_ERGAS = compute_index(self.test_gt, result_our, 4)
        # print('our_SAM: {} our_ERGAS: {}'.format(our_SAM, our_ERGAS)) # print loss for each epoch
        # Metrics are only computed for 'RR' and 'test'; other modes (e.g.
        # 'FR', 'ft') skip straight to the return.
        if self.mode == 'RR' or self.mode == 'test':
            # MS channels of the input with the blk-wide border removed.
            test_in = self.test_I_in[:, :-1, blk:-blk, blk:-blk].cuda()
            sr = out2 + test_in  # residual + MS -> fused image, NxCxHxW
            sr = sr.permute(0, 2, 3, 1)
            gt = self.test_gt.permute(0, 2, 3, 1).cuda()
            # sr = out2.permute(0, 2, 3, 1)
            # gt = self.test_gt.cuda() - test_in # NxCxHxW
            # gt = gt.permute(0, 2, 3, 1)
            # Only the first example of the batch is scored.
            our_CC, our_PSNR, our_SSIM, our_SAM, our_ERGAS, our_Q8 = analysis_accu(gt[0, ...], sr[0, ...], 4)
            print(f'[{self.file_path}]: our_CC: {our_CC}, our_PSNR: {our_PSNR}, '
                  f'our_SSIM: {our_SSIM},\n'
                  f'our_SAM: {our_SAM} our_ERGAS: {our_ERGAS} our_Q8: {our_Q8}')
        return out2
checkpoint['model'] print(net.conv1.weight.data[0, 0, 0, 0]) net.load_state_dict(checkpoint['model_state']) ''' 4-.h5 1e-4 1-.mat 1e-4 ''' lr_ = 1e-4#0.0001 * 17 * 17 * 8#1e-4#0.0001 * 17 * 17 * 8#1e-3#2e-4#0.0001 * 17 * 17 * 8 FT_epochs = 50 # number of fine tuning epochs # init_loss = checkpoint["loss1"] else: nr_bands = 8 # selected by user or taken from data? lr_ = 0.0001 * 17 * 17 * nr_bands FT_epochs = 5000 net = APNN().cuda() net.apply(weights_init) print(net.conv1.weight.data[0, 0, 0, 0]) print(net.conv1.weight.data[0, 0, 0, 0]) print(net) test_gt = tester.test_gt - tester.test_I_in[:, :-1, blk:-blk, blk:-blk] pretrain_inIt_loss = criterion(net(tester.test_I_in.cuda()), test_gt.cuda())#, tester.test_I_in[:, :-1, blk:-blk, blk:-blk].cuda()) print("init loss: {:.20f} pretrain_inIt loss: {:.20f}".format(init_loss, pretrain_inIt_loss.item())) eval_test(net, evaluator, mode="eval", mode2="pre", err=pretrain_inIt_loss) # print("-" * 30) "scaling learning rate on last layer" # print(dict(net.conv3.named_parameters()).keys()) target_layerParam = list(map(id, net.conv3.parameters())) base_layerParam = filter(lambda p: id(p) not in target_layerParam, net.parameters()) training_parameters = [{'params': net.conv3.parameters(), 'lr': lr_/10 }, {'params': base_layerParam}] optimizer = optim.SGD(training_parameters, lr=lr_, momentum=0.9, weight_decay=0) try: optimizer.load_state_dict(checkpoint["optim_state"]) except: print("default optim_state") print(net.conv3.weight.requires_grad) print("inspect optimizer setting:\n", optimizer.state_dict()) print("target id:", target_layerParam) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") v_min = 10000 ft_loss = np.zeros(FT_epochs) Train_time = time() print(tester.test_gt.shape) ## 2.1) "FINE TUNING"--training for epoch in range(FT_epochs): # loop over the testing image multiple times net.train() # running_loss = 0.0 # loading testing image test_I_in = tester.test_I_in[:, :-1, blk:-blk, blk:-blk] test_I_in = 
def eval_test(net, evaluator, mode="pre", mode2="pre", err=None):
    """Run ``evaluator`` on ``net`` and, in ``"eval"`` mode, save the result.

    The evaluator output is a residual; the MS channels of
    ``evaluator.test_I_in`` (all but the last channel, cropped by ``blk`` on
    each spatial side) are added back, and the sum is clipped to [0, 1] and
    laid out as N x H x W x C.

    Args:
        net: Model handed to ``evaluator``.
        evaluator: Callable ``evaluator(net, err=...)`` returning an
            N x C x H x W tensor, with attributes ``test_I_in`` and ``mode``.
        mode: Files are written only when this equals ``"eval"``.
        mode2: Tag embedded in the output file name.
        err: Forwarded unchanged to ``evaluator``.

    Side effects:
        In ``"eval"`` mode, creates ``./results/`` if needed and writes one
        ``.mat`` file per example.
    """
    with torch.no_grad():
        residual = evaluator(net, err=err)  # NxCxHxW

    # Skip connection: add the low-resolution MS back to the residual.
    ms_part = evaluator.test_I_in[:, :-1, blk:-blk, blk:-blk]
    sr = residual.cpu().detach().numpy() + ms_part.cpu().detach().numpy()
    sr = np.transpose(sr, (0, 2, 3, 1))  # NxHxWxC
    # The original author notes that clipping is not strictly necessary.
    sr = np.clip(sr, 0, 1)

    if mode != "eval":
        return

    out_dir = './results/'
    # savemat cannot create the folder itself.
    os.makedirs(out_dir, exist_ok=True)

    num_exm = sr.shape[0]
    if num_exm == 1:
        # "FR" -> full-resolution key, anything else ("RR", or "test" for a
        # single-sample set) -> reduced-resolution key. Previously ``key`` and
        # ``file_name`` were undefined for mode "test".
        key = "apnn_wv3_os" if evaluator.mode == "FR" else "apnn_wv3_rs"
        file_name = key + '_ny_' + mode2 + ".mat"
        sio.savemat(os.path.join(out_dir, file_name), {key: sr[0, :, :, :]})
    else:
        for index in range(num_exm):
            file_name = "apnn_wv3_rs" + str(index) + mode2 + ".mat"
            sio.savemat(os.path.join(out_dir, file_name),
                        {'apnn_wv3_rs': sr[index, :, :, :]})
def load_set(file_path, blk):
    """Load a test set from disk and build the padded network input.

    The loader is chosen from the file extension:
      * ``.h5``  -- read with ``h5py``; the rank is probed on dataset ``ms``.
      * ``.mat`` -- read with ``scipy.io.loadmat``; the rank is probed on ``I_MS_LR``.

    A rank-4 probe reads ``lms``, ``pan``, ``ms``, ``gt`` (already channel-first);
    a rank-3 probe reads ``I_MS``, ``I_PAN``, ``I_MS_LR``, ``I_GT`` (single HxWxC image,
    converted to 1xCxHxW). Every array is cast to float32 and divided by 2047.0.

    Args:
        file_path: path of the data file. If the part of the name just before the
            extension ends with ``FR``, ``I_GT`` is aliased to ``I_MS_LR``.
        blk: number of pixels of edge-replicated padding added on each spatial side
            of the concatenated (lms, pan) input.

    Returns:
        ``(test_I_in, ms, pan, gt)`` as torch tensors; ``test_I_in`` is the padded
        concatenation of lms and pan along the channel axis.

    Raises:
        NotImplementedError: the extension is neither ``h5`` nor ``mat``.
        ValueError: the probed array is neither rank 3 nor rank 4.
    """
    def _scaled(arr):
        # float32 copy normalised by the 11-bit maximum used throughout this script
        return np.array(arr, dtype=np.float32) / 2047.0

    suffix = file_path.split('.')
    if suffix[-1] == 'h5':
        # ===== case1: NxCxHxW
        data = h5py.File(file_path)
        ms1 = data["ms"][...]
    elif suffix[-1] == 'mat':
        # ===== case2: HxWxC
        data = sio.loadmat(file_path)
        ms1 = data["I_MS_LR"][...]
    else:
        print("file format is not supported")
        # `raise NotImplemented` raises TypeError: NotImplemented is not an exception class
        raise NotImplementedError("file format is not supported: {}".format(file_path))
    shape_size = len(ms1.shape)

    if suffix[-2][-2:] == 'FR':
        # full-resolution files: alias I_GT to I_MS_LR so the reads below succeed
        data['I_GT'] = data['I_MS_LR']

    pad_width = ((0, 0), (0, 0), (blk, blk), (blk, blk))

    if shape_size == 4:  # NxCxHxW
        lms1 = _scaled(data['lms'][...])
        pan1 = _scaled(data['pan'][...])
        pan = torch.from_numpy(pan1)

        test_I_in1 = np.concatenate([lms1, pan1], axis=1)  # Nx(C+1)xHxW
        test_I_in1 = np.pad(test_I_in1, pad_width, mode='edge')
        test_I_in = torch.from_numpy(test_I_in1)

        ms = torch.from_numpy(_scaled(data['ms'][...]))
        gt = torch.from_numpy(_scaled(data['gt'][...]))
        return test_I_in, ms, pan, gt

    if shape_size == 3:  # HxWxC
        lms1 = _scaled(np.expand_dims(data['I_MS'][...], axis=0))  # 1xHxWxC

        pan1 = np.expand_dims(data['I_PAN'][...], axis=0)  # 1xHxW
        pan1 = _scaled(np.expand_dims(pan1, axis=3))  # 1xHxWx1
        pan = torch.from_numpy(pan1).permute(0, 3, 1, 2)  # Nx1xHxW

        test_I_in1 = np.concatenate([lms1, pan1], axis=3)  # 1xHxWx(C+1)
        test_I_in1 = np.transpose(test_I_in1, (0, 3, 1, 2))  # 1x(C+1)xHxW
        test_I_in1 = np.pad(test_I_in1, pad_width, mode='edge')
        test_I_in = torch.from_numpy(test_I_in1)

        ms1 = _scaled(np.expand_dims(data['I_MS_LR'][...], axis=0))  # 1xHxWxC
        ms = torch.from_numpy(ms1).permute(0, 3, 1, 2)

        gt1 = _scaled(np.expand_dims(data['I_GT'][...], axis=0))  # 1xHxWxC
        gt = torch.from_numpy(gt1).permute(0, 3, 1, 2)
        return test_I_in, ms, pan, gt

    # previously fell through and returned None, failing later at tuple unpacking
    raise ValueError("unsupported data rank {} in {}".format(shape_size, file_path))
x = x.cuda().float() # convert to tensor type: out2 = model(x) # result_our = out2 + x[:, :-1, blk:-blk, blk:-blk] # out2 = model(self.test_I_in) # result_our = out2 + self.test_I_in[:, :-1, blk:-blk, blk:-blk] # result_our = torch.squeeze(result_our).permute(1, 2, 0) # result_our = result_our* 2047 # our_SAM, our_ERGAS = compute_index(self.test_gt, result_our, 4) # print('our_SAM: {} our_ERGAS: {}'.format(our_SAM, our_ERGAS)) # print loss for each epoch if self.mode == 'RR' or self.mode == 'test': test_in = self.test_I_in[:, :-1, blk:-blk, blk:-blk].cuda() sr = out2 + test_in # self.test_I_in[:, :-1, blk:-blk, blk:-blk].cuda() # NxCxHxW sr = sr.permute(0, 2, 3, 1) gt = self.test_gt.permute(0, 2, 3, 1).cuda() # sr = out2.permute(0, 2, 3, 1) # gt = self.test_gt.cuda() - test_in # NxCxHxW # gt = gt.permute(0, 2, 3, 1) our_CC, our_PSNR, our_SSIM, our_SAM, our_ERGAS = analysis_accu(gt[0, ...], sr[0, ...], 4) print(f'[{self.file_path}]: our_CC: {our_CC}, our_PSNR: {our_PSNR}, ' f'our_SSIM: {our_SSIM},\n' f'our_SAM: {our_SAM} our_ERGAS: {our_ERGAS}') return out2 ## 2) target-adative's fine_tune_training, i.e., PNNplus## def test(file_path, sensor_model): suffix = file_path.split('.')[-2][-2:] if suffix== 'RR' or suffix == 'FR': simulated = suffix else: simulated = 'test' tester = Tester(file_path, mode='ft') # call initial model evaluator = Tester(file_path, mode=simulated) criterion = nn.L1Loss(reduction='mean').cuda()#L1_Loss_clip(clip_flag=True, inter=True).cuda()# regularization = loss_with_l2_regularization().cuda() " LOAD PRETRAINED MODEL" init_loss = 0 model_path = "../pretrained_models/1WV4_PNNplus_model.pth.tar" if os.path.isfile(model_path): print("loading model") checkpoint = torch.load(model_path) # checkpoint = torch.load('./pretrained_models/' + sensor_model) print(checkpoint.keys()) net = checkpoint['model'] print(net.conv1.weight.data[0, 0, 0, 0]) net.load_state_dict(checkpoint['model_state']) lr_ = 1e-4 FT_epochs = 50 # number of fine tuning epochs # 
init_loss = checkpoint["loss1"] else: nr_bands = 4 # selected by user or taken from data? lr_ = 0.0001 * 17 * 17 * nr_bands FT_epochs = 5000 net = APNN().cuda() net.apply(weights_init) print(net.conv1.weight.data[0, 0, 0, 0]) print(net.conv1.weight.data[0, 0, 0, 0]) print(net) test_gt = tester.test_gt - tester.test_I_in[:, :-1, blk:-blk, blk:-blk] pretrain_inIt_loss = criterion(net(tester.test_I_in.cuda()), test_gt.cuda()).item()#, tester.test_I_in[:, :-1, blk:-blk, blk:-blk].cuda()).item() print("init loss: {:.20f} pretrain_inIt loss: {:.20f}".format(init_loss, pretrain_inIt_loss)) eval_test(net, evaluator, mode="eval", mode2="pre") print('-' * 100) "scaling learning rate on last layer" target_layerParam = list(map(id, net.conv3.parameters())) base_layerParam = filter(lambda p: id(p) not in target_layerParam, net.parameters()) training_parameters = [{'params': net.conv3.parameters(), 'lr': lr_ / 10}, {'params': base_layerParam}] optimizer = optim.SGD(training_parameters, lr=lr_, momentum=0.9) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") v_min = 10000 ft_loss = np.zeros(FT_epochs) Train_time = time() print(tester.test_gt.shape) ## 2.1) "FINE TUNING"--training for epoch in range(FT_epochs): # loop over the testing image multiple times net.train() # running_loss = 0.0 # loading testing image test_I_in = tester.test_I_in[:, :-1, blk:-blk, blk:-blk] test_I_in = test_I_in.cuda() test_gt = tester.test_gt.cuda() # residual # zero the parameter gradients optimizer.zero_grad() # forward + backward + optimize x1 = tester.test_I_in # send to cuda, important! 
def eval_test(net, evaluator, mode="pre", mode2="pre"):
    """Run ``evaluator`` on ``net`` and save the fused image(s) to ``../results`` as .mat.

    Args:
        net: network handed to ``evaluator``.
        evaluator: callable invoked as ``evaluator(net)``; it must expose ``test_I_in``
            (padded input tensor) and ``mode``.
        mode: accepted for call compatibility; not used by this function.
        mode2: tag inserted into the output file name (e.g. "pre", "ft").

    For a single image the file is named after ``evaluator.mode``: "FR" uses the
    ``os`` prefix, every other mode uses the ``rs`` prefix. Previously any mode other
    than "RR"/"FR" (``test`` passes ``'test'`` for files not named *RR/*FR) crashed on
    an unbound variable.
    """
    with torch.no_grad():
        residual = evaluator(net)  # NxCxHxW

    # skip connection: add the low resolution ms (every channel but the last,
    # with the `blk`-pixel padding cropped) to the predicted residual
    lms = evaluator.test_I_in[:, :-1, blk:-blk, blk:-blk]
    sr = residual.cpu().detach().numpy() + lms.cpu().detach().numpy()  # NxCxHxW
    sr = np.transpose(sr, (0, 2, 3, 1))  # to: NxHxWxC
    sr = np.clip(sr, 0, 1)

    out_dir = "../results"
    # savemat does not create missing directories
    os.makedirs(out_dir, exist_ok=True)

    num_exm = sr.shape[0]
    if num_exm == 1:
        if evaluator.mode == "FR":
            key = 'apnn_wv4_os_alice'
            file_name = "apnn_wv4_os" + '_alice_' + mode2 + ".mat"
        else:
            # "RR", and any other mode: Tester.__call__ scores 'test' like 'RR'
            key = 'apnn_wv4_rs_alice'
            file_name = "apnn_wv4_rs" + '_alice_' + mode2 + ".mat"
        sio.savemat(os.path.join(out_dir, file_name), {key: sr[0, :, :, :]})
    else:
        for index in range(num_exm):
            # save the DL results to the 03-Comparisons(Matlab)
            file_name = "apnn_wv4_rs" + str(index) + mode2 + ".mat"
            sio.savemat(os.path.join(out_dir, file_name), {'apnn_wv4_rs': sr[index, :, :, :]})
class loss_with_l2_regularization(nn.Module):
    """Add an L2 penalty on convolution weights to an already computed loss."""

    def __init__(self):
        super(loss_with_l2_regularization, self).__init__()

    def forward(self, criterion, model, weight_decay=1e-5, flag=True):
        """Return ``criterion + sum_k weight_decay * ||w_k||^2 / 2``.

        Args:
            criterion: the data-term loss value (a tensor), not a loss module.
            model: module whose ``named_parameters()`` are scanned; a parameter is
                penalised when its name contains both 'conv' and 'weight'.
            weight_decay: multiplier of each penalty term.
            flag: when true, print every penalty term.

        The penalty is computed on the parameters themselves rather than on
        ``.data``: ``.data`` is detached from autograd, so the term used to change
        the reported value only and never reached the weights in ``backward()``.
        """
        regularizations = []
        for k, v in model.named_parameters():
            if 'conv' in k and 'weight' in k:
                penality = weight_decay * ((v ** 2).sum() / 2)
                regularizations.append(penality)
                if flag:
                    # detach for display only, keeps the printed form free of grad_fn
                    print("{} : {}".format(k, penality.detach()))
        loss = criterion + sum(regularizations)
        return loss
out_channels=spectral_num, kernel_size=5, stride=1, bias=True) self.relu = nn.ReLU(inplace=True) init_weights(self.conv1, self.conv2, self.conv3) def forward(self, x): # x= lms; y = pan #input1 = torch.cat((x, y), 1) # Bsx9x64x64 # input1 = self.bn(input1) rs = self.relu(self.conv1(x)) rs = self.relu(self.conv2(rs)) output = self.conv3(rs) return output # ----------------- End-Main-Part ------------------------------------ # QB def variance_scaling_initializer(tensor): from scipy.stats import truncnorm def truncated_normal_(tensor, mean=0, std=1): with torch.no_grad(): size = tensor.shape tmp = tensor.new_empty(size + (4,)).normal_() valid = (tmp < 2) & (tmp > -2) ind = valid.max(-1, keepdim=True)[1] tensor.data.copy_(tmp.gather(-1, ind).squeeze(-1)) tensor.data.mul_(std).add_(mean) return tensor def variance_scaling(x, scale=1.0, mode="fan_in", distribution="truncated_normal", seed=None): fan_in, fan_out = torch.nn.init._calculate_fan_in_and_fan_out(x) if mode == "fan_in": scale /= max(1., fan_in) elif mode == "fan_out": scale /= max(1., fan_out) else: scale /= max(1., (fan_in + fan_out) / 2.) if distribution == "normal" or distribution == "truncated_normal": # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) 
class loss_with_l2_regularization(nn.Module):
    """Add an L2 penalty on convolution weights to an already computed loss."""

    def __init__(self):
        super(loss_with_l2_regularization, self).__init__()

    def forward(self, criterion, model, weight_decay=1e-5, flag=True):
        """Return ``criterion + sum_k weight_decay * ||w_k||^2 / 2``.

        Args:
            criterion: the data-term loss value (a tensor), not a loss module.
            model: module whose ``named_parameters()`` are scanned; a parameter is
                penalised when its name contains both 'conv' and 'weight'.
            weight_decay: multiplier of each penalty term.
            flag: when true, print every penalty term.

        The penalty is computed on the parameters themselves rather than on
        ``.data``: ``.data`` is detached from autograd, so the term used to change
        the reported value only and never reached the weights in ``backward()``.
        """
        regularizations = []
        for k, v in model.named_parameters():
            if 'conv' in k and 'weight' in k:
                penality = weight_decay * ((v ** 2).sum() / 2)
                regularizations.append(penality)
                if flag:
                    # detach for display only, keeps the printed form free of grad_fn
                    print("{} : {}".format(k, penality.detach()))
        loss = criterion + sum(regularizations)
        return loss
class APNN(nn.Module):
    """Three-layer convolutional network: 9x9 -> ReLU -> 5x5 -> ReLU -> 5x5.

    The first layer takes 8 + 1 input channels and the last layer emits 8 channels.
    No padding is used, so each spatial side shrinks by 8 + 4 + 4 = 16 pixels.
    """

    def __init__(self):
        super(APNN, self).__init__()
        n_bands = 8
        n_hidden = 48
        # ConvTranspose2d: output = (input - 1)*stride + outpading - 2*padding + kernelsize
        # C. Using deeper network: finally, during training, we stabilize the layers'
        # inputs by means of batch normalization
        self.conv1 = nn.Conv2d(n_bands + 1, n_hidden, kernel_size=9, stride=1, bias=True)
        self.conv2 = nn.Conv2d(n_hidden, 32, kernel_size=5, stride=1, bias=True)
        self.conv3 = nn.Conv2d(32, n_bands, kernel_size=5, stride=1, bias=True)
        self.relu = nn.ReLU(inplace=True)
        # init_weights(self.conv1, self.conv2, self.conv3)

    def forward(self, x):
        """Apply the three convolutions to ``x``; only the first two are followed by ReLU."""
        feat = self.conv1(x)
        feat = self.relu(feat)
        feat = self.conv2(feat)
        feat = self.relu(feat)
        return self.conv3(feat)
class loss_with_l2_regularization(nn.Module):
    """Add an L2 penalty on convolution weights to an already computed loss."""

    def __init__(self):
        super(loss_with_l2_regularization, self).__init__()

    def forward(self, criterion, model, weight_decay=1e-5, flag=True):
        """Return ``criterion + sum_k weight_decay * ||w_k||^2 / 2``.

        Args:
            criterion: the data-term loss value (a tensor), not a loss module.
            model: module whose ``named_parameters()`` are scanned; a parameter is
                penalised when its name contains both 'conv' and 'weight'.
            weight_decay: multiplier of each penalty term.
            flag: when true, print every penalty term.

        The penalty is computed on the parameters themselves rather than on
        ``.data``: ``.data`` is detached from autograd, so the term used to change
        the reported value only and never reached the weights in ``backward()``.
        """
        regularizations = []
        for k, v in model.named_parameters():
            if 'conv' in k and 'weight' in k:
                penality = weight_decay * ((v ** 2).sum() / 2)
                regularizations.append(penality)
                if flag:
                    # detach for display only, keeps the printed form free of grad_fn
                    print("{} : {}".format(k, penality.detach()))
        loss = criterion + sum(regularizations)
        return loss
class loss_with_l2_regularization(nn.Module):
    """Add an L2 penalty on convolution weights to an already computed loss."""

    def __init__(self):
        super(loss_with_l2_regularization, self).__init__()

    def forward(self, criterion, model, weight_decay=1e-5, flag=True):
        """Return ``criterion + sum_k weight_decay * ||w_k||^2 / 2``.

        Args:
            criterion: the data-term loss value (a tensor), not a loss module.
            model: module whose ``named_parameters()`` are scanned; a parameter is
                penalised when its name contains both 'conv' and 'weight'.
            weight_decay: multiplier of each penalty term.
            flag: when true, print every penalty term.

        The penalty is computed on the parameters themselves rather than on
        ``.data``: ``.data`` is detached from autograd, so the term used to change
        the reported value only and never reached the weights in ``backward()``.
        """
        regularizations = []
        for k, v in model.named_parameters():
            if 'conv' in k and 'weight' in k:
                penality = weight_decay * ((v ** 2).sum() / 2)
                regularizations.append(penality)
                if flag:
                    # detach for display only, keeps the printed form free of grad_fn
                    print("{} : {}".format(k, penality.detach()))
        loss = criterion + sum(regularizations)
        return loss
def truncated_normal_(tensor, mean=0.0, std=1.0):
    """Fill ``tensor`` in place with samples from an approximately truncated normal.

    Four standard-normal candidates are drawn per element and the first one lying
    strictly inside (-2, 2) is kept; the result is then scaled by ``std`` and shifted
    by ``mean``. Returns the same ``tensor`` object.
    """
    with torch.no_grad():
        candidates = tensor.new_empty(tensor.shape + (4,)).normal_()
        in_range = (candidates > -2) & (candidates < 2)
        # index of the first maximal (i.e. first True, when any) entry on the candidate axis
        _, chosen = in_range.max(-1, keepdim=True)
        picked = candidates.gather(-1, chosen).squeeze(-1)
        target = tensor.data
        target.copy_(picked)
        target.mul_(std)
        target.add_(mean)
    return tensor
n = fan_out elif mode == 'FAN_AVG': # Average number of inputs and output connections. n = (fan_in + fan_out) / 2.0 if uniform: raise NotImplemented # # To get stddev = math.sqrt(factor / n) need to adjust for uniform. # limit = math.sqrt(3.0 * factor / n) # return random_ops.random_uniform(shape, -limit, limit, # dtype, seed=seed) else: # To get stddev = math.sqrt(factor / n) need to adjust for truncated. trunc_stddev = math.sqrt(1.3 * factor / n) return fan_in, fan_out, trunc_stddev def variance_scaling(x, scale=1.0, mode="fan_in", distribution="truncated_normal", seed=None): # fan_in, fan_out = torch.nn.init._calculate_fan_in_and_fan_out(x) x = x.permute(3, 2, 1, 0) # .permute(2, 3, 1, 0) fan_in, fan_out, trunc_stddev = calculate_fan(x.shape) print(trunc_stddev) # if mode == "fan_in": # scale /= max(1., fan_in) # elif mode == "fan_out": # scale /= max(1., fan_out) # else: # scale /= max(1., (fan_in + fan_out) / 2.) # if distribution == "normal" or distribution == "truncated_normal": # # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) # stddev = math.sqrt(scale) / .87962566103423978 # print(fan_in,fan_out,scale,stddev)#100,100,0.01,0.1136 truncated_normal_(x, 0.0, trunc_stddev) # 0.001) x = x.permute(3, 2, 0, 1) print(x.min(), x.max()) return x # /10*1.28 variance_scaling(tensor) return tensor ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/wald_utilities.py ================================================ import numpy as np import torch import torch.nn as nn import math import scipy.ndimage.filters as ft def fspecial_gauss(size, sigma): # Function to mimic the 'fspecial' gaussian MATLAB function m, n = [(ss-1.)/2. 
def MTF(ratio, sensor, N=41):
    """Build the per-band MTF-matched low-pass kernels for a sensor.

    Args:
        ratio: resolution ratio, forwarded to ``NyquistFilterGenerator``.
        sensor: one of 'QB', 'Ikonos'/'IKONOS', 'GeoEye1', 'WV4', 'WV2', 'WV3'.
        N: kernel side length, forwarded to ``NyquistFilterGenerator``.

    Returns:
        Whatever ``NyquistFilterGenerator(GNyq, ratio, N)`` returns for the sensor's
        per-band gains at the Nyquist frequency.

    Raises:
        ValueError: ``sensor`` is not one of the names above (previously this failed
            with an unbound-variable error on ``GNyq``).
    """
    if sensor == 'QB':
        GNyq = np.asarray([0.34, 0.32, 0.30, 0.22])  # Bands Order: B,G,R,NIR
    elif sensor in ('Ikonos', 'IKONOS'):
        GNyq = np.asarray([0.26, 0.28, 0.29, 0.28])  # Bands Order: B,G,R,NIR
    elif sensor in ('GeoEye1', 'WV4'):
        GNyq = np.asarray([0.23, 0.23, 0.23, 0.23])  # Bands Order: B, G, R, NIR
    elif sensor == 'WV2':
        # seven bands at 0.35 followed by one at 0.27 (flattened to 1-D by np.append)
        GNyq = np.append(0.35 * np.ones((1, 7)), 0.27)
    elif sensor == 'WV3':
        GNyq = [0.325, 0.355, 0.360, 0.350, 0.365, 0.360, 0.335, 0.315]
    else:
        raise ValueError("MTF: unsupported sensor {!r}".format(sensor))
    return NyquistFilterGenerator(GNyq, ratio, N)
def interp23tap(img, ratio):
    """Upsample an HxWxB image by ``ratio`` with the 23-tap interpolation kernel.

    The image is doubled ``log2(ratio)`` times. Each stage zero-stuffs the samples
    into a grid twice as large and filters every band separably (both axes) with the
    23-coefficient kernel using wrap-around boundaries.

    Args:
        img: array of shape (rows, cols, bands).
        ratio: upsampling factor; must be a power of two.

    Returns:
        Array of shape (rows * ratio, cols * ratio, bands).

    Raises:
        AssertionError: ``ratio`` is not a power of two.
    """
    assert((2**(round(math.log(ratio, 2)))) == ratio), 'Error: Only resize factors power of 2'

    r, c, b = img.shape

    CDF23 = np.asarray([0.5, 0.305334091185, 0, -0.072698593239, 0, 0.021809577942, 0, -0.005192756653, 0, 0.000807762146, 0, -0.000060081482])
    CDF23 = [element * 2 for element in CDF23]
    # symmetric kernel as a column vector: mirrored tail + full half-kernel
    BaseCoeff = np.expand_dims(np.concatenate([np.flip(CDF23[1:]), CDF23]), axis=-1)

    # One doubling per factor of two. The previous `ratio/2` count is only right for
    # ratio 2 and 4 and produced a 16x image for ratio 8.
    n_stages = int(round(math.log2(ratio)))
    for z in range(n_stages):
        I1LRU = np.zeros(((2 ** (z+1)) * r, (2 ** (z+1)) * c, b))
        # the first stage places samples on odd indices, later stages on even ones
        if z == 0:
            I1LRU[1::2, 1::2, :] = img
        else:
            I1LRU[::2, ::2, :] = img

        for i in range(b):
            # filter one axis, transpose back, filter the other axis
            temp = ft.convolve(np.transpose(I1LRU[:, :, i]), BaseCoeff, mode='wrap')
            I1LRU[:, :, i] = ft.convolve(np.transpose(temp), BaseCoeff, mode='wrap')

        img = I1LRU

    return img
wald_protocol(ms,pan,ratio,sensor, channels=8): mtf_kernel = MTF(ratio, sensor) MTF_kern = np.moveaxis(mtf_kernel, -1, 0) MTF_kern = np.expand_dims(MTF_kern, axis = 1) MTF_kern = torch.from_numpy(MTF_kern).type(torch.float32) # DepthWise-Conv2d definition depthconv = nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=MTF_kern.shape, groups=channels, padding=20, padding_mode='replicate', bias=False) depthconv.weight.data = MTF_kern depthconv.weight.requires_grad = False ms_down = depthconv(ms) ms_wald_ = nn.functional.interpolate(ms_down, scale_factor=0.25, mode='bicubic') ms_lr = torch.zeros(ms.shape) for i in range(ms_wald_.shape[0]): temp = np.copy(np.asarray(torch.squeeze(torch.squeeze(ms_wald_[i,:,:,:]).permute((1,2,0))).detach().cpu())) ms_lr[i, :, :, :]= torch.from_numpy(interp23tap_GPU(temp,ratio)).permute((2,0,1)) pan_lr = nn.functional.interpolate(pan, scale_factor=0.25, mode='bicubic') return ms_lr, pan_lr def wald_protocol_v2(ms, pan, ratio, sensor, channels=8): def genMTF_MS(): mtf_kernel = MTF(ratio, sensor) MTF_kern = np.moveaxis(mtf_kernel, -1, 0) MTF_kern = np.expand_dims(MTF_kern, axis=1) MTF_kern = torch.from_numpy(MTF_kern).type(torch.float32) # DepthWise-Conv2d definition depthconv = nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=MTF_kern.shape, groups=channels, padding=20, padding_mode='replicate', bias=False) depthconv.weight.data = MTF_kern depthconv.weight.requires_grad = False ms_down = depthconv(ms) ms_wald_ = nn.functional.interpolate(ms_down, scale_factor=0.25, mode='bicubic') ms_lr = torch.zeros(ms.shape) for i in range(ms_wald_.shape[0]): temp = np.copy(np.asarray(torch.squeeze(torch.squeeze(ms_wald_[i, :, :, :]).permute((1, 2, 0))).detach().cpu())) ms_lr[i, :, :, :] = torch.from_numpy(interp23tap_GPU(temp, ratio)).permute((2, 0, 1)) return ms_lr def genMTF_PAN(): channels = 1 mtf_kernel = MTF_PAN(ratio, sensor) MTF_kern = np.moveaxis(mtf_kernel, -1, 0) MTF_kern = np.expand_dims(MTF_kern, axis=1) 
class SetCriterion(nn.Module):
    """Evaluate a dictionary of loss callables and collect the results by name.

    ``losses`` maps a name to a callable. Each callable is invoked once per
    forward pass; a dict result is merged into the output, any other result is
    stored under the callable's name.
    """

    def __init__(self, losses, weight_dict):
        """Store the loss callables.

        Parameters:
            losses: dict mapping a loss name to a callable ``loss(outputs, targets, ...)``.
            weight_dict: dict of per-loss weights. It is stored but not applied
                anywhere in this class.
        """
        super().__init__()
        self.weight_dict = weight_dict
        self.losses = losses
        self.loss_dicts = {}

    def forward(self, outputs, targets, *args, **kwargs):
        """Compute every configured loss.

        Parameters:
            outputs: model prediction, passed as first argument to each loss.
            targets: reference, passed as second argument to each loss.
            *args: extra positional arguments; forwarded to every loss except
                the one registered under ``'loss'`` / ``'Loss'``.
            **kwargs: accepted for interface compatibility and ignored.

        Returns:
            ``self.loss_dicts``, updated in place with the new values (entries
            from earlier calls with other keys are kept).

        Notes:
            The main loss key is matched as ``'loss'`` as well as ``'Loss'``:
            the builders in this package register it in lower case, and the
            sibling FusionNet criterion already checks the lower-case key.
            Each loss is now evaluated once instead of twice.
        """
        for name, loss_fn in self.losses.items():
            if name in ('loss', 'Loss'):
                value = loss_fn(outputs, targets)
            else:
                value = loss_fn(outputs, targets, *args)

            if isinstance(value, dict):
                self.loss_dicts.update(value)
            else:
                self.loss_dicts.update({name: value})

        return self.loss_dicts
def compute_charbonnier_loss(tensor1, tensor2, is_mean=True):
    """Charbonnier loss: sqrt((a - b)^2 + 1e-6), reduced per sample then averaged.

    Args:
        tensor1, tensor2: tensors with at least 4 dims; dims 1, 2, 3 are reduced per sample.
        is_mean: average (True) or sum (False) over dims 1-3 before the batch mean.

    Returns:
        Scalar tensor.
    """
    epsilon = 1e-6
    per_element = torch.sqrt(torch.square(torch.sub(tensor1, tensor2)) + epsilon)
    reduce = torch.mean if is_mean else torch.sum
    return torch.mean(reduce(per_element, [2, 3, 1]))


def compute_ergas_loss(tensor1, tensor2):
    """ERGAS-style loss: per-band RMSE divided by exp(band mean of ``tensor2``).

    Args:
        tensor1, tensor2: 4-D tensors; dims 2 and 3 are reduced for each (sample, band).

    Returns:
        Scalar tensor.
    """
    epsilon = 1e-8
    squared_error = torch.square(torch.subtract(tensor1, tensor2))
    rmse = torch.sqrt(torch.mean(squared_error, [2, 3]) + epsilon)
    scale = torch.exp(torch.mean(tensor2, [2, 3]))
    return torch.sqrt(torch.mean(torch.square(torch.divide(rmse, scale))) + epsilon)


def compute_spetral_shift_loss(tensor1, tensor2):
    """ERGAS loss between both inputs after resizing them to 1/4 of their size.

    Args:
        tensor1, tensor2: 4-D tensors; the target size is taken from dims 2 and 3
            of ``tensor1``.

    Returns:
        Scalar tensor.

    Notes:
        The size is read from ``tensor1.shape``. The earlier ``get_shape()``
        call is a TensorFlow method and does not exist on torch tensors.
        ``F.interpolate`` is used with its default interpolation mode.
    """
    size = (int(int(tensor1.shape[2]) / 4), int(int(tensor1.shape[3]) / 4))
    tensor_lr1 = F.interpolate(tensor1, size)
    tensor_lr2 = F.interpolate(tensor2, size)
    return compute_ergas_loss(tensor_lr1, tensor_lr2)


def compute_ssim_loss(tensor1, tensor2, channel = 8):
    """1 - MS-SSIM between ``tensor1`` and ``tensor2`` (data range 1).

    Args:
        tensor1, tensor2: image batches with ``channel`` bands.
        channel: number of bands passed to ``MS_SSIM``.

    Returns:
        Scalar tensor.

    Notes:
        The ``MS_SSIM`` module is now actually applied to the two inputs;
        before, the module object itself was passed to ``torch.mean``.
        NOTE(review): pytorch_msssim's multi-scale variant presumably needs
        inputs much larger than the 64x64 patches used elsewhere - confirm.
    """
    ms_ssim_module = MS_SSIM(win_size=11, win_sigma=1.5, data_range=1, size_average=True, channel=channel)
    return 1 - torch.mean(ms_ssim_module(tensor1, tensor2))
def save_checkpoint(model, epoch):  # save model function
    """Write ``model.state_dict()`` to ``Weights/<epoch>.pth``."""
    # torch.save does not create missing directories.
    os.makedirs('Weights', exist_ok=True)
    model_out_path = 'Weights' + '/' + "{}.pth".format(epoch)
    torch.save(model.state_dict(), model_out_path)


def train(training_data_loader, validate_data_loader,start_epoch=0):
    """Run the training/validation loop for epochs ``start_epoch + 1 .. epochs``.

    Uses the module-level ``model``, ``optimizer``, ``lr_scheduler``,
    ``writer``, ``epochs``, ``ckpt``, ``lambda_init`` and ``lambda_declay``
    and updates the global ``lambda_v``.

    Args:
        training_data_loader: iterable of batches ``(gt, ms, pan, ...)``.
        validate_data_loader: iterable of batches with the same layout.
        start_epoch: number of epochs already completed.

    Changes with respect to the previous version:
        * the progress line prints ``current/total`` (arguments were swapped);
        * ``lambda_v`` is derived from the epoch index clamped at 100, so a
          run resumed after epoch 100 uses the final value instead of the
          initial 1.0 (identical for runs started from epoch 0);
        * validation only runs on the epochs whose result is reported (every
          10th); the reported numbers are unchanged.
    """
    global lambda_v
    print('Start training...')

    for epoch in range(start_epoch + 1, epochs + 1):
        epoch_train_loss, epoch_val_loss = [], []

        # Decrease lambda_v every lambda_declay epochs until epoch 100, then hold it.
        lambda_v = 1.0 - lambda_init*(min(epoch, 100)//lambda_declay)

        # ============Epoch Train=============== #
        model.train()
        for iteration, batch in enumerate(training_data_loader, 1):
            gt, ms, pan = batch[0].cuda(), batch[1].cuda(), batch[2].cuda()

            optimizer.zero_grad()
            sr, sr_down = model(ms, pan)  # full-resolution and intermediate outputs
            gt_down = F.interpolate(gt, scale_factor=0.5, mode='nearest')  # reference for sr_down

            loss1 = compute_charbonnier_loss(sr_down, gt_down)
            loss2 = compute_charbonnier_loss(sr, gt)
            loss = lambda_v*loss1 + (1.0 - lambda_v)*loss2

            epoch_train_loss.append(loss.item())
            loss.backward()
            optimizer.step()

        lr_scheduler.step()  # one scheduler step per epoch

        t_loss = np.nanmean(np.array(epoch_train_loss))  # mean training loss of this epoch
        writer.add_scalar('mse_loss/t_loss', t_loss, epoch)
        print('Epoch: {}/{} training loss (lr={}, lam_v={}): {:.7f}'.format(epoch, epochs, lr_scheduler.get_last_lr(), lambda_v, t_loss))

        if epoch % ckpt == 0:
            save_checkpoint(model, epoch)

        # ============Epoch Validate=============== #
        if epoch % 10 == 0:
            model.eval()
            with torch.no_grad():
                for iteration, batch in enumerate(validate_data_loader, 1):
                    gt, ms, pan = batch[0].cuda(), batch[1].cuda(), batch[2].cuda()

                    sr, sr_down = model(ms, pan)
                    gt_down = F.interpolate(gt, scale_factor=0.5, mode='nearest')

                    loss1 = compute_charbonnier_loss(sr_down, gt_down)
                    loss2 = compute_charbonnier_loss(sr, gt)
                    loss = lambda_v * loss1 + (1.0 - lambda_v) * loss2
                    epoch_val_loss.append(loss.item())

            v_loss = np.nanmean(np.array(epoch_val_loss))
            writer.add_scalar('val/v_loss', v_loss, epoch)
            print(' validate loss: {:.7f}'.format(v_loss))

    writer.close()  # close tensorboard
# -------------Initialization----------------------------------------
def init_weights(*modules):
    """Initialise Conv2d, BatchNorm2d and Linear layers found inside ``modules``.

    Conv2d weights use ``variance_scaling_initializer`` (imported at file
    level), Linear weights use Kaiming-normal (fan_in, relu); biases are set
    to 0 and BatchNorm2d to weight 1 / bias 0. Other module types are ignored.
    """
    for module in modules:
        for m in module.modules():
            if isinstance(m, nn.Conv2d):
                variance_scaling_initializer(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1.0)
                nn.init.constant_(m.bias, 0.0)
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0)


# ----------------------------------------------------
class Resblock(nn.Module):
    """Residual block: x + conv3x3(PReLU(conv3x3(x))), channel count preserved.

    Args:
        channel: number of feature channels (default 64, the former fixed value).
    """

    def __init__(self, channel=64):
        super(Resblock, self).__init__()
        self.conv20 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=3, stride=1, padding=1,
                                bias=True)
        self.conv21 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=3, stride=1, padding=1,
                                bias=True)
        self.prelu = nn.PReLU(num_parameters = 1, init = 0.2)

    def forward(self, x):
        rs1 = self.prelu(self.conv20(x))
        rs1 = self.conv21(rs1)
        return torch.add(x, rs1)


# -----------------------------------------------------
class BDPN(nn.Module):
    """Two-stage network fusing PAN features with progressively upsampled MS features.

    Args:
        spectral_num: number of MS bands.
        criterion: callable used by ``train_step`` as ``criterion(sr, gt, ...)``;
            its result is indexed with ``'loss'``.
        channel: width of the PAN feature branch. It is now also passed to the
            residual blocks, which previously were fixed at 64 and made any
            other value fail at the first block.
    """

    def __init__(self, spectral_num, criterion, channel=64):
        super(BDPN, self).__init__()
        channel1 = channel
        channel2 = 4*spectral_num  # PixelShuffle(2) turns 4*C channels into C channels
        self.criterion = criterion

        # Conv2d: padding = kernel_size//2 keeps the spatial size
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=channel1, kernel_size=3, stride=1, padding=1,
                               bias=True)

        # Attribute names res1..res10 / rres1..rres10 are kept so state_dict keys do not change.
        for idx in range(1, 11):
            setattr(self, 'res%d' % idx, Resblock(channel1))
        for idx in range(1, 11):
            setattr(self, 'rres%d' % idx, Resblock(channel1))

        self.conv3 = nn.Conv2d(in_channels=channel1, out_channels=spectral_num, kernel_size=3, stride=1, padding=1,
                               bias=True)
        self.conv4 = nn.Conv2d(in_channels=spectral_num, out_channels=channel2, kernel_size=3, stride=1, padding=1,
                               bias=True)
        self.conv5 = nn.Conv2d(in_channels=spectral_num, out_channels=channel2, kernel_size=3, stride=1, padding=1,
                               bias=True)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.pixshuf = nn.PixelShuffle(upscale_factor=2)

        # Not used in forward(); kept so existing checkpoints still load.
        self.prelu = nn.PReLU(num_parameters = 1, init = 0.2)

        self.backbone = nn.Sequential(*[getattr(self, 'res%d' % idx) for idx in range(1, 11)])
        self.backbone2 = nn.Sequential(*[getattr(self, 'rres%d' % idx) for idx in range(1, 11)])

        # maxpool / pixshuf have no parameters, so they are no longer passed here.
        init_weights(self.backbone, self.backbone2, self.conv1, self.conv3, self.conv4, self.conv5)

    def forward(self, x, y):
        """Fuse MS input ``x`` with PAN input ``y``.

        ``y`` must be single-channel and 4x the spatial size of ``x``: the PAN
        features are pooled once (1/2) and the MS features are pixel-shuffled
        twice (2x each) before the additions.

        Returns:
            (output, ms_feature_level1): the full-resolution result and the
            intermediate result at half the PAN resolution.
        """
        # ---- PAN branch, stage 1 (full resolution)
        pan_feature = self.conv1(y)
        pan_feature1 = torch.add(pan_feature, self.backbone(pan_feature))
        pan_feature_level1 = self.conv3(pan_feature1)
        pan_feature1_out = self.maxpool(pan_feature1)

        # ---- PAN branch, stage 2 (half resolution); conv3 is shared with stage 1
        pan_feature2 = torch.add(pan_feature1_out, self.backbone2(pan_feature1_out))
        pan_feature_level2 = self.conv3(pan_feature2)

        # ---- MS branch, stage 1: upsample 2x and inject half-resolution PAN detail
        ms_feature_up1 = self.pixshuf(self.conv4(x))
        ms_feature_level1 = torch.add(pan_feature_level2, ms_feature_up1)

        # ---- MS branch, stage 2: upsample 2x again and inject full-resolution PAN detail
        ms_feature_up2 = self.pixshuf(self.conv5(ms_feature_level1))
        output = torch.add(pan_feature_level1, ms_feature_up2)

        return output, ms_feature_level1

    def train_step(self, data, *args, **kwargs):
        """Run one forward pass on CUDA and return the loss in the trainer's dict format."""
        log_vars = {}
        # Only the tensors that are used are transferred to the GPU.
        gt, ms, pan = data['gt'].cuda(), data['ms'].cuda(), data['pan'].cuda()
        sr, _ = self(ms, pan)

        loss = self.criterion(sr, gt, *args, **kwargs)

        log_vars.update(loss=loss['loss'])
        return {'loss': loss['loss'], 'log_vars': log_vars}

    def val_step(self, data, *args, **kwargs):
        """Run one forward pass on CUDA and return ``(prediction, ground truth)``."""
        gt, ms, pan = data['gt'].cuda(), data['ms'].cuda(), data['pan'].cuda()
        sr, _ = self(ms, pan)

        return sr, gt
class build_drpnn(PanSharpeningModel, name='DRPNN'):
    """Factory registered under the name 'DRPNN': builds model, loss, optimizer and scheduler."""

    def __call__(self, cfg):
        """Create the DRPNN training components on CUDA.

        Args:
            cfg: configuration object; ``cfg.reg``, ``cfg.dataset`` (a mapping
                whose values are strings) and ``cfg.lr`` are read.

        Returns:
            (model, criterion, optimizer, scheduler)
        """
        # important for Pansharpening models, which are from tensorflow code
        self.reg = cfg.reg

        # Dataset names containing "wv" select 8 bands, everything else 4.
        spectral_num = 8 if any("wv" in v for v in cfg.dataset.values()) else 4

        # reduction='mean' is the documented replacement for the deprecated size_average=True.
        loss = nn.MSELoss(reduction='mean').cuda()
        weight_dict = {'loss': 1}
        losses = {'loss': loss}
        criterion = SetCriterion(losses, weight_dict)
        model = DRPNN(spectral_num, criterion).cuda()
        optimizer = optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=0)
        # lr is multiplied by 0.5 every 30 scheduler steps
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=30, gamma=0.5)

        return model, criterion, optimizer, scheduler
class Repeatblock(nn.Module):
    """One 7x7 convolution followed by ReLU, channel count preserved.

    Args:
        channel: number of feature channels (default 32, the former fixed value).
    """

    def __init__(self, channel=32):
        super(Repeatblock, self).__init__()
        self.conv2 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=7, stride=1, padding=3,
                               bias=True)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu(self.conv2(x))


class DRPNN(nn.Module):
    """Residual CNN operating on the concatenation of upsampled MS and PAN.

    Args:
        spectral_num: number of MS bands; the network input has ``spectral_num + 1`` channels.
        criterion: callable used by ``train_step`` as ``criterion(sr, gt, ...)``;
            its result is indexed with ``'loss'``.
        channel: feature width. It is now forwarded to the repeated blocks;
            before, they were fixed at 32 and any other value failed in ``forward``.
    """

    def __init__(self, spectral_num, criterion, channel=32):
        super(DRPNN, self).__init__()

        self.criterion = criterion

        self.conv1 = nn.Conv2d(in_channels=spectral_num+1, out_channels=channel, kernel_size=7, stride=1, padding=3,
                               bias=True)
        self.conv2 = nn.Conv2d(in_channels=channel, out_channels=spectral_num+1, kernel_size=7, stride=1, padding=3,
                               bias=True)
        self.conv3 = nn.Conv2d(in_channels=spectral_num+1, out_channels=spectral_num, kernel_size=7, stride=1, padding=3,
                               bias=True)
        self.relu = nn.ReLU(inplace=True)

        # Eight identical conv+ReLU stages (state_dict keys backbone.0 .. backbone.7).
        self.backbone = nn.Sequential(*[Repeatblock(channel) for _ in range(8)])

    def forward(self, x, y):  # x= lms; y = pan
        """Return the fused image for upsampled MS ``x`` and single-band PAN ``y``.

        ``x`` and ``y`` are concatenated on dim 1 and must together provide
        ``spectral_num + 1`` channels.
        """
        stacked = torch.cat([x, y], 1)
        rs = self.relu(self.conv1(stacked))
        rs = self.backbone(rs)
        out_res = self.conv2(rs)
        # Residual connection over the whole stacked input, then reduce to the MS bands.
        output1 = torch.add(stacked, out_res)
        return self.conv3(output1)

    def train_step(self, data, *args, **kwargs):
        """Run one forward pass on CUDA and return the loss in the trainer's dict format."""
        log_vars = {}
        # Only the tensors that are used are transferred to the GPU.
        gt, lms, pan = data['gt'].cuda(), data['lms'].cuda(), data['pan'].cuda()
        sr = self(lms, pan)

        loss = self.criterion(sr, gt, *args, **kwargs)

        log_vars.update(loss=loss['loss'])
        return {'loss': loss['loss'], 'log_vars': log_vars}

    def val_step(self, data, *args, **kwargs):
        """Run one forward pass on CUDA and return ``(prediction, ground truth)``."""
        gt, lms, pan = data['gt'].cuda(), data['lms'].cuda(), data['pan'].cuda()
        sr = self(lms, pan)

        return sr, gt
eos_coef: relatiumber of object categories, omitting the special no-object category matcher: moduleve classification weight applied to the no-object category losses: list of all the losses to be applied. See get_loss for list of available losses. """ super().__init__() self.weight_dict = weight_dict self.losses = losses self.loss_dicts = {} def forward(self, outputs, targets, *args, **kwargs): """ This performs the loss computation. Parameters: outputs: dict of tensors, see the output specification of the model for the format targets: list of dicts, such that len(targets) == batch_size. The expected keys in each dict depends on the losses applied, see each loss' doc """ # Compute all the requested losses for k in self.losses.keys(): # k, loss = loss_dict if k == 'Loss': loss = self.losses[k] loss_dicts = loss(outputs, targets) if isinstance(loss_dicts, dict): self.loss_dicts.update(loss(outputs, targets)) else: self.loss_dicts.update({k: loss(outputs, targets)}) else: loss = self.losses[k] loss_dicts = loss(outputs, targets, *args) if isinstance(loss_dicts, dict): self.loss_dicts.update(loss(outputs, targets, *args)) else: self.loss_dicts.update({k: loss(outputs, targets, *args)}) return self.loss_dicts from UDL.pansharpening.models import PanSharpeningModel class build_dicnn(PanSharpeningModel, name='DiCNN1'): def __call__(self, cfg): # important for Pansharpening models, which are from tensorflow code self.reg = cfg.reg scheduler = None if any(["wv" in v for v in cfg.dataset.values()]): spectral_num = 8 else: spectral_num = 4 loss = nn.MSELoss(size_average=True).cuda() ## Define the Loss function weight_dict = {'loss': 1} losses = {'loss': loss} criterion = SetCriterion(losses, weight_dict) model = DiCNN(spectral_num, criterion).cuda() optimizer = optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=0) ## optimizer 1: Adam scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=1500, gamma=0.5) # lr = lr* gamma for each step_size = 180 return 
# -------------Initialization----------------------------------------
def init_weights(*modules):
    """Initialise Conv2d, BatchNorm2d and Linear layers found inside ``modules``.

    Conv2d weights use ``variance_scaling_initializer`` (imported at file
    level) and a message is printed per layer; Linear weights use
    Kaiming-normal (fan_in, relu); biases are set to 0 and BatchNorm2d to
    weight 1 / bias 0.
    """
    for module in modules:
        for m in module.modules():
            if isinstance(m, nn.Conv2d):
                print("nn.Conv2D is initialized by variance_scaling_initializer")
                variance_scaling_initializer(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1.0)
                nn.init.constant_(m.bias, 0.0)
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0)


class DiCNN(nn.Module):
    """Three-layer CNN predicting a residual that is added to the upsampled MS image.

    Args:
        spectral_num: number of MS bands; the network input has ``spectral_num + 1`` channels.
        criterion: callable used by ``train_step`` as ``criterion(sr, gt, ...)``;
            its result is indexed with ``'loss'``.
        channel: feature width of the hidden layers.
        reg: stored on the instance; not read inside this class.
    """

    def __init__(self, spectral_num, criterion, channel=64, reg=True):
        super(DiCNN, self).__init__()

        self.criterion = criterion
        self.reg = reg

        self.conv1 = nn.Conv2d(in_channels=spectral_num + 1, out_channels=channel, kernel_size=3, stride=1, padding=1,
                               bias=True)
        self.conv2 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=3, stride=1, padding=1,
                               bias=True)
        self.conv3 = nn.Conv2d(in_channels=channel, out_channels=spectral_num, kernel_size=3, stride=1, padding=1,
                               bias=True)

        self.relu = nn.ReLU(inplace=True)

        # init_weights already walks module.modules(); calling it through
        # self.apply() visited every convolution twice (once as a child, once
        # via the root) and initialised it twice.
        init_weights(self)

    def forward(self, x, y):  # x= lms; y = pan
        """Return ``x`` plus the residual predicted from the concatenation of ``x`` and ``y``."""
        stacked = torch.cat([x, y], 1)

        rs = self.relu(self.conv1(stacked))
        rs = self.relu(self.conv2(rs))
        out = self.conv3(rs)

        return x + out

    def train_step(self, data, *args, **kwargs):
        """Run one forward pass on CUDA and return the loss in the trainer's dict format."""
        log_vars = {}
        # Only the tensors that are used are transferred to the GPU.
        gt, lms, pan = data['gt'].cuda(), data['lms'].cuda(), data['pan'].cuda()
        sr = self(lms, pan)

        loss = self.criterion(sr, gt, *args, **kwargs)

        log_vars.update(loss=loss['loss'])
        return {'loss': loss['loss'], 'log_vars': log_vars}

    def val_step(self, data, *args, **kwargs):
        """Run one forward pass on CUDA and return ``(prediction, ground truth)``."""
        gt, lms, pan = data['gt'].cuda(), data['lms'].cuda(), data['pan'].cuda()
        sr = self(lms, pan)

        return sr, gt
class build_fusionnet(PanSharpeningModel, name='FusionNet'):
    """Factory registered under the name 'FusionNet': builds model, loss and optimizer."""

    def __call__(self, args):
        """Create the FusionNet training components on CUDA.

        Args:
            args: configuration object; ``args.dataset`` (a mapping whose
                values are strings) and ``args.lr`` are read.

        Returns:
            (model, criterion, optimizer, scheduler) where ``scheduler`` is
            always ``None``.
        """
        scheduler = None

        # Dataset names containing "wv" select 8 bands, everything else 4.
        spectral_num = 8 if any("wv" in v for v in args.dataset.values()) else 4

        # reduction='mean' is the documented replacement for the deprecated size_average=True.
        loss = nn.MSELoss(reduction='mean').cuda()
        weight_dict = {'loss': 1}
        losses = {'loss': loss}
        criterion = SetCriterion(losses, weight_dict)
        model = FusionNet(spectral_num, criterion).cuda()
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0)

        return model, criterion, optimizer, scheduler
import torch.nn.init as int from UDL.Basis.variance_sacling_initializer import variance_scaling_initializer class loss_with_l2_regularization(nn.Module): def __init__(self): super(loss_with_l2_regularization, self).__init__() def forward(self, criterion, model, weight_decay=1e-5, flag=False): regularizations = [] for k, v in model.named_parameters(): if 'conv' in k and 'weight' in k: # print(k) penality = weight_decay * ((v.data ** 2).sum() / 2) regularizations.append(penality) if flag: print("{} : {}".format(k, penality)) # r = torch.sum(regularizations) loss = criterion + sum(regularizations) return loss # -------------Initialization---------------------------------------- def init_weights(*modules): for module in modules: for m in module.modules(): if isinstance(m, nn.Conv2d): ## initialization for Conv2d print("initial nn.Conv2d with var_scale_new: ", m) # try: # import tensorflow as tf # tensor = tf.get_variable(shape=m.weight.shape, initializer=tf.variance_scaling_initializer(seed=1)) # m.weight.data = tensor.eval() # except: # print("try error, run variance_scaling_initializer") # variance_scaling_initializer(m.weight) variance_scaling_initializer(m.weight) # method 1: initialization # nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu') # method 2: initialization if m.bias is not None: nn.init.constant_(m.bias, 0.0) elif isinstance(m, nn.BatchNorm2d): ## initialization for BN nn.init.constant_(m.weight, 1.0) nn.init.constant_(m.bias, 0.0) elif isinstance(m, nn.Linear): ## initialization for nn.Linear # variance_scaling_initializer(m.weight) nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu') if m.bias is not None: nn.init.constant_(m.bias, 0.0) # -------------ResNet Block (One)---------------------------------------- class Resblock(nn.Module): def __init__(self): super(Resblock, self).__init__() channel = 32 self.conv20 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=3, stride=1, padding=1, bias=True) 
self.conv21 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=3, stride=1, padding=1, bias=True) self.relu = nn.ReLU(inplace=True) def forward(self, x): # x= hp of ms; y = hp of pan rs1 = self.relu(self.conv20(x)) # Bsx32x64x64 rs1 = self.conv21(rs1) # Bsx32x64x64 rs = torch.add(x, rs1) # Bsx32x64x64 return rs class FusionNet(nn.Module): def __init__(self, spectral_num, criterion, channel=32): super(FusionNet, self).__init__() # ConvTranspose2d: output = (input - 1)*stride + outpading - 2*padding + kernelsize self.spectral_num = spectral_num self.criterion = criterion self.conv1 = nn.Conv2d(in_channels=spectral_num, out_channels=channel, kernel_size=3, stride=1, padding=1, bias=True) self.res1 = Resblock() self.res2 = Resblock() self.res3 = Resblock() self.res4 = Resblock() self.conv3 = nn.Conv2d(in_channels=channel, out_channels=spectral_num, kernel_size=3, stride=1, padding=1, bias=True) self.relu = nn.ReLU(inplace=True) self.backbone = nn.Sequential( # method 2: 4 resnet repeated blocks self.res1, self.res2, self.res3, self.res4 ) # init_weights(self.backbone, self.conv1, self.conv3) # state initialization, important! # self.apply(init_weights) def forward(self, x, y): # x= lms; y = pan pan_concat = y.repeat(1, self.spectral_num, 1, 1) # Bsx8x64x64 input = torch.sub(pan_concat, x) # Bsx8x64x64 rs = self.relu(self.conv1(input)) # Bsx32x64x64 rs = self.backbone(rs) # ResNet's backbone! 
output = self.conv3(rs) # Bsx8x64x64 return output # lms + outs def train_step(self, data, *args, **kwargs): log_vars = {} gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \ data['ms'].cuda(), data['pan'].cuda() res = self(lms, pan) sr = lms + res # output:= lms + hp_sr loss = self.criterion(sr, gt, *args, **kwargs)['loss'] # outputs = loss # return loss log_vars.update(pan2ms=loss.item(), loss=loss.item()) metrics = {'loss': loss, 'log_vars': log_vars} return metrics def val_step(self, data, *args, **kwargs): # gt, lms, ms, pan = data gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \ data['ms'].cuda(), data['pan'].cuda() res = self(lms, pan) sr = lms + res # output:= lms + hp_sr return sr, gt ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/FusionNet/run_fusionnet.py ================================================ from UDL.Basis.config import Config from UDL.pansharpening.common.main_pansharpening import main from UDL.Basis.auxiliary import set_random_seed from UDL.pansharpening.models.FusionNet.option_fusionnet import cfg as args from UDL.pansharpening.models.FusionNet.fusionnet_main import build_fusionnet as builder if __name__ == '__main__': # cfg = Config.fromfile("../pansharpening/DCFNet/option_DCFNet.py") set_random_seed(args.seed) # print(cfg.builder) args.builder = builder main(args) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/MSDCNN/model_msdcnn.py ================================================ # GPL License # Copyright (C) 2021 , UESTC # All Rights Reserved # @Author : Xiao Wu, Ran Ran, LiangJian Deng # @reference: import torch import torch.nn as nn import numpy as np import math import torch.nn.init as int # import sys # sys.path.append('/home/office-401-remote/桌面/Machine Learning/RanRan') # print(sys.path) import torch import torch.nn as nn import math from UDL.Basis.variance_sacling_initializer import 
variance_scaling_initializer from UDL.pansharpening.models import PanSharpeningModel class MSDCNN(nn.Module): def __init__(self, spectral_num, criterion, channel=64): super(MSDCNN, self).__init__() self.criterion = criterion input_channel = spectral_num + 1 output_channel = spectral_num self.conv1 = nn.Conv2d(in_channels=input_channel, out_channels=60, kernel_size=7, stride=1, padding=3, bias=True) self.conv2_1 = nn.Conv2d(in_channels=60, out_channels=20, kernel_size=3, stride=1, padding=1, bias=True) self.conv2_2 = nn.Conv2d(in_channels=60, out_channels=20, kernel_size=5, stride=1, padding=2, bias=True) self.conv2_3 = nn.Conv2d(in_channels=60, out_channels=20, kernel_size=7, stride=1, padding=3, bias=True) self.conv3 = nn.Conv2d(in_channels=60, out_channels=30, kernel_size=3, stride=1, padding=1, bias=True) self.conv4_1 = nn.Conv2d(in_channels=30, out_channels=10, kernel_size=3, stride=1, padding=1, bias=True) self.conv4_2 = nn.Conv2d(in_channels=30, out_channels=10, kernel_size=5, stride=1, padding=2, bias=True) self.conv4_3 = nn.Conv2d(in_channels=30, out_channels=10, kernel_size=7, stride=1, padding=3, bias=True) self.conv5 = nn.Conv2d(in_channels=30, out_channels=output_channel, kernel_size=5, stride=1, padding=2, bias=True) self.shallow1 = nn.Conv2d(in_channels=input_channel, out_channels=64, kernel_size=9, stride=1, padding=4, bias=True) self.shallow2 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=1, stride=1, padding=0, bias=True) self.shallow3 = nn.Conv2d(in_channels=32, out_channels=output_channel, kernel_size=5, stride=1, padding=2, bias=True) self.relu = nn.ReLU(inplace=True) def forward(self, x, y): # x: lms; y: pan concat = torch.cat([x, y], 1) # Bsx9x64x64 out1 = self.relu(self.conv1(concat)) # Bsx60x64x64 out21 = self.conv2_1(out1) # Bsx20x64x64 out22 = self.conv2_2(out1) # Bsx20x64x64 out23 = self.conv2_3(out1) # Bsx20x64x64 out2 = torch.cat([out21, out22, out23], 1) # Bsx60x64x64 out2 = self.relu(torch.add(out2, out1)) # Bsx60x64x64 out3 
= self.relu(self.conv3(out2)) # Bsx30x64x64 out41 = self.conv4_1(out3) # Bsx10x64x64 out42 = self.conv4_2(out3) # Bsx10x64x64 out43 = self.conv4_3(out3) # Bsx10x64x64 out4 = torch.cat([out41, out42, out43], 1) # Bsx30x64x64 out4 = self.relu(torch.add(out4, out3)) # Bsx30x64x64 out5 = self.conv5(out4) # Bsx8x64x64 shallow1 = self.relu(self.shallow1(concat)) # Bsx64x64x64 shallow2 = self.relu(self.shallow2(shallow1)) # Bsx32x64x64 shallow3 = self.shallow3(shallow2) # Bsx8x64x64 out = torch.add(out5, shallow3) # Bsx8x64x64 out = self.relu(out) # Bsx8x64x64 return out def train_step(self, data, *args, **kwargs): log_vars = {} gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \ data['ms'].cuda(), data['pan'].cuda() sr = self(lms, pan) loss = self.criterion(sr, gt, *args, **kwargs) # return sr, loss log_vars.update(loss=loss['loss']) return {'loss': loss['loss'], 'log_vars': log_vars} def val_step(self, data, *args, **kwargs): gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \ data['ms'].cuda(), data['pan'].cuda() sr = self(lms, pan) return sr, gt if __name__ == '__main__': lms = torch.randn([1, 8, 64, 64]) pan = torch.randn([1, 1, 64, 64]) ms = torch.randn([1, 8, 16, 16]) model = BDPN(8, None) x,_ = model(ms, pan) print(x.shape) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/MSDCNN/msdcnn_main.py ================================================ # GPL License # Copyright (C) 2021 , UESTC # All Rights Reserved # @Author : Xiao Wu, Ran Ran, LiangJian Deng # @reference: import torch import torch.nn as nn import torch.optim as optim from .model_msdcnn import MSDCNN class SetCriterion(nn.Module): """ This class computes the loss for DETR. 
The process happens in two steps: 1) we compute hungarian assignment between ground truth boxes and the outputs of the model 2) we supervise each pair of matched ground-truth / prediction (supervise class and box) """ def __init__(self, losses, weight_dict): """ Create the criterion. Parameters: num_classes: n able to compute a matching between targets and proposals weight_dict: dict containing as key the names of the losses and as values their relative weight. eos_coef: relatiumber of object categories, omitting the special no-object category matcher: moduleve classification weight applied to the no-object category losses: list of all the losses to be applied. See get_loss for list of available losses. """ super().__init__() self.weight_dict = weight_dict self.losses = losses self.loss_dicts = {} def forward(self, outputs, targets, *args, **kwargs): """ This performs the loss computation. Parameters: outputs: dict of tensors, see the output specification of the model for the format targets: list of dicts, such that len(targets) == batch_size. 
The expected keys in each dict depends on the losses applied, see each loss' doc """ # Compute all the requested losses for k in self.losses.keys(): # k, loss = loss_dict if k == 'Loss': loss = self.losses[k] loss_dicts = loss(outputs, targets) if isinstance(loss_dicts, dict): self.loss_dicts.update(loss(outputs, targets)) else: self.loss_dicts.update({k: loss(outputs, targets)}) else: loss = self.losses[k] loss_dicts = loss(outputs, targets, *args) if isinstance(loss_dicts, dict): self.loss_dicts.update(loss(outputs, targets, *args)) else: self.loss_dicts.update({k: loss(outputs, targets, *args)}) return self.loss_dicts from UDL.pansharpening.models import PanSharpeningModel class build_msdcnn(PanSharpeningModel, name='MSDCNN'): def __call__(self, cfg): # important for Pansharpening models, which are from tensorflow code self.reg = cfg.reg scheduler = None if any(["wv" in v for v in cfg.dataset.values()]): spectral_num = 8 else: spectral_num = 4 loss = nn.MSELoss(size_average=True).cuda() ## Define the Loss function weight_dict = {'loss': 1} losses = {'loss': loss} criterion = SetCriterion(losses, weight_dict) model = MSDCNN(spectral_num, criterion).cuda() optimizer = optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=1e-5) ## optimizer 1: Adam scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=40, gamma=0.5) # <=> lr = opt.lr * (0.5 ** (epoch // opt.step)) return model, criterion, optimizer, scheduler ################################################################### # ------------------- Main Function (Run first) ------------------- ################################################################### ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/PNN/model_pnn.py ================================================ import torch import torch.nn as nn from torch.nn import functional as F import math # from UDL.Basis.variance_sacling_initializer import variance_scaling_initializer class 
PNN(nn.Module): def __init__(self, spectral_num, criterion, channel=64): super(PNN, self).__init__() self.criterion = criterion # ConvTranspose2d: output = (input - 1)*stride + outpading - 2*padding + kernelsize self.conv1 = nn.Conv2d(in_channels=spectral_num + 1, out_channels=channel, kernel_size=9, stride=1, bias=True) self.conv2 = nn.Conv2d(in_channels=channel, out_channels=32, kernel_size=5, stride=1, bias=True) self.conv3 = nn.Conv2d(in_channels=32, out_channels=spectral_num, kernel_size=5, stride=1, bias=True) self.relu = nn.ReLU(inplace=True) # init_weights(self.conv1, self.conv2, self.conv3) def forward(self, x): # x = cat(lms,pan) input1 = x # Bsx9x64x64 rs = self.relu(self.conv1(input1)) rs = self.relu(self.conv2(rs)) output = self.conv3(rs) return output def train_step(self, data, *args, **kwargs): log_vars = {} gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \ data['ms'].cuda(), data['pan'].cuda() blk = self.blk gt = gt[:, :, blk:-blk, blk:-blk] lms = torch.cat([lms, pan], dim=1) sr = self(lms) loss = self.criterion(sr, gt, *args, **kwargs) # return sr, loss log_vars.update(loss=loss['loss']) return {'loss': loss['loss'], 'log_vars': log_vars} def val_step(self, data, *args, **kwargs): blk = self.blk gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \ data['ms'].cuda(), data['pan'].cuda() test_I_in1 = torch.cat([lms, pan], dim=1) test_I_in1 = F.pad(test_I_in1, (blk, blk, blk, blk), mode='replicate') sr = self(test_I_in1) return sr, gt @classmethod def set_blk(cls, blk): cls.blk = blk # ----------------- End-Main-Part ------------------------------------ ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/PNN/pnn_main.py ================================================ import torch.nn as nn import torch.optim as optim from .model_pnn import PNN import numpy as np class SetCriterion(nn.Module): """ This class computes the loss for DETR. 
The process happens in two steps: 1) we compute hungarian assignment between ground truth boxes and the outputs of the model 2) we supervise each pair of matched ground-truth / prediction (supervise class and box) """ def __init__(self, losses, weight_dict): """ Create the criterion. Parameters: num_classes: n able to compute a matching between targets and proposals weight_dict: dict containing as key the names of the losses and as values their relative weight. eos_coef: relatiumber of object categories, omitting the special no-object category matcher: moduleve classification weight applied to the no-object category losses: list of all the losses to be applied. See get_loss for list of available losses. """ super().__init__() self.weight_dict = weight_dict self.losses = losses self.loss_dicts = {} def forward(self, outputs, targets, *args, **kwargs): """ This performs the loss computation. Parameters: outputs: dict of tensors, see the output specification of the model for the format targets: list of dicts, such that len(targets) == batch_size. 
The expected keys in each dict depends on the losses applied, see each loss' doc """ # Compute all the requested losses for k in self.losses.keys(): # k, loss = loss_dict if k == 'Loss': loss = self.losses[k] loss_dicts = loss(outputs, targets) if isinstance(loss_dicts, dict): self.loss_dicts.update(loss(outputs, targets)) else: self.loss_dicts.update({k: loss(outputs, targets)}) else: loss = self.losses[k] loss_dicts = loss(outputs, targets, *args) if isinstance(loss_dicts, dict): self.loss_dicts.update(loss(outputs, targets, *args)) else: self.loss_dicts.update({k: loss(outputs, targets, *args)}) return self.loss_dicts from UDL.pansharpening.models import PanSharpeningModel class build_pnn(PanSharpeningModel, name='PNN'): def __call__(self, cfg): # important for Pansharpening models, which are from tensorflow code self.reg = cfg.reg scheduler = None if any(["wv" in v for v in cfg.dataset.values()]): spectral_num = 8 else: spectral_num = 4 lr = 0.0001 * 17 * 17 * spectral_num cfg.lr = lr print(f"PNN adopted another lr: {lr} in \"build_pnn in pnn_main.py\" ") loss = nn.MSELoss(size_average=True).cuda() ## Define the Loss function weight_dict = {'loss': 1} losses = {'loss': loss} criterion = SetCriterion(losses, weight_dict) model = PNN(spectral_num, criterion).cuda() target_layerParam = list(map(id, model.conv3.parameters())) base_layerParam = filter(lambda p: id(p) not in target_layerParam, model.parameters()) training_parameters = [{'params': model.conv3.parameters(), 'lr': lr / 10}, {'params': base_layerParam}] optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9) ## optimizer 2: SGD net_scope = 0 for name, layer in model.named_parameters(): if 'conv' in name and 'bias' not in name: net_scope += layer.shape[-1] - 1 net_scope = np.sum(net_scope) + 1 blk = net_scope // 2 # 8 model.set_blk(blk) return model, criterion, optimizer, scheduler ################################################################### # ------------------- Main Function (Run first) 
------------------- ################################################################### ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/PanNet/model_pannet.py ================================================ # GPL License # Copyright (C) 2021 , UESTC # All Rights Reserved # @Author : Xiao Wu, LiangJian Deng # @reference: import torch import torch.nn as nn import numpy as np import math import torch.nn.init as int from UDL.Basis.variance_sacling_initializer import variance_scaling_initializer from UDL.pansharpening.models import PanSharpeningModel # -------------Initialization---------------------------------------- def init_weights(*modules): for module in modules: for m in module.modules(): if isinstance(m, nn.Conv2d): ## initialization for Conv2d print("nn.Conv2D is initialized by variance_scaling_initializer") variance_scaling_initializer(m.weight) # method 1: initialization # nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu') # method 2: initialization if m.bias is not None: nn.init.constant_(m.bias, 0.0) elif isinstance(m, nn.BatchNorm2d): ## initialization for BN nn.init.constant_(m.weight, 1.0) nn.init.constant_(m.bias, 0.0) elif isinstance(m, nn.Linear): ## initialization for nn.Linear # variance_scaling_initializer(m.weight) nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu') if m.bias is not None: nn.init.constant_(m.bias, 0.0) # -------------ResNet Block (One)---------------------------------------- class Resblock(nn.Module): def __init__(self): super(Resblock, self).__init__() channel = 32 self.conv20 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=3, stride=1, padding=1, bias=True) self.conv21 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=3, stride=1, padding=1, bias=True) self.relu = nn.ReLU(inplace=True) def forward(self, x): # x= hp of ms; y = hp of pan rs1 = self.relu(self.conv20(x)) # Bsx32x64x64 rs1 = 
self.conv21(rs1) # Bsx32x64x64 rs = torch.add(x, rs1) # Bsx32x64x64 return rs # ----------------------------------------------------- class PanNet(nn.Module): def __init__(self, spectral_num, criterion, channel=32, reg=True): super(PanNet, self).__init__() self.criterion = criterion self.reg = reg # ConvTranspose2d: output = (input - 1)*stride + outpading - 2*padding + kernelsize self.deconv = nn.ConvTranspose2d(in_channels=spectral_num, out_channels=spectral_num, kernel_size=8, stride=4, padding=2, bias=True) self.conv1 = nn.Conv2d(in_channels=spectral_num + 1, out_channels=channel, kernel_size=3, stride=1, padding=1, bias=True) self.res1 = Resblock() self.res2 = Resblock() self.res3 = Resblock() self.res4 = Resblock() self.conv3 = nn.Conv2d(in_channels=channel, out_channels=spectral_num, kernel_size=3, stride=1, padding=1, bias=True) self.relu = nn.ReLU(inplace=True) self.backbone = nn.Sequential( # method 2: 4 resnet repeated blocks self.res1, self.res2, self.res3, self.res4 ) self.apply(init_weights) # init_weights(self.backbone, self.deconv, self.conv1, self.conv3) # state initialization, important! def forward(self, x, y):# x= hp of ms; y = hp of pan output_deconv = self.deconv(x) input = torch.cat([output_deconv, y], 1) # Bsx9x64x64 rs = self.relu(self.conv1(input)) # Bsx32x64x64 rs = self.backbone(rs) # ResNet's backbone! 
output = self.conv3(rs) # Bsx8x64x64 return output def train_step(self, data, *args, **kwargs): log_vars = {} gt, lms, ms_hp, pan_hp = data['gt'].cuda(), data['lms'].cuda(), \ data['ms_hp'].cuda(), data['pan_hp'].cuda() hp_sr = self(ms_hp, pan_hp) sr = lms + hp_sr # output:= lms + hp_sr loss = self.criterion(sr, gt, *args, **kwargs) # return sr, loss log_vars.update(loss=loss['loss']) return {'loss': loss['loss'], 'log_vars': log_vars} def val_step(self, data, *args, **kwargs): # gt, lms, ms, pan = data gt, lms, ms_hp, pan_hp = data['gt'].cuda(), data['lms'].cuda(), \ data['ms'].cuda(), data['pan'].cuda() hp_sr = self(ms_hp, pan_hp) sr = lms + hp_sr # output:= lms + hp_sr return sr, gt # ----------------- End-Main-Part ------------------------------------ ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/PanNet/pannet_main.py ================================================ ''' [Flops]: ConvTranspose2d is not supported! [Memory]: ConvTranspose2d is not supported! =============================================================================================================================================== Total params: 83,024 ----------------------------------------------------------------------------------------------------------------------------------------------- Total memory: 7.25MB Total MAdd: 646.84MMAdd Total Flops: 323.91MFlops Total MemR+W: 14.57MB ''' import torch.nn as nn import torch.optim as optim from .model_pannet import PanNet class SetCriterion(nn.Module): """ This class computes the loss for DETR. The process happens in two steps: 1) we compute hungarian assignment between ground truth boxes and the outputs of the model 2) we supervise each pair of matched ground-truth / prediction (supervise class and box) """ def __init__(self, losses, weight_dict): """ Create the criterion. 
Parameters: num_classes: n able to compute a matching between targets and proposals weight_dict: dict containing as key the names of the losses and as values their relative weight. eos_coef: relatiumber of object categories, omitting the special no-object category matcher: moduleve classification weight applied to the no-object category losses: list of all the losses to be applied. See get_loss for list of available losses. """ super().__init__() self.weight_dict = weight_dict self.losses = losses self.loss_dicts = {} def forward(self, outputs, targets, *args, **kwargs): """ This performs the loss computation. Parameters: outputs: dict of tensors, see the output specification of the model for the format targets: list of dicts, such that len(targets) == batch_size. The expected keys in each dict depends on the losses applied, see each loss' doc """ # Compute all the requested losses for k in self.losses.keys(): # k, loss = loss_dict if k == 'Loss': loss = self.losses[k] loss_dicts = loss(outputs, targets) if isinstance(loss_dicts, dict): self.loss_dicts.update(loss(outputs, targets)) else: self.loss_dicts.update({k: loss(outputs, targets)}) else: loss = self.losses[k] loss_dicts = loss(outputs, targets, *args) if isinstance(loss_dicts, dict): self.loss_dicts.update(loss(outputs, targets, *args)) else: self.loss_dicts.update({k: loss(outputs, targets, *args)}) return self.loss_dicts from UDL.pansharpening.models import PanSharpeningModel class build_pannet(PanSharpeningModel, name='PanNet'): def __call__(self, cfg): if not all(['hp' in name for name in list(cfg.dataset.values())]): raise ValueError(f"{cfg.dataset} is wrong for PanNet, you need high-pass filter dataset.") # important for Pansharpening models, which are from tensorflow code self.reg = cfg.reg scheduler = None if any(["wv" in v for v in cfg.dataset.values()]): spectral_num = 8 else: spectral_num = 4 loss = nn.MSELoss(size_average=True).cuda() ## Define the Loss function weight_dict = {'loss': 1} losses 
= {'loss': loss} criterion = SetCriterion(losses, weight_dict) model = PanNet(spectral_num, criterion).cuda() optimizer = optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=0) ## optimizer 1: Adam return model, criterion, optimizer, scheduler ################################################################### # ------------------- Main Function (Run first) ------------------- ################################################################### ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/__init__.py ================================================ from UDL.AutoDL import PanSharpeningModel from .DiCNN.dicnn_main import build_dicnn, DiCNN from .FusionNet.fusionnet_main import build_fusionnet, FusionNet from .PNN.pnn_main import build_pnn, PNN from .PanNet.pannet_main import build_pannet, PanNet from .DRPNN.drpnn_main import build_drpnn, DRPNN from .BDPN.bdpn_main import build_bdpn, BDPN from .MSDCNN.msdcnn_main import build_msdcnn, MSDCNN ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/run_pansharpening.py ================================================ # GPL License # Copyright (C) UESTC # All Rights Reserved # @Author : Xiao Wu, LiangJian Deng # @reference: import sys sys.path.append('../..') from UDL.AutoDL import TaskDispatcher from UDL.AutoDL.trainer import main if __name__ == '__main__': cfg = TaskDispatcher.new(task='pansharpening', mode='entrypoint', arch='FusionNet') print(TaskDispatcher._task.keys()) main(cfg) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/run_test_pansharpening.py ================================================ # GPL License # Copyright (C) UESTC # All Rights Reserved # @Author : Xiao Wu, LiangJian Deng # @reference: import sys sys.path.append('../..') from UDL.AutoDL import TaskDispatcher from UDL.AutoDL.trainer import main if __name__ == '__main__': cfg = 
TaskDispatcher.new(task='pansharpening', mode='entrypoint', arch='MSDCNN') # cfg.resume_from = "../pretrained-model/WV3/pannet.pth" cfg.eval = True cfg.workflow = [('val', 1)] print(TaskDispatcher._task.keys()) main(cfg) ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pretrained-model/QB/readme.txt ================================================ none ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pretrained-model/WV2/readme.txt ================================================ none ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/pretrained-model/WV4/readme.txt ================================================ none ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/readme.md ================================================ test ================================================ FILE: 01-DL-toolbox(Pytorch)/UDL/results/readme.txt ================================================ ================================================ FILE: 01-DL-toolbox(Pytorch)/readme.md ================================================ # DL toolbox "DL toolbox" for Remote Sensing Pansharpening [English](https://github.com/liangjiandeng/DLPan-Toolbox/blob/main/01-DL-toolbox(Pytorch)/readme.md) | [简体中文](https://github.com.md) This repository is the official PyTorch implementation of our IEEE GRSM paper “Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks”, 2022 ([paper](https://github.com/liangjiandeng/liangjiandeng.github.io/tree/master/papers/2022/review-grsm2022.pdf) | [homepage](https://github.com/liangjiandeng/DLPan-Toolbox)). ## Features ## Requirements * Python3.7+, Pytorch>=1.6.0 * NVIDIA GPU + CUDA * Run `python setup.py develop` Note: Our project is based on MMCV, but you do not need to install it currently. ## Quick Start **Step0.
Set your Python environment.** > git clone https://github.com/liangjiandeng/DLPan-Toolbox.git Then, > cd "DLPan-Toolbox/01-DL-toolbox(Pytorch)" > python setup.py develop **Step1. Put datasets and set path** * Put datasets (WorldView-3, QuickBird, GaoFen2, WorldView2) into `UDL/Data/pansharpening`; see the following path structure. ``` |-$ROOT/Data ├── pansharpening │   ├── training_data │   │   ├── train_wv3.h5 │   │   ├── ... │   ├── validation_data │   │   ├── valid_wv3.h5 │   │   ├── ... │   ├── test_data │   │   ├── WV3 │   │   │   ├── NY1_WV3_RR.mat │   │   │   ├── ... │   │   │   ├── ... ``` * Check and revise your dataset path in `01-DL-toolbox(Pytorch)/UDL/Basis/option.py` (line 100 or line 102; you may not need to revise it); or, you can print the output of `run_pansharpening.py`, then set __cfg.data_dir__ (also line 100 or line 102) to your dataset path. **Step2. How to train?** > open `01-DL-toolbox(Pytorch)/UDL/pansharpening` > run `python run_pansharpening.py` for training > if you want to change the network, you could: 1) revise arch='BDPN' in the following code to another network's name, e.g., arch='xxx'; ```python import sys sys.path.append('../..') from UDL.AutoDL import TaskDispatcher from UDL.AutoDL.trainer import main if __name__ == '__main__': cfg = TaskDispatcher.new(task='pansharpening', mode='entrypoint', arch='BDPN') print(TaskDispatcher._task.keys()) main(cfg) ``` 2) revise the corresponding settings in `pansharpening/configs/option_bdpn.py`, e.g., hyperparameters, validation data ```python cfg.eval = False cfg.workflow = [('train', 50), ('val', 1)] cfg.dataset = {'train': 'wv3', 'val': 'valid_wv3.h5'} ``` **Step3.
How to test?** > open `01-DL-toolbox(Pytorch)/UDL/pansharpening` > run `run_test_pansharpening.py` for testing > Note you need to ensure `cfg.eval = True` or `cfg.workflow = [('val', 1)]` in the following `run_test_pansharpening.py` to run ```python import sys sys.path.append('../..') from UDL.AutoDL import TaskDispatcher from UDL.AutoDL.trainer import main if __name__ == '__main__': cfg = TaskDispatcher.new(task='pansharpening', mode='entrypoint', arch='MSDCNN') cfg.eval = True cfg.workflow = [('val', 1)] print(TaskDispatcher._task.keys()) main(cfg) ``` > How to get test outcome using the pretrained models? 1) find the given one example (i.e., `NY1_WV3_RR.mat`) in the path `UDL/Data/pansharpening/test_data`; 2) load pretrained model by setting __model_path__ = "your_model_path" located in the folder of `pansharpening/configs/option_bdpn.py` (line 15); Or __cfg.resume_from__ = "your_model_path" (line 31). 3) run `run_test_pansharpening.py`, then you may find the test results in the folder of `UDL/results` ## FAQ **Q1.** How to customize your new network/model in this framework? > 1) Construct your model, loss, optimizer, scheduler in `UDL/pansharpening/models/modelName/modelName_main.py` (you need to create your modelName in `modelName_main.py`, i.e., the similar operation as other methods in the path). > 2) Update `UDL/pansharpening/models/__init__.py` > 3) Add `option_modelName.py` in `UDL/pansharpening/configs/Option_modelName.py`, and configure your hyperparameters in this file (see other methods' configuration in `UDL/pansharpening/configs` for easy usage). > 4) train your model and infer your results, see __step2__ and __step3__ for details. > 5) save your model early, add or change `cfg.save_freq_print` and `cfg.save_top_k` in `UDL/pansharpening/configs/Option_modelName.py`. We set the default for it starting at epoch 5 and save models every 10 epochs. **Q2.** How to customize your datasets? 
You need to update: `UDL/pansharpening/common/psdata.py` (revise/add lines 24-29 to customize your datasets). **Q3.** How to customize training settings, such as saving models, recording logs, etc.? You need to update: `UDL/mmcv/mmcv/runner/hooks` (generally, you do not need to revise it unless you require more complicated training settings). **Q4.** Where can I find more details about how the runner in `UDL/AutoDL/trainer.py` trains/tests? Please see `UDL/mmcv/mmcv/runner/epoch_based_runner.py`. **Note:** Don't put any files into the folder of AutoDL. ## Citation * If you use this toolbox, please kindly cite our paper: ```bibtex @ARTICLE{deng2022grsm, author={Deng, L.-J. and Vivone, G. and Paoletti, M. E. and Scarpa, G. and He, J. and Zhang, Y. and Chanussot, J. and Plaza, A.}, journal={IEEE Geoscience and Remote Sensing Magazine}, title={Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks}, year={2022}, pages={}, } ``` * Also, the codes of traditional methods are from the "pansharpening toolbox for distribution", thus please cite the corresponding paper: ```bibtex @ARTICLE{vivone2021grsm, author={Vivone, Gemine and Dalla Mura, Mauro and Garzelli, Andrea and Restaino, Rocco and Scarpa, Giuseppe and Ulfarsson, Magnus O. and Alparone, Luciano and Chanussot, Jocelyn}, journal={IEEE Geoscience and Remote Sensing Magazine}, title={A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting Pansharpening With Classical and Emerging Pansharpening Methods}, year={2021}, volume={9}, number={1}, pages={53-81}, doi={10.1109/MGRS.2020.3019315} } ``` ## Acknowledgement - [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision. - We appreciate the great contribution of [Xiao Wu](https://xiaoxiao-woo.github.io/) who is a graduate student in [UESTC](https://www.uestc.edu.cn/) to this toolbox. ## Contribution We appreciate all contributions to improving '01-DL-toolbox(Pytorch)'.
Looking forward to your contribution to DLPan-Toolbox. ## License & Copyright This project is open sourced under GNU General Public License v3.0. ================================================ FILE: 01-DL-toolbox(Pytorch)/setup.py ================================================ from setuptools import setup, find_packages setup( classifiers= ['Programming Language :: Python :: 3.7+', ], name='udl', description="unified pytorch framework for vision task", author="XiaoXiao-Woo", author_email="wxwsx1997@gmail.com", url='https://github.com/XiaoXiao-Woo/PanCollection', version='0.1', packages=find_packages(), license='GPLv3', python_requires='>=3.7', install_requires=[ "psutil", "opencv-python", "numpy", "matplotlib", "tensorboard", "addict", "yapf", "imageio", "colorlog", "scipy", "timm" ], ) ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/1_TestData/Datasets Testing/Download link for WV3-NewYork test data.txt ================================================ This folders contain the testing examples, including: 1) "Datasets Testing": A full-resolution WV3-NewYork example + A reduced-resolution WV3-NewYork example 2) "QB", "WV2", "WV3" and "WV4": Save the test datasets for different sensors ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/1_TestData/QB/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/1_TestData/WV2/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/1_TestData/WV3/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/1_TestData/WV4/readme.txt ================================================ none 
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/1_TestData/readme.txt ================================================ This folder contains the testing examples, including: 1) "Datasets Testing": A full-resolution WV3-NewYork example + A reduced-resolution WV3-NewYork example 2) "QB", "WV2", "WV3" and "WV4": Save the test datasets for different sensors ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/QB/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV2/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/APNN/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/BDPN/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/DRPNN/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/DiCNN1/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/Download link for the 8 DL methods on WV3 dataset.txt ================================================ This folder contains the testing examples, including: 1) "Datasets Testing": A full-resolution WV3-NewYork example + A reduced-resolution WV3-NewYork example 2) "QB", "WV2", "WV3" and "WV4": Save the test datasets for different sensors
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/FusionNet/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/MSDCNN/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/PNN/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/PanNet/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV4/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/readme.txt ================================================ This folder contains the outcomes of the 8 DL methods on QB, WV2, WV3 and WV4 sensors.
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/3_EPS/QB/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/3_EPS/WV2/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/3_EPS/WV3/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/3_EPS/WV4/readme.txt ================================================ none ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/3_EPS/readme.txt ================================================ This folder stores the visual output in .eps format, which can be used in your LaTeX editing. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/AWLP/AWLP.m ================================================ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % AWLP fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by % exploiting the Additive Wavelet Luminance Proportional (AWLP) algorithm. % % Interface: % I_Fus_AWLP = AWLP(I_MS,I_PAN,ratio) % % Inputs: % I_MS: MS image upsampled at PAN scale; % I_PAN: PAN image; % ratio: Scale ratio between MS and PAN. Pre-condition: Integer value. % % Outputs: % I_Fus_AWLP: AWLP pansharpened image. % % References: % [Otazu05] X. Otazu, M. Gonzalez-Audicana, O. Fors, and J. Nunez, Introduction of sensor spectral response into image fusion methods. % Application to wavelet-based methods, IEEE Transactions on Geoscience and Remote Sensing, vol. 43, no. 10, pp. 2376-2385, % October 2005. % [Vivone15] G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G.
Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms, % IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015. % [Alparone17] L. Alparone, A. Garzelli, and G. Vivone, "Intersensor statistical matching for pansharpening: Theoretical issues and practical solutions", % IEEE Transactions on Geoscience and Remote Sensing, vol. 55, no. 8, pp. 4682-4695, 2017. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315. % % % % % % % % % % % % % % % Version: 1 % % % % % % % % % % % % % % % % Copyright (C) 2019 % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_AWLP = AWLP(I_MS,I_PAN,ratio)
% AWLP  Additive Wavelet Luminance Proportional pansharpening.
%   For every band, the PAN detail plane (equalized PAN minus its wavelet
%   reconstruction with all but the first coefficient set zeroed) is scaled
%   by that band's share of the band-averaged MS intensity and added to the
%   MS band.

[nRows,nCols,nBands] = size(I_MS);
I_Fus_AWLP = zeros(nRows,nCols,nBands,'double');

% Per-pixel weight of each band relative to the band-averaged intensity;
% eps keeps the division finite where the average is zero.
meanIntensity = sum(I_MS,3)/nBands;
bandShare = zeros(size(I_MS));
for b = 1:nBands
    bandShare(:,:,b) = I_MS(:,:,b)./(meanIntensity+eps);
end

% One PAN copy per MS band, each equalized to its band: the mean is matched
% to the MS band and the gain is the ratio between the MS standard deviation
% and that of a down/up-sampled PAN. (A legacy variant that used the
% full-resolution PAN standard deviation is intentionally not used.)
I_PAN = repmat(I_PAN,[1 1 size(I_MS,3)]);
panLowRes = imresize(imresize(I_PAN,1/ratio),ratio);
for b = 1:size(I_MS,3)
    panBand = I_PAN(:,:,b);
    gain = std2(I_MS(:,:,b))./std2(panLowRes(:,:,b));
    I_PAN(:,:,b) = (panBand - mean2(panBand)).*gain + mean2(I_MS(:,:,b));
end

% Analysis/synthesis filters built from the 5-tap kernel [1 4 6 4 1]/16,
% each scaled by sqrt(2).
lowPass = [1 4 6 4 1]/16;
impulse = [0 0 1 0 0];
h      = sqrt(2)*lowPass;
g      = sqrt(2)*(impulse-lowPass);
htilde = sqrt(2)*lowPass;
gtilde = sqrt(2)*(impulse+lowPass);
filterBank = {h,g,htilde,gtilde};

nLevels = ceil(log2(ratio));

for b = 1:nBands
    wt = ndwt2_working(I_PAN(:,:,b),nLevels,filterBank);

    % Zero every coefficient set except the first one (presumably the
    % approximation -- confirm against ndwt2_working), so the reconstruction
    % below is the low-pass PAN.
    for d = 2:numel(wt.dec)
        wt.dec{d} = zeros(size(wt.dec{d}));
    end

    % Detail plane = equalized PAN minus its low-pass reconstruction.
    % (The summation of detail planes described in [Otazu05] is the older
    % formulation and is not used here.)
    panDetail = I_PAN(:,:,b) - indwt2_working(wt,'c');

    I_Fus_AWLP(:,:,b) = panDetail .* bandShare(:,:,b) + I_MS(:,:,b);
end

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Avg_RR_Assessment.tex ================================================ ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/BDSD/BDSD.m ================================================ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % BDSD fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by % exploiting the Band-Dependent Spatial-Detail (BDSD) algorithm. % % Interface: % I_Fus_BDSD = BDSD(I_MS,I_PAN,ratio,S,sensor) % % Inputs: % I_MS: MS image upsampled at PAN scale; % I_PAN: PAN image; % ratio: Scale ratio between MS and PAN. Pre-condition: Integer value; % S: Local estimation on SxS distinct blocks (typically 128x128); % sensor: String for type of sensor (e.g. 'WV2', 'IKONOS'). % % Output: % I_Fus_BDSD: BDSD pansharpened image. % % References: % [Garzelli08] A. Garzelli, F. Nencini, and L. Capobianco, Optimal MMSE pan sharpening of very high resolution multispectral images, % IEEE Transactions on Geoscience and Remote Sensing, vol. 46, no. 1, pp. 228-236, January 2008. % [Vivone15] G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms, % IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J.
Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315. % % % % % % % % % % % % % % % Version: 1 % % % % % % % % % % % % % % % % Copyright (C) 2019 % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_BDSD = BDSD(I_MS,I_PAN,ratio,S,sensor)
% BDSD  Band-Dependent Spatial-Detail pansharpening on distinct SxS blocks.
%   Injection coefficients are estimated block by block at reduced
%   resolution and then applied block by block at full resolution.
%   Raises an error when S > 1 and S is odd, S is not a multiple of ratio,
%   or the PAN dimensions are not multiples of S.

% ---- Input validation (performed only when S > 1) ----
if (S > 1)
    if (S > 1 && rem(S,2))
        fprintf(1,'\n\n ');
        error('block size for local estimation must be even')
    end

    if rem(S,ratio)
        fprintf(1,'\n\n ');
        error('block size must be multiple of ratio')
    end

    [panRows,panCols] = size(I_PAN);
    if (rem(panRows,S) || rem(panCols,S))
        fprintf(1,'\n\n ');
        error('x and y dims of pan must be multiple of the block size')
    end
end

I_MS = double(I_MS);
I_PAN = double(I_PAN);

% ---- Reduced-resolution data ----
% PAN is filtered by MTF_PAN and sampled every `ratio` pixels from index 3.
panFiltered = MTF_PAN(I_PAN,sensor,ratio);
panReduced = panFiltered(3:ratio:end,3:ratio:end);

msReduced = imresize(I_MS,1/ratio);
msReducedLP = MTF(msReduced,sensor,ratio);

% ---- Per-block coefficient estimation at reduced resolution ----
% Each (S/ratio)x(S/ratio) block yields an SxS zero-padded coefficient tile,
% so the assembled map has the full-resolution size.
estimateBlock = @(blk) estimate_gamma_cube(blk.data,S,ratio);
gammaMap = blockproc(cat(3,msReducedLP,msReduced,panReduced),[S/ratio S/ratio],estimateBlock);

% ---- Per-block injection at full resolution ----
injectBlock = @(blk) compH_inject(blk.data,S);
I_Fus_BDSD = blockproc(cat(3,I_MS,I_PAN,gammaMap),[S S],injectBlock);

%%%_______________________________________________________________
%%%
function gamma = estimate_gamma_cube(in3,S,ratio)
% Least-squares estimate of the injection coefficients for one block.
%   in3 stacks, along dim 3: the low-pass MS bands, the reference MS bands
%   and one PAN plane. The (Nb+1)xNb result is zero-padded to SxS.

nBands = (size(in3,3)-1)/2;
msLowPass = in3(:,:,1:nBands);
msReference = in3(:,:,nBands+1:2*nBands);
panLowPass = in3(:,:,2*nBands+1);

% Regression matrix: one column per low-pass MS band plus the PAN column.
design = zeros(S*S/ratio/ratio,nBands+1);
for k = 1:nBands
    band = msLowPass(:,:,k);
    design(:,k) = band(:);
end
design(:,nBands+1) = panLowPass(:);

% Normal-equation solver shared by all bands.
solver = (design'*design)\design';
gamma = zeros(nBands+1,nBands);
for k = 1:nBands
    target = msReference(:,:,k);
    smooth = msLowPass(:,:,k);
    gamma(:,k) = solver *(target(:)-smooth(:));
end

gamma = padarray(gamma,[S-nBands-1 S-nBands],0,'post');

%%%_______________________________________________________________
%%%
function ms_en = compH_inject(in3,S)
% Apply the block's coefficients to the full-resolution block.
%   in3 stacks, along dim 3: the MS bands, the PAN plane and the SxS
%   coefficient tile produced by estimate_gamma_cube.

nBands = size(in3,3)-2;
msBlock = in3(:,:,1:nBands);
panBlock = in3(:,:,nBands+1);
gammaTile = in3(:,:,nBands+2);

% Regression matrix at full resolution.
[nRows,nCols,nBands] = size(msBlock);
design = zeros(S*S,nBands+1);
for k = 1:nBands
    band = msBlock(:,:,k);
    design(:,k) = band(:);
end
design(:,nBands+1) = panBlock(:);

% Only the top-left (Nb+1)xNb corner of the tile carries coefficients.
coeffs = gammaTile(1:nBands+1,1:nBands);
ms_en = zeros(nRows,nCols,nBands);
for k = 1:nBands
    band = msBlock(:,:,k);
    enhanced = band(:) + design * coeffs(:,k);
    ms_en(:,:,k) = reshape(enhanced,nRows,nCols);
end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/BDSD/BDSD_PC.m ================================================ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % BDSD_PC fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by % exploiting the Band-Dependent Spatial-Detail (BDSD) model solving an optimization constrained problem. % % Interface: % I_Fus_BDSD = BDSD_PC(I_MS,I_PAN,ratio,sensor) % % Inputs: % I_MS: MS image upsampled at PAN scale; % I_PAN: PAN image; % ratio: Scale ratio between MS and PAN. Pre-condition: Integer value; % sensor: String for type of sensor (e.g. 'WV2', 'IKONOS'). % % Output: % I_Fus_BDSD: BDSD_PC pansharpened image. % % Reference: % [Vivone19] G. Vivone, Robust Band-Dependent Spatial-Detail Approaches for Panchromatic Sharpening, % IEEE Transactions on Geoscience and Remote Sensing, 2019. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % % % Version: 1 % % % % % % % % % % % % % % % % Copyright (C) 2019 % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_BDSD = BDSD_PC(I_MS,I_PAN,ratio,sensor)
% BDSD_PC  BDSD pansharpening with sign-constrained regression coefficients.
%   Inputs:
%     I_MS   - MS image upsampled at PAN scale
%     I_PAN  - PAN image
%     ratio  - scale ratio between MS and PAN
%     sensor - sensor name forwarded to MTF / MTF_PAN
%   Output:
%     I_Fus_BDSD - fused image, same size as I_MS
%
%   For each band a constrained least-squares problem is solved at reduced
%   resolution (lsqlin) and the resulting coefficients are applied to the
%   full-resolution [PAN, MS] data.

I_MS = double(I_MS);
I_PAN = double(I_PAN);
nBands = size(I_MS,3);

opts1 = optimset('display','off');

% Reduced-resolution reference, its MTF-filtered version, and the
% MTF-filtered PAN decimated with nearest-neighbour sampling.
I_GT = imresize(I_MS,1/ratio);%,'nearest');
I_MS_LR = MTF(I_GT,sensor,ratio);
I_PAN_LR = imresize(MTF_PAN(I_PAN,sensor,ratio),1/ratio,'nearest');

% The matrices below do not depend on the band index; they were previously
% rebuilt on every loop iteration (including the full-resolution one, which
% has one row per pixel). Building them once gives identical results.
H = [I_PAN_LR(:), reshape(I_MS_LR,[size(I_MS_LR,1)*size(I_MS_LR,2), size(I_MS_LR,3)])];
H_full = [I_PAN(:), reshape(I_MS,[size(I_MS,1)*size(I_MS,2), nBands])];

% Inequality constraint A*x <= 0 with A = diag(-1,1,...,1): the PAN
% coefficient is forced to be >= 0 and every MS coefficient to be <= 0.
A = eye(nBands+1);
A(1,1) = -1;
b_ineq = zeros(1,nBands+1);

I_Fus_BDSD = zeros(size(I_MS));
gamma = zeros(nBands+1,nBands);
for ii = 1 : nBands
    h1 = I_GT(:,:,ii);
    h2 = I_MS_LR(:,:,ii);

    % Fit the detail lost by the MTF filtering (reference minus low-pass).
    gamma(:,ii) = lsqlin(H,h1(:)-h2(:),A,b_ineq,[],[],[],[],[],opts1);

    % Inject the estimated detail at full resolution.
    I_Fus_BDSD(:,:,ii) = I_MS(:,:,ii) + reshape(H_full*gamma(:,ii),[size(I_MS,1) size(I_MS,2)]);
end

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/BDSD/C_BDSD.m ================================================ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % C_BDSD fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images % through the Clustered Band-Dependent Spatial-Detail (C-BDSD) algorithm. % % Interface: % I_Fus_C_BDSD = C_BDSD(I_MS,I_PAN,ratio,sensor,K) % % Inputs: % I_MS: MS image upsampled at PAN scale; % I_PAN: PAN image; % ratio: Scale ratio between MS and PAN. Pre-condition: Integer value; % sensor: String for type of sensor (e.g. 'WV2', 'IKONOS'). % K: Number of clusters (K>1) (Optional: default value K=30); % % Outputs: % I_Fus_C_BDSD: C_BDSD pansharpened image. % % Reference: % [Garzelli15] A. Garzelli, Pansharpening of Multispectral Images Based on Nonlocal Parameter Optimization, % IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 4, pp. 2096-2107, April 2015. % [Vivone20] G.
Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_C_BDSD = C_BDSD(I_MS,I_PAN,ratio,sensor,K)
% C_BDSD  Clustered Band-Dependent Spatial-Detail pansharpening.
%   Pixels are clustered with k-means on two PAN features (local standard
%   deviation and intensity, both normalized by their maximum). BDSD
%   parameters are estimated per cluster at reduced resolution and injected
%   per cluster at full resolution.
%   K defaults to 30 when omitted. Fewer than four arguments is an error.

%%%
% Control of input parameters and initialization
%%%
[N,M,Nb] = size(I_MS);

if nargin == 5
    if K < 2
        % NOTE(review): this returns without assigning the output, so a
        % caller that captures the result gets an "output not assigned"
        % error on top of this message. Kept as-is for compatibility.
        fprintf(1,'Required number of clusters K>1.\n\n');
        return
    end
end
if nargin < 5
    K = 30;
end
if nargin < 4
    fprintf(1,'\nI_Fus_C_BDSD = C_BDSD(I_MS,I_PAN,ratio,sensor,K)\n\n');
    error('At least four input arguments required')
end

I_MS = double(I_MS);
I_PAN = double(I_PAN);

%%%
% Reduced resolution
%%%
% Sampling phase used for the reduced-resolution PAN; the cluster map for
% the same phase is the reference one below.
refPhase = 3;
pan_LP = MTF_PAN(I_PAN,sensor,ratio);
pan_LP_d = pan_LP(refPhase:ratio:end,refPhase:ratio:end);
ms_orig = imresize(I_MS,1/ratio);
ms_LP_d = MTF(ms_orig,sensor,ratio);

% CLUSTER MAPS AT FULL RESOLUTION AND REDUCED RESOLUTION %
Sa = stdfilt(I_PAN,ones(51));
Sa = Sa/max(Sa(:));
Sb = I_PAN;
Sb = Sb/max(Sb(:));
opts = statset('TolX',1e-5);

% Polyphase split: one reduced-size feature pair per (i,j) sampling phase.
features = zeros(N/ratio,M/ratio,2,ratio*ratio);
for i = 1:ratio
    for j = 1:ratio
        features(:,:,1,(i-1)*ratio+j) = Sa(1+(i-1):ratio:end,1+(j-1):ratio:end);
        features(:,:,2,(i-1)*ratio+j) = Sb(1+(i-1):ratio:end,1+(j-1):ratio:end);
    end
end

% Full k-means on the reference phase only; its centers seed all others.
refIdx = (refPhase-1)*ratio+refPhase;
C_stack = zeros(N/ratio,M/ratio,ratio*ratio);
f = features(:,:,:,refIdx);
% NOTE(review): warnings are disabled globally here and never restored.
warning off
[aux, centers] = kmeans(reshape(f,[N/ratio*M/ratio,2]),K,'replicates',2,'start','cluster','options',opts);
C = reshape(aux,[N/ratio M/ratio]);
C_stack(:,:,refIdx) = C;

% Remaining phases: one assignment iteration starting from those centers.
for i = 1:ratio
    for j = 1:ratio
        % Skip only the reference phase. The former test `i*j~=9` matched
        % (3,3) for ratio 4 but would also skip (1,9)/(9,1) for ratio >= 9.
        if ~(i == refPhase && j == refPhase)
            f = features(:,:,:,(i-1)*ratio+j);
            aux = kmeans(reshape(f,[N/ratio*M/ratio,2]),K,'start',centers,'MaxIter',1);
            C_stack(:,:,(i-1)*ratio+j) = reshape(aux,[N/ratio M/ratio]);
        end
    end
end

% Interleave the per-phase maps back into a full-resolution cluster map.
C4 = zeros(size(I_PAN));
for i = 1:ratio
    for j = 1:ratio
        C4(i:ratio:end,j:ratio:end) = C_stack(:,:,(i-1)*ratio+j);
    end
end

% ESTIMATE PARAMETERS AT REDUCED RESOLUTION AND INJECT (CLUSTER BY CLUSTER) %
g = zeros(K,Nb);
alpha = zeros(Nb,Nb,K);
offset = zeros(Nb,K);

% Every bdsd_injection result is zero outside its own cluster and clusters
% are disjoint, so accumulating in place equals summing an N x M x Nb x K
% stack while using K times less memory.
I_Fus_C_BDSD = zeros(N,M,Nb);

% Global estimate (all pixels), used as fallback.
[~,g_global,alpha_global,offset_global] = parm_est(ms_LP_d(:,:,:),pan_LP_d,ms_orig,find(C>0));
for j = 1:K
    [~,g(j,:),alpha(:,:,j),offset(:,j)] = parm_est(ms_LP_d(:,:,:),pan_LP_d,ms_orig,find(C==j));

    % Fall back to the global parameters when any gain estimated so far is
    % negative (same truth value as the former `size(find(g<0)>0)` test,
    % which inspects the whole matrix g).
    if any(g(:) < 0)
        g(j,:) = g_global;
        alpha(:,:,j) = alpha_global;
        offset(:,j) = offset_global;
    end

    ind_j = find(C4==j);
    H = H_comp(I_PAN,I_MS,ind_j);
    I_Fus_C_BDSD = I_Fus_C_BDSD + bdsd_injection(I_PAN,I_MS,H,g(j,:),squeeze(alpha(:,:,j)),offset(:,j),ind_j);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [gamma,g,alpha,offset] = parm_est(hs_LP_d,pan_LP_d,hs_orig,ind)
% Least-squares BDSD parameters over the pixels listed in ind.
%   gamma  - (Nb+2)xNb regression coefficients (MS bands, constant, PAN)
%   g      - PAN coefficient of each band (last row of gamma)
%   alpha  - NbxNb matrix, column k = -gamma(1:Nb,k)/g(k)
%   offset - Nbx1 vector, entry k = gamma(Nb+1,k)/g(k)
% The body used to sit inside an unused `for i=1:Nb` loop and recomputed the
% pseudo-inverse for every band; both repeated identical work.

Nb = size(hs_orig,3);

% Regression matrix: low-pass MS bands, a column of ones, low-pass PAN.
Hd = zeros(size(ind,1),Nb+2);
for k = 1:Nb
    bfull = hs_LP_d(:,:,k);
    Hd(:,k) = bfull(ind);
end
Hd(:,Nb+1) = ones(size(ind));
Hd(:,Nb+2) = pan_LP_d(ind);

% Normal-equation solver shared by all bands.
Z = (Hd'*Hd)\Hd';
gamma = zeros((Nb+2),Nb);
for k = 1:Nb
    bfull = hs_orig(:,:,k);
    b = bfull(ind);
    bdfull = hs_LP_d(:,:,k);
    bd = bdfull(ind);
    gamma(:,k) = Z *(b(:)-bd(:));
end

g = gamma(Nb+2,:);
alpha = zeros(Nb);
offset = zeros(Nb,1);
for k = 1:Nb
    alpha(:,k) = -gamma(1:Nb,k)/gamma(Nb+2,k);
    offset(k) = gamma(Nb+1,k)/gamma(Nb+2,k);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function hs_en = bdsd_injection(pan,msexp,H,g,alpha,offset,ind)
% Inject PAN detail into the pixels listed in ind; all other pixels of the
% returned N x M x Nb array stay zero.

[N,M,Nb] = size(msexp);

% Per-band intensity: combination of the MS columns of H minus the offset.
Intensity = zeros(length(ind),Nb);
for k = 1:Nb
    Intensity(:,k) = H(:,1:Nb) * alpha(:,k) - offset(k);
end

pfull = pan;
p = pfull(ind);
hs_en = zeros(N,M,Nb);
for k = 1:Nb
    bfull = msexp(:,:,k);
    b = bfull(ind);
    b_en = b(:) + (p - Intensity(:,k)) * g(k);
    hs_enfull = hs_en(:,:,k);
    hs_enfull(ind) = b_en;
    hs_en(:,:,k) = reshape(hs_enfull,N,M);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function H = H_comp(pan,hs,ind)
% Regression matrix for the pixels in ind: MS bands, ones, PAN.

Nb = size(hs,3);
H = zeros(length(ind),Nb+2);
for k = 1:Nb
    bfull = hs(:,:,k);
    H(:,k) = bfull(ind);
end
H(:,Nb+1) = ones(size(ind));
H(:,Nb+2) = pan(ind);
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/BT-H/BroveyRegHazeMin.m ================================================ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % Brovey data fusion with haze correction % % Interface: % I_Fus_Brovey_Reg = BroveyRegHazeMin(I_MS,I_PAN,ratio) % % Inputs: % I_MS: MS image upsampled at PAN scale; % I_PAN: PAN image; % ratio: Scale ratio between MS and PAN. Pre-condition: Integer value. % % Outputs: % I_Fus_Brovey_Reg: Pansharpened image. % % References: % [Lolli17] S. Lolli, L. Alparone, A. Garzelli, and G. Vivone, "Haze correction for contrast-based multispectral pansharpening", % IEEE Geoscience and Remote Sensing Letters, vol. 14, no. 12, pp. 2255-2259, 2017. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % % % Version: 1 % % % % % % % % % % % % % % % % Copyright (C) 2019 % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_Brovey_Reg = BroveyRegHazeMin(I_MS,I_PAN,ratio)
% BroveyRegHazeMin  Brovey (multiplicative) fusion with haze correction.
%   A per-band haze level is subtracted from the MS bands, the result is
%   modulated by the ratio between the equalized PAN and a weighted MS
%   intensity, and the haze level is added back.

nBands = size(I_MS,3);

% ---- Per-band haze level ----
if nBands == 4
    % Four bands (handled in the order blue, green, red, NIR): a fixed
    % fraction of each band's 1st percentile.
    prc = 1;
    hazeFraction = [0.95 0.45 0.40 0.05];
    minMS = zeros(1,1,4);
    for b = 1:4
        band = I_MS(:,:,b);
        minMS(1,1,b) = hazeFraction(b) * prctile(band(:),prc);
    end
else
    % Any other band count: the minimum of each band.
    minMS = zeros(1,1,size(I_MS,3));
    for b = 1 : size(I_MS, 3)
        minMS(1,1,b) = min(min(I_MS(:,:,b)));
    end
end

% Haze level replicated to the image size.
L = repmat(minMS, [size(I_MS,1) size(I_MS,2)]);

msImage = double(I_MS);
panImage = double(I_PAN);

% Low-pass PAN, used both to estimate the band weights and as the
% reference for equalization.
panLowPass = LPfilterGauss(panImage,ratio);

% Weighted intensity of the haze-corrected MS bands.
w = estimation_alpha(msImage,panLowPass,'global');
bandWeights(1,1,:) = w;
I = sum((msImage - L) .* repmat(bandWeights,[size(I_MS,1) size(I_MS,2) 1]),3);

% Match PAN to the intensity: mean of I, gain std(I)/std(low-pass PAN).
panImage = (panImage - mean2(panLowPass)).*(std2(I)./std2(panLowPass)) + mean2(I);

% Haze-corrected MS, clipped at zero.
msDehazed = msImage - L;
msDehazed(msDehazed < 0) = 0;

% Multiplicative injection; eps avoids division by zero.
I_Fus_Brovey_Reg = msDehazed .* repmat(panImage./(I+eps),[1 1 size(msImage,3)]) + L;

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Demo_Full_Resolution.m ================================================ %%%%%%%%%%%%%%%%%%%%%%%%%%%For Full-Resolution%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 1) This is a test demo to show all full-resolution results of traditional and DL methods % Here, we take WV3 test dataset as example. Readers can change the corresponding directory % and setting to test other/your datasets % 2) The codes of traditional methods are from the "pansharpening toolbox for distribution", % thus please cite the paper: % [1] G. Vivone, et al., A new benchmark based on recent advances in multispectral pansharpening: Revisiting % pansharpening with classical and emerging pansharpening methods, IEEE Geosci. Remote Sens.
Mag., % 9(1): 53C81, 2021 % 3) Also, if you use this toolbox, please cite our paper: % [2] L.-J. Deng, et al., Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks, % IEEE Geosci. Remote Sens. Mag., 2022 % LJ Deng (UESTC), 2020-02-27 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Note: the test dataset of full-resolution are too huge to upload to % GitHub, thus we provide cloud links to readers to download them to % successfully run this demo, including: % i) Download link for full-resolution WV3-NewYork example (named "NY1_WV3_FR.mat"): % http:******** (put into the folder of "1_TestData/Datasets Testing") % ii) Download link of DL's results for full-resolution WV3-NewYork example: % http:******** (put into the folder of "'2_DL_Result/WV3") % Once you have above datasets, you can run this demo successfully, then % understand how this demo run! %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% clear; close all; %% =======load directors======== % Tools addpath([pwd,'/Tools']); % Select algorithms to run algorithms = {'EXP','BT-H','BDSD-PC','C-GSA','SR-D',... 
'MTF-GLP-HPM-R','MTF-GLP-FS','TV','PanNet','DRPNN','MSDCNN','BDPN','DiCNN','PNN','APNN','FusionNet'}; % director to save EPS figures for latex editing; if other dataset, please % change the director correspondingly data_name = '3_EPS/WV3/wv3_os_ny'; %% ==========Read Data and sensors' info==================== %% read the test dataset; if use your test dataset, please update in this folder file_test = '1_TestData/Datasets Testing/NY1_WV3_FR.mat'; % get I_MS_LR, I_MS, I_PAN and sensors' info; load(file_test) % (Note: If there is no sensor's info in your dataset, % please find and update these info in the following commented lines): %------ following are sensor's info for WV3 (an example for WV3)---- % sensor = 'WV3'; % Qblocks_size = 32; % bicubic = 0;% Interpolator % flag_cut_bounds = 1;% Cut Final Image % dim_cut = 21;% Cut Final Image % thvalues = 0;% Threshold values out of dynamic range % printEPS = 0;% Print Eps % ratio = 4;% Resize Factor % L = 11;% Radiometric Resolution %% Initialization of the Matrix of Results NumIndexes = 3; MatrixResults = zeros(numel(algorithms),NumIndexes); alg = 0; flagQNR = 0; %% Flag QNR/HQNR, 1: QNR otherwise HQNR % zoom-in interesting two regions of figure; you may change them % according to your requirment location1 = [500 700 100 300]; %default: data6: [10 50 1 60]; data7:[140 180 5 60] location2 = [200 380 1000 1250]; %default: data6: [190 240 5 60]; data7:[190 235 120 150] clear print %% show I_MS_LR, I_GT, PAN Imgs: if size(I_MS,3) == 4 showImage4LR(I_MS_LR,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); else showImage8LR(I_MS_LR,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); end % (Note: If you only want to show pan image without region zoom-in, use showPan; % otherwise, use showPan_zoomin) %showPan(I_PAN,printEPS,2,flag_cut_bounds,dim_cut); showPan_zoomin(I_PAN,printEPS,2,flag_cut_bounds,dim_cut, location1, location2); % Note: eps figure is saved in "data_name" for latex editing print('-depsc', 
strcat(data_name, '_pan', '.eps')) %% ======EXP =================== if ismember('EXP',algorithms) alg = alg + 1; [D_lambda_EXP,D_S_EXP,QNRI_EXP] = indexes_evaluation_FS(I_MS,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_EXP,D_S_EXP,QNRI_EXP]; MatrixImage(:,:,:,alg) = I_MS; % (Note: You may use following "showImage8LR" without region zoom-in; otherwise, you can % use "showImage8_zoomin" for zoom-in visualization.) %showImage8LR(I_MS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_MS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_exp.eps')) end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%% CS-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%% %% ====== 1) BT-H Method ====== if ismember('BT-H',algorithms) alg = alg + 1; cd BT-H t2=tic; I_BT_H = BroveyRegHazeMin(I_MS,I_PAN,ratio); time_BT_H = toc(t2); fprintf('Elaboration time BT-H: %.2f [sec]\n',time_BT_H); cd .. %%% Quality indexes computation [D_lambda_BT_H,D_S_BT_H,QNRI_BT_H] = indexes_evaluation_FS(I_BT_H,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_BT_H,D_S_BT_H,QNRI_BT_H]; MatrixImage(:,:,:,alg) = I_BT_H; %showImage8LR(I_BT_H,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_BT_H,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_bth.eps')) end %% ====== 2) BDSD-PC Method ====== if ismember('BDSD-PC',algorithms) alg = alg + 1; cd BDSD t2=tic; I_BDSD_PC = BDSD_PC(I_MS,I_PAN,ratio,sensor); time_BDSD_PC = toc(t2); fprintf('Elaboration time BDSD-PC: %.2f [sec]\n',time_BDSD_PC); cd .. 
[D_lambda_BDSD_PC,D_S_BDSD_PC,QNRI_BDSD_PC] = indexes_evaluation_FS(I_BDSD_PC,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_BDSD_PC,D_S_BDSD_PC,QNRI_BDSD_PC]; MatrixImage(:,:,:,alg) = I_BDSD_PC; %showImage8LR(I_BDSD_PC,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_BDSD_PC,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_bdsd_pc.eps')) end %% ====== 3) C-GSA Method ====== if ismember('C-GSA',algorithms) alg = alg + 1; PS_algorithm = 'GSA'; % Pansharpening algorithm n_segm = 5; % Number of segments cd GS t2=tic; I_C_GSA = GS_Segm(I_MS,I_PAN,gen_LP_image(PS_algorithm,I_MS,I_PAN,I_MS_LR,ratio,sensor), k_means_clustering(I_MS,n_segm)); time_C_GSA = toc(t2); fprintf('Elaboration time GSA: %.2f [sec]\n',time_C_GSA); cd .. %%% Quality indexes computation [D_lambda_C_GSA,D_S_C_GSA,QNRI_C_GSA] = indexes_evaluation_FS(I_C_GSA,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_C_GSA,D_S_C_GSA,QNRI_C_GSA]; MatrixImage(:,:,:,alg) = I_C_GSA; %showImage8LR(I_C_GSA,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_C_GSA,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_c_gsa.eps')) end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%% MRA-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%% %% ====== 1) SR-D Method ====== if ismember('SR-D',algorithms) alg = alg + 1; %%%%%%%%%%%%%%%%%%%%%%%%%% Parameters setting %%%%%%%%%%%%%%%%%%%%%%%%%%%%% TS = 7; % Tiling (dimensions of the patches are TS x TS) ol = 4; % Overlap (in pixels) between contiguous tile n_atoms = 10; % Max number of representation atoms (default value = 10) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% cd SR-D t2=tic; I_SR_D = CS(I_MS,I_PAN,I_MS_LR,ratio,sensor,TS,ol,n_atoms); time_SR_D = toc(t2); fprintf('Elaboration time SR_D: %.2f 
[sec]\n',time_SR_D); cd .. [D_lambda_SR_D,D_S_SR_D,QNRI_SR_D] = indexes_evaluation_FS(I_SR_D,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_SR_D,D_S_SR_D,QNRI_SR_D]; MatrixImage(:,:,:,alg) = I_SR_D; %showImage8LR(I_SR_D,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_SR_D,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_sr_d.eps')) end %% ====== 2) MTF-GLP-HPM-R Method ====== if ismember('MTF-GLP-HPM-R',algorithms) alg = alg + 1; cd GLP t2=tic; I_MTF_GLP_HPM_R = MTF_GLP_HPM_R(I_MS,I_PAN,sensor,ratio); time_MTF_GLP_HPM_R = toc(t2); fprintf('Elaboration time MTF-GLP-HPM-R: %.2f [sec]\n',time_MTF_GLP_HPM_R); cd .. [D_lambda_MTF_GLP_HPM_R,D_S_MTF_GLP_HPM_R,QNRI_MTF_GLP_HPM_R] = indexes_evaluation_FS(I_MTF_GLP_HPM_R,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_MTF_GLP_HPM_R,D_S_MTF_GLP_HPM_R,QNRI_MTF_GLP_HPM_R]; MatrixImage(:,:,:,alg) = I_MTF_GLP_HPM_R; %showImage8LR(I_MTF_GLP_HPM_R,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_MTF_GLP_HPM_R,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_mtfglp_hpm_r.eps')) end %% ====== 3) MTF-GLP-FS Method ====== if ismember('MTF-GLP-FS',algorithms) alg = alg + 1; cd GLP t2=tic; I_MTF_GLP_FS = MTF_GLP_FS(I_MS,I_PAN,sensor,ratio); time_MTF_GLP_FS = toc(t2); fprintf('Elaboration time MTF-GLP-FS: %.2f [sec]\n',time_MTF_GLP_FS); cd .. 
%%% Quality indexes computation [D_lambda_MTF_GLP_FS,D_S_MTF_GLP_FS,QNRI_MTF_GLP_FS] = indexes_evaluation_FS(I_MTF_GLP_FS,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_MTF_GLP_FS,D_S_MTF_GLP_FS,QNRI_MTF_GLP_FS]; MatrixImage(:,:,:,alg) = I_MTF_GLP_FS; %showImage8LR(I_MTF_GLP_FS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_MTF_GLP_FS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_mtfglpfs.eps')) end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%% VO-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%% %% ====== 1) TV Method ====== if ismember('TV',algorithms) alg = alg + 1; %%%%%%%%%%%%%%%%%%%%%%%%%% Parameters setting %%%%%%%%%%%%%%%%%%%%%%%%%%%%% switch sensor case 'IKONOS' w=[0.1091 0.2127 0.2928 0.3854]; c = 8; alpha=1.064; maxiter=10; lambda = 0.47106; case {'GeoEye1','WV4'} w=[0.1552, 0.3959, 0.2902, 0.1587]; c = 8; alpha=0.75; maxiter=50; lambda = 157.8954; case 'WV3' w=[0.0657 0.1012 0.1537 0.1473 0.1245 0.1545 0.1338 0.1192]; c = 8; alpha=0.75; maxiter=50; lambda = 1.0000e-03; end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% cd TV t2 = tic; I_TV = TV_pansharpen(I_MS_LR,I_PAN,alpha,lambda,c,maxiter,w); time_TV = toc(t2); fprintf('Elaboration time TV: %.2f [sec]\n',time_TV); cd .. 
%%% Quality indexes computation [D_lambda_TV,D_S_TV,QNRI_TV] = indexes_evaluation_FS(I_TV,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_TV,D_S_TV,QNRI_TV]; MatrixImage(:,:,:,alg) = I_TV; %showImage8LR(I_TV,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_TV,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_tv.eps')) end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%% DL-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%% %% ====== 1) PanNet Method ====== % if you use other sensor's data, please update the following director and % DL result. Note that the DL results here are obtained from our "01-DL toolbox (Pytorch)" folder, please check it. % Similar operation for following other DL methods. file_pannet = 'pannet_wv3_os_ny'; load(strcat('2_DL_Result/WV3/PanNet/', file_pannet, '.mat')) % (Note: val_bit = 2047 for 11-bit WV3, WV4 and QB data; val_bit = 1023 for 10-bit GF2 data) val_bit = 2047; I_pannet = val_bit*double(pannet_wv3_os_ny); if ismember('PanNet',algorithms) alg = alg + 1; %%% Quality indexes computation [D_lambda_pannet,D_S_pannet,QNRI_pannet] = indexes_evaluation_FS(I_pannet,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_pannet,D_S_pannet,QNRI_pannet]; MatrixImage(:,:,:,alg) = I_pannet; %showImage8LR(I_pannet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_pannet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_pannet.eps')) end %% ====== 2) DRPNN Method ====== file_drpnn = 'drpnn_wv3_os_ny'; load(strcat('2_DL_Result/WV3/DRPNN/', file_drpnn, '.mat')) % load i-th image for DiCNN I_drpnn = val_bit*double(drpnn_wv3_os_ny); if ismember('DRPNN',algorithms) alg = alg + 1; %%% Quality indexes computation [D_lambda_drpnn,D_S_drpnn,QNRI_drpnn] = 
indexes_evaluation_FS(I_drpnn,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_drpnn,D_S_drpnn,QNRI_drpnn]; MatrixImage(:,:,:,alg) = I_drpnn; %showImage8LR(I_drpnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_drpnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_drpnn.eps')) end %% ====== 3) MSDCNN Method ====== file_msdcnn = 'msdcnn_wv3_os_ny'; load(strcat('2_DL_Result/WV3/MSDCNN/', file_msdcnn, '.mat')) % load i-th image for DiCNN I_msdcnn = val_bit*double(msdcnn_wv3_os_ny); if ismember('MSDCNN',algorithms) alg = alg + 1; %%% Quality indexes computation [D_lambda_msdcnn,D_S_msdcnn,QNRI_msdcnn] = indexes_evaluation_FS(I_msdcnn,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_msdcnn,D_S_msdcnn,QNRI_msdcnn]; MatrixImage(:,:,:,alg) = I_msdcnn; %showImage8LR(I_msdcnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_msdcnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_msdcnn.eps')) end %% ====== 4) BDPN Method ====== file_bdpn = 'bdpn_wv3_os_ny'; load(strcat('2_DL_Result/WV3/BDPN/', file_bdpn , '.mat')) % load i-th image for DiCNN I_bdpn = val_bit*double(bdpn_wv3_os_ny); if ismember('BDPN',algorithms) alg = alg + 1; %%% Quality indexes computation [D_lambda_bdpn,D_S_bdpn,QNRI_bdpn] = indexes_evaluation_FS(I_bdpn,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_bdpn,D_S_bdpn,QNRI_bdpn]; MatrixImage(:,:,:,alg) = I_bdpn; %showImage8LR(I_bdpn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_bdpn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_bdpn.eps')) end %% ====== 5) DiCNN Method ====== file_dicnn = 'dicnn_wv3_os_ny'; load(strcat('2_DL_Result/WV3/DiCNN/', file_dicnn, '.mat')) % load i-th image for DiCNN 
I_dicnn = val_bit*double(dicnn_wv3_os_ny); if ismember('DiCNN',algorithms) alg = alg + 1; %%% Quality indexes computation [D_lambda_dicnn,D_S_dicnn,QNRI_dicnn] = indexes_evaluation_FS(I_dicnn,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_dicnn,D_S_dicnn,QNRI_dicnn]; MatrixImage(:,:,:,alg) = I_dicnn; %showImage8LR(I_dicnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_dicnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_dicnn.eps')) end %% ====== 6) PNN Method ====== file_pnn = 'pnn_wv3_os_ny'; load(strcat('2_DL_Result/WV3/PNN/', file_pnn, '.mat')) % load i-th image for DiCNN I_pnn = val_bit*double(pnn_wv3_os_ny); if ismember('PNN',algorithms) alg = alg + 1; %%% Quality indexes computation [D_lambda_pnn,D_S_pnn,QNRI_pnn] = indexes_evaluation_FS(I_pnn,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_pnn,D_S_pnn,QNRI_pnn]; MatrixImage(:,:,:,alg) = I_pnn; %showImage8LR(I_pnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_pnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_pnn.eps')) end %% ====== 7) APNN Method ====== (not true APNN, just a replacement!!) file_apnn = 'apnn_wv3_os_ny'; load(strcat('2_DL_Result/WV3/APNN/', file_apnn, '.mat')) % load i-th image for DiCNN I_apnn = val_bit*double(apnn_wv3_os_ny); % not right answer, just a replacement! 
if ismember('APNN',algorithms) alg = alg + 1; %%% Quality indexes computation [D_lambda_apnn,D_S_apnn,QNRI_apnn] = indexes_evaluation_FS(I_apnn,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_apnn,D_S_apnn,QNRI_apnn]; MatrixImage(:,:,:,alg) = I_apnn; %showImage8LR(I_apnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_apnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_apnn.eps')) end %% ====== 8) FusionNet Method ====== file_fusionnet = 'fusionnet_wv3_os_ny'; load(strcat('2_DL_Result/WV3/FusionNet/', file_fusionnet, '.mat')) % load i-th image for DiCNN I_fusionnet = val_bit*double(fusionnet_wv3_os_ny); if ismember('FusionNet',algorithms) alg = alg + 1; %%% Quality indexes computation [D_lambda_fusionnet,D_S_fusionnet,QNRI_fusionnet] = indexes_evaluation_FS(I_fusionnet,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR); MatrixResults(alg,:) = [D_lambda_fusionnet,D_S_fusionnet,QNRI_fusionnet]; MatrixImage(:,:,:,alg) = I_fusionnet; %showImage8LR(I_fusionnet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio); showImage8_zoomin(I_fusionnet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2); print('-depsc', strcat(data_name, '_fusionnet.eps')) end %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %%%%%%%%%%% Show and Save Results %%%%%%%%%%%%%%%%%%%%%%%%%% %% Print in LATEX if flagQNR == 1 matrix2latex(MatrixResults,'FR_Assessment.tex', 'rowLabels',algorithms,'columnLabels',[{'DL'},{'DS'},{'QNR'}],'alignment','c','format', '%.4f'); else matrix2latex(MatrixResults,'FR_Assessment.tex', 'rowLabels',algorithms,'columnLabels',[{'DL'},{'DS'},{'HQNR'}],'alignment','c','format', '%.4f'); end %% View All if size(I_MS,3) == 4 vect_index_RGB = [3,2,1]; else vect_index_RGB = [5,3,2]; end titleImages = algorithms; figure, showImagesAll(MatrixImage,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,0); %% ======Display the 
final average performance ======= fprintf('\n') disp('#######################################################') disp(['Display the performance for:']) disp('#######################################################') disp(' |====Q====|===Q_avg===|=====SAM=====|======ERGAS=======|=======SCC=======') MatrixResults %% %%%%%%%%%%% End %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Demo_Reduced_Resolution.m ================================================ %%%%%%%%%%%%%%%%%%%%%%%%%%%For Reduced-Resolution%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 1) This is a test demo to show all reduced-resolution results of traditional and DL methods % Here, we take WV3 test dataset as example. Readers can change the corresponding directory % and setting to test other/your datasets % 2) The codes of traditional methods are from the "pansharpening toolbox for distribution", % thus please cite the paper: % [1] G. Vivone, et al., A new benchmark based on recent advances in multispectral pansharpening: Revisiting % pansharpening with classical and emerging pansharpening methods, IEEE Geosci. Remote Sens. Mag., % 9(1): 53-81, 2021 % 3) Also, if you use this toolbox, please cite our paper: % [2] L.-J. Deng, et al., Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks, % IEEE Geosci. Remote Sens.
%           Mag., 2022
% LJ Deng (UESTC), 2020-02-27
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Note: the test datasets of reduced-resolution are still too huge to upload to
%       GitHub, thus we provide cloud links to readers to download them to
%       successfully run this demo, including:
%  i)  Download link for reduced-resolution WV3-NewYork example (named "NY1_WV3_RR.mat"):
%      http:******** (put into the folder of "1_TestData/Datasets Testing")
%  ii) Download link of DL's results for reduced-resolution WV3-NewYork example:
%      http:******** (put into the folder of "'2_DL_Result/WV3")
% Once you have above datasets, you can run this demo successfully, then
% understand how this demo run!
%
% Script overview:
%   For every name listed in "algorithms" the script produces (or loads) a
%   fused image, scores it against I_GT with indexes_evaluation, stores the
%   five indexes in one row of MatrixResults (columns: Q, Q_avg, SAM, ERGAS,
%   SCC), stores the image in MatrixImage, shows it and prints an EPS file.
%   The row order follows the order of the sections below, which matches the
%   order of the default "algorithms" list.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
clear; close all;

%% =======load directories========
% Tools
addpath([pwd,'/Tools']);

% Select algorithms to run
algorithms = {'GT','EXP','BT-H','BDSD-PC','C-GSA','SR-D',...
    'MTF-GLP-HPM-R','MTF-GLP-FS','TV','PanNet','DRPNN','MSDCNN','BDPN','DiCNN1','PNN','APNN','FusionNet'};

% directory to save EPS figures for latex editing; if other dataset, please
% change the directory correspondingly
satellite = 'WV3';
mat_name  = 'NY1_WV3_RR';
data_name = strcat('3_EPS/', satellite, '/', mat_name);

% print() does not create missing folders, so make sure the EPS output
% folder exists before the first figure is saved.
eps_dir = fileparts(data_name);
if ~exist(eps_dir,'dir')
    mkdir(eps_dir);
end

%% ==========Read Data and sensors' info====================
%% read the test dataset; if use your test dataset, please update in this folder
% Built from mat_name so that the dataset, the DL results and the EPS names
% all follow the same setting ([] concatenation keeps the blank in the path).
file_test = ['1_TestData/Datasets Testing/', mat_name, '.mat'];

% get I_MS_LR, I_MS, I_PAN and sensors' info;
load(file_test)

% (Note: If there is no sensor's info in your dataset,
%  please find and update these info in the following commented lines):
%------ following are sensor's info for WV3 (an example for WV3)----
% sensor = 'WV3';
% Qblocks_size = 32;
% bicubic = 0;% Interpolator
% flag_cut_bounds = 1;% Cut Final Image
% dim_cut = 21;% Cut Final Image
% thvalues = 0;% Threshold values out of dynamic range
% printEPS = 0;% Print Eps
% ratio = 4;% Resize Factor
% L = 11;% Radiometric Resolution

%% Initialization of the Matrix of Results
NumIndexes = 5;
MatrixResults = zeros(numel(algorithms),NumIndexes);
alg = 0;

% zoom-in interesting two regions of figure; you may change them
% according to your requirement
location1 = [50 70 10 30];  %default: data6: [10 50 1 60]; data7:[140 180 5 60]
location2 = [20 38 10 50];  %default: data6: [190 240 5 60]; data7:[190 235 120 150]

clear print

%% show I_MS_LR, I_GT, PAN Imgs:
if size(I_MS,3) == 4
    showImage4LR(I_MS_LR,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
else
    showImage8LR(I_MS_LR,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
end

% (Note: You may use following "showPan" without region zoom-in; otherwise, you can
%  use "showPan_zoomin" for zoom-in visualization.)
%showPan(I_PAN,printEPS,2,flag_cut_bounds,dim_cut);
showPan_zoomin(I_PAN,printEPS,2,flag_cut_bounds,dim_cut, location1, location2);
% Note: eps figure is saved in "data_name" for latex editing
print('-depsc', strcat(data_name, '_pan', '.eps'))

%% ======GT ===================
if ismember('GT',algorithms)
    alg = alg + 1;
    [Q_avg_GT, SAM_GT, ERGAS_GT, SCC_GT_GT, Q_GT] = indexes_evaluation(I_GT,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_GT,Q_avg_GT,SAM_GT,ERGAS_GT,SCC_GT_GT];
    MatrixImage(:,:,:,alg) = I_GT;

    % (Note: You may use following "showImage8LR" without region zoom-in; otherwise, you can
    %  use "showImage8_zoomin" for zoom-in visualization.)
    %showImage8LR(I_GT,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_GT,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_gt', '.eps'))
end

%% ======EXP ===================
if ismember('EXP',algorithms)
    alg = alg + 1;
    [Q_avg_EXP, SAM_EXP, ERGAS_EXP, SCC_GT_EXP, Q_EXP] = indexes_evaluation(I_MS,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_EXP,Q_avg_EXP,SAM_EXP,ERGAS_EXP,SCC_GT_EXP];
    MatrixImage(:,:,:,alg) = I_MS;

    %showImage8LR(I_MS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_MS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_exp.eps'))
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% CS-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%%
%% ====== 1) BT-H Method ======
if ismember('BT-H',algorithms)
    alg = alg + 1;

    cd BT-H
    t2=tic;
    I_BT_H = BroveyRegHazeMin(I_MS,I_PAN,ratio);
    time_BT_H = toc(t2);
    fprintf('Elaboration time BT-H: %.2f [sec]\n',time_BT_H);
    cd ..

    %%% Quality indexes computation
    [Q_avg_BT_H, SAM_BT_H, ERGAS_BT_H, SCC_GT_BT_H, Q_BT_H] = indexes_evaluation(I_BT_H,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_BT_H,Q_avg_BT_H,SAM_BT_H,ERGAS_BT_H,SCC_GT_BT_H];
    MatrixImage(:,:,:,alg) = I_BT_H;

    %showImage8LR(I_BT_H,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_BT_H,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_bth.eps'))
end

%% ====== 2) BDSD-PC Method ======
if ismember('BDSD-PC',algorithms)
    alg = alg + 1;

    cd BDSD
    t2=tic;
    I_BDSD_PC = BDSD_PC(I_MS,I_PAN,ratio,sensor);
    time_BDSD_PC = toc(t2);
    fprintf('Elaboration time BDSD-PC: %.2f [sec]\n',time_BDSD_PC);
    cd ..

    [Q_avg_BDSD_PC, SAM_BDSD_PC, ERGAS_BDSD_PC, SCC_GT_BDSD_PC, Q_BDSD_PC] = indexes_evaluation(I_BDSD_PC,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_BDSD_PC,Q_avg_BDSD_PC,SAM_BDSD_PC,ERGAS_BDSD_PC,SCC_GT_BDSD_PC];
    MatrixImage(:,:,:,alg) = I_BDSD_PC;

    %showImage8LR(I_BDSD_PC,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_BDSD_PC,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_bdsd_pc.eps'))
end

%% ====== 3) C-GSA Method ======
if ismember('C-GSA',algorithms)
    alg = alg + 1;

    PS_algorithm = 'GSA'; % Pansharpening algorithm
    n_segm = 5; % Number of segments

    cd GS
    t2=tic;
    I_C_GSA = GS_Segm(I_MS,I_PAN,gen_LP_image(PS_algorithm,I_MS,I_PAN,I_MS_LR,ratio,sensor), k_means_clustering(I_MS,n_segm));
    time_C_GSA = toc(t2);
    % Label fixed: this section times C-GSA, not plain GSA.
    fprintf('Elaboration time C-GSA: %.2f [sec]\n',time_C_GSA);
    cd ..

    [Q_avg_C_GSA, SAM_C_GSA, ERGAS_C_GSA, SCC_GT_C_GSA, Q_C_GSA] = indexes_evaluation(I_C_GSA,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_C_GSA,Q_avg_C_GSA,SAM_C_GSA,ERGAS_C_GSA,SCC_GT_C_GSA];
    MatrixImage(:,:,:,alg) = I_C_GSA;

    %showImage8LR(I_C_GSA,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_C_GSA,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_C_gsa.eps'))
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% MRA-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%%
%% ====== 1) SR-D Method ======
if ismember('SR-D',algorithms)
    alg = alg + 1;

    %%%%%%%%%%%%%%%%%%%%%%%%%% Parameters setting %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    TS = 7; % Tiling (dimensions of the patches are TS x TS)
    ol = 4; % Overlap (in pixels) between contiguous tile
    n_atoms = 10; % Max number of representation atoms (default value = 10)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    cd SR-D
    t2=tic;
    I_SR_D = CS(I_MS,I_PAN,I_MS_LR,ratio,sensor,TS,ol,n_atoms);
    time_SR_D = toc(t2);
    fprintf('Elaboration time SR_D: %.2f [sec]\n',time_SR_D);
    cd ..

    [Q_avg_SR_D, SAM_SR_D, ERGAS_SR_D, SCC_GT_SR_D, Q_SR_D] = indexes_evaluation(I_SR_D,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_SR_D,Q_avg_SR_D,SAM_SR_D,ERGAS_SR_D,SCC_GT_SR_D];
    MatrixImage(:,:,:,alg) = I_SR_D;

    %showImage8LR(I_SR_D,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_SR_D,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_SR_D.eps'))
end

%% ====== 2) MTF-GLP-HPM-R Method ======
if ismember('MTF-GLP-HPM-R',algorithms)
    alg = alg + 1;

    cd GLP
    t2=tic;
    I_MTF_GLP_HPM_R = MTF_GLP_HPM_R(I_MS,I_PAN,sensor,ratio);
    time_MTF_GLP_HPM_R = toc(t2);
    fprintf('Elaboration time MTF-GLP-HPM-R: %.2f [sec]\n',time_MTF_GLP_HPM_R);
    cd ..

    [Q_avg_MTF_GLP_HPM_R, SAM_MTF_GLP_HPM_R, ERGAS_MTF_GLP_HPM_R, SCC_GT_MTF_GLP_HPM_R, Q_MTF_GLP_HPM_R] = indexes_evaluation(I_MTF_GLP_HPM_R,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_MTF_GLP_HPM_R,Q_avg_MTF_GLP_HPM_R,SAM_MTF_GLP_HPM_R,ERGAS_MTF_GLP_HPM_R,SCC_GT_MTF_GLP_HPM_R];
    MatrixImage(:,:,:,alg) = I_MTF_GLP_HPM_R;

    %showImage8LR(I_MTF_GLP_HPM_R,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_MTF_GLP_HPM_R,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_mtfglp_hpm_r.eps'))
end

%% ====== 3) MTF-GLP-FS Method ======
if ismember('MTF-GLP-FS',algorithms)
    alg = alg + 1;

    cd GLP
    t2=tic;
    I_MTF_GLP_FS = MTF_GLP_FS(I_MS,I_PAN,sensor,ratio);
    time_MTF_GLP_FS = toc(t2);
    fprintf('Elaboration time MTF-GLP-FS: %.2f [sec]\n',time_MTF_GLP_FS);
    cd ..

    [Q_avg_MTF_GLP_FS, SAM_MTF_GLP_FS, ERGAS_MTF_GLP_FS, SCC_GT_MTF_GLP_FS, Q_MTF_GLP_FS] = indexes_evaluation(I_MTF_GLP_FS,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_MTF_GLP_FS,Q_avg_MTF_GLP_FS,SAM_MTF_GLP_FS,ERGAS_MTF_GLP_FS,SCC_GT_MTF_GLP_FS];
    MatrixImage(:,:,:,alg) = I_MTF_GLP_FS;

    %showImage8LR(I_MTF_GLP_FS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_MTF_GLP_FS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_mtfglpfs.eps'))
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% VO-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%%
%% ====== 1) TV Method ======
if ismember('TV',algorithms)
    alg = alg + 1;

    %%%%%%%%%%%%%%%%%%%%%%%%%% Parameters setting %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    switch sensor
        case 'IKONOS'
            w=[0.1091 0.2127 0.2928 0.3854];
            c = 8;
            alpha=1.064;
            maxiter=10;
            lambda = 0.47106;
        case {'GeoEye1','WV4'}
            w=[0.1552, 0.3959, 0.2902, 0.1587];
            c = 8;
            alpha=0.75;
            maxiter=50;
            lambda = 157.8954;
        case 'WV3'
            w=[0.0657 0.1012 0.1537 0.1473 0.1245 0.1545 0.1338 0.1192];
            c = 8;
            alpha=0.75;
            maxiter=50;
            lambda = 1.0000e-03;
        otherwise
            % Without this branch w/c/lambda/maxiter stay undefined and
            % "alpha" silently resolves to the graphics function of the
            % same name, which gives a misleading failure further down.
            error('Demo_Reduced_Resolution:unsupportedSensor', ...
                'No TV parameters are defined for sensor ''%s''; add a case or remove ''TV'' from "algorithms".', sensor);
    end
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    cd TV
    t2 = tic;
    I_TV = TV_pansharpen(I_MS_LR,I_PAN,alpha,lambda,c,maxiter,w);
    time_TV = toc(t2);
    fprintf('Elaboration time TV: %.2f [sec]\n',time_TV);
    cd ..

    [Q_avg_TV, SAM_TV, ERGAS_TV, SCC_GT_TV, Q_TV] = indexes_evaluation(I_TV,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_TV,Q_avg_TV,SAM_TV,ERGAS_TV,SCC_GT_TV];
    MatrixImage(:,:,:,alg) = I_TV;

    %showImage8LR(I_TV,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_TV,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_tv.eps'))
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% DL-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%%
% Each DL section loads a precomputed result (variable "sr") from
% 2_DL_Result/<satellite>/<Method>/ and evaluates it like the methods above.
%% ====== 1) PanNet Method ======
% if you use other sensor's data, please update the following directory and
% DL result. Note that the DL results here are obtained from our "01-DL toolbox (Pytorch)" folder, please check it.
% Similar operation for following other DL methods.

% (Note: val_bit = 2047 for 11-bit WV3, WV4 and QB data; val_bit = 1023 for 10-bit GF2 data)
val_bit = 2047;

if ismember('PanNet',algorithms)
    % file_pannet = 'output';
    load(strcat('2_DL_Result/', satellite, '/PanNet/', 'output_', mat_name, '.mat'))
    I_pannet = double(sr);

    alg = alg + 1;
    [Q_avg_pannet, SAM_pannet, ERGAS_pannet, SCC_pannet, Q_pannet] = indexes_evaluation(I_pannet,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_pannet,Q_avg_pannet,SAM_pannet,ERGAS_pannet,SCC_pannet];
    MatrixImage(:,:,:,alg) = I_pannet;

    %showImage8LR(I_pannet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_pannet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_pannet.eps'))
end

%% ====== 2) DRPNN Method ======
if ismember('DRPNN',algorithms)
    % file_drpnn = 'drpnn_wv3_rs_ny';
    load(strcat('2_DL_Result/', satellite, '/DRPNN/', 'output_', mat_name, '.mat'))
    I_drpnn = double(sr);

    alg = alg + 1;
    [Q_avg_drpnn, SAM_drpnn, ERGAS_drpnn, SCC_drpnn, Q_drpnn] = indexes_evaluation(I_drpnn,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_drpnn,Q_avg_drpnn,SAM_drpnn,ERGAS_drpnn,SCC_drpnn];
    MatrixImage(:,:,:,alg) = I_drpnn;

    %showImage8LR(I_drpnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_drpnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_drpnn.eps'))
end

%% ====== 3) MSDCNN Method ======
if ismember('MSDCNN',algorithms)
    % file_msdcnn = 'msdcnn_wv3_rs_ny';
    load(strcat('2_DL_Result/', satellite, '/MSDCNN/', 'output_', mat_name, '.mat'))
    I_msdcnn = double(sr);

    alg = alg + 1;
    [Q_avg_msdcnn, SAM_msdcnn, ERGAS_msdcnn, SCC_msdcnn, Q_msdcnn] = indexes_evaluation(I_msdcnn,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_msdcnn,Q_avg_msdcnn,SAM_msdcnn,ERGAS_msdcnn,SCC_msdcnn];
    MatrixImage(:,:,:,alg) = I_msdcnn;

    %showImage8LR(I_msdcnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_msdcnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_msdcnn.eps'))
end

%% ====== 4) BDPN Method ======
if ismember('BDPN',algorithms)
    % file_bdpn = 'bdpn_wv3_rs_ny';
    load(strcat('2_DL_Result/', satellite, '/BDPN/', 'output_', mat_name, '.mat'))
    I_bdpn = double(sr);

    alg = alg + 1;
    [Q_avg_bdpn, SAM_bdpn, ERGAS_bdpn, SCC_bdpn, Q_bdpn] = indexes_evaluation(I_bdpn,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_bdpn,Q_avg_bdpn,SAM_bdpn,ERGAS_bdpn,SCC_bdpn];
    MatrixImage(:,:,:,alg) = I_bdpn;

    %showImage8LR(I_bdpn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_bdpn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_bdpn.eps'))
end

%% ====== 5) DiCNN Method ======
if ismember('DiCNN1',algorithms)
    % file_dicnn = 'dicnn_wv3_rs_ny';
    load(strcat('2_DL_Result/', satellite, '/DiCNN1/', 'output_', mat_name, '.mat'))
    I_dicnn = double(sr);

    alg = alg + 1;
    [Q_avg_dicnn, SAM_dicnn, ERGAS_dicnn, SCC_dicnn, Q_dicnn] = indexes_evaluation(I_dicnn,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_dicnn,Q_avg_dicnn,SAM_dicnn,ERGAS_dicnn,SCC_dicnn];
    MatrixImage(:,:,:,alg) = I_dicnn;

    %showImage8LR(I_dicnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_dicnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_dicnn.eps'))
end

%% ====== 6) PNN Method ======
if ismember('PNN',algorithms)
    % file_pnn = 'pnn_wv3_rs_ny';
    load(strcat('2_DL_Result/', satellite ,'/PNN/', 'output_', mat_name, '.mat'))
    I_pnn = double(sr);

    alg = alg + 1;
    [Q_avg_pnn, SAM_pnn, ERGAS_pnn, SCC_pnn, Q_pnn] = indexes_evaluation(I_pnn,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_pnn,Q_avg_pnn,SAM_pnn,ERGAS_pnn,SCC_pnn];
    MatrixImage(:,:,:,alg) = I_pnn;

    %showImage8LR(I_pnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_pnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_pnn.eps'))
end

%% ====== 7) APNN Method ======
if ismember('APNN',algorithms)
    % file_apnn = 'apnn_wv3_rs_ny';
    load(strcat('2_DL_Result/', satellite, '/APNN/', 'output_', mat_name, '.mat'))
    I_apnn = double(sr);

    alg = alg + 1;
    [Q_avg_apnn, SAM_apnn, ERGAS_apnn, SCC_apnn, Q_apnn] = indexes_evaluation(I_apnn,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_apnn,Q_avg_apnn,SAM_apnn,ERGAS_apnn,SCC_apnn];
    MatrixImage(:,:,:,alg) = I_apnn;

    %showImage8LR(I_apnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_apnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_apnn.eps'))
end

%% ====== 8) FusionNet Method ======
if ismember('FusionNet',algorithms)
    % file_fusionnet = 'fusionnet_wv3_rs_ny';
    % load(strcat('2_DL_Result/', satellite ,'/FusionNet/', 'output_',mat_name, '.mat'));
    % NOTE(review): unlike the other DL sections this one loads a fixed file
    % name and rescales "sr" by val_bit; presumably this file stores values
    % in [0,1] - confirm against the shipped FusionNet result before unifying.
    load(strcat('2_DL_Result/', satellite ,'/FusionNet/', 'fusionnet_wv3_rs_ny', '.mat'));
    I_fusionnet = val_bit * double(sr);

    alg = alg + 1;
    [Q_avg_fusionnet, SAM_fusionnet, ERGAS_fusionnet, SCC_fusionnet, Q_fusionnet] = indexes_evaluation(I_fusionnet,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_fusionnet,Q_avg_fusionnet,SAM_fusionnet,ERGAS_fusionnet,SCC_fusionnet];
    MatrixImage(:,:,:,alg) = I_fusionnet;

    %showImage8LR(I_fusionnet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_fusionnet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_fusionnet.eps'))
end

%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% %%%%%%%%%%% Show and Save Results %%%%%%%%%%%%%%%%%%%%%%%%%%
%% Print in LATEX
% Only Q (col 1), SAM (col 3) and ERGAS (col 4) go into the LaTeX table.
matrix2latex(MatrixResults(:,[1,3,4]),'RR_Assessment.tex', 'rowLabels',algorithms,'columnLabels',[{'Q2n'},{'SAM'},{'ERGAS'}],'alignment','c','format', '%.4f');

%% View All
if size(I_GT,3) == 4
    vect_index_RGB = [3,2,1];
else
    vect_index_RGB = [5,3,2];
end

titleImages = algorithms;
figure, showImagesAll(MatrixImage,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,0);

%% ======Display the final average performance =======
fprintf('\n')
disp('#######################################################')
disp(['Display the performance for:'])
disp('#######################################################')
disp(' |====Q====|===Q_avg===|=====SAM=====|======ERGAS=======|=======SCC=======')
MatrixResults

%% %%%%%%%%%%% End %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/FE-HPM/FE.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           FE estimates the extraction detail filter via deconvolution.
%
% Interface:
%           PSF_l = FE(I_MS,I_PAN,ratio,tap,lambda,mu,th,num_iter,filtername)
%
% Inputs:
%           I_MS:       MS image upsampled at PAN scale;
%           I_PAN:      PAN image;
%           ratio:      Scale ratio between MS and PAN. Pre-condition: Integer value.
%           tap:        Filter support;
%           lambda:     Coefficient for weighting the energy regularization term;
%           mu:         Coefficient for weighting the derivative regularization terms;
%           th:         Threshold on the kernel (it cuts to 0 values below threshold);
%           num_iter:   Max number of iteration (at least 3; not sensitive);
%           filtername: Kind of derivative ('Naive2', 'Naive3', 'Basic', 'Prewitt'
%                       or 'Sobel'; default: 'Basic'. Any other name also
%                       selects 'Basic').
%
% Output:
%           PSF_l:      Estimated point spread function.
%
% Reference:
%           [Vivone15]  G. Vivone, M. Simoes, M. Dalla Mura, R. Restaino, J. Bioucas-Dias, G. A. Licciardi, and J. Chanussot, "Pansharpening based on semiblind deconvolution",
%                       IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 4, pp. 1997-2010, 2015.
%           [Vivone20]  G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods",
%                       IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function PSF_l = FE(I_MS,I_PAN,ratio,tap,lambda,mu,th,num_iter,filtername)

% Honour the documented default derivative filter.
if nargin < 9 || isempty(filtername)
    filtername = 'Basic';
end

% PSF_l is only assigned inside the iteration loop, so at least one pass is
% required; fail early with a clear message instead of "output not assigned".
if isempty(num_iter) || num_iter(1) < 1
    error('FE:invalidNumIter','num_iter must be a number >= 1.');
end

% Odd supports get one extra sample so that the cropped kernel is tap x tap.
sum_tap  = double(rem(tap,2) ~= 0);
half_tap = floor(tap/2);

[R_SIZE,C_SIZE] = size(I_PAN);

%%% Vertical (gv) and horizontal (gh) derivative kernels
switch filtername
    case 'Naive2'
        gv = [-1; 1];
        gh = [-1 1];
    case 'Naive3'
        gv = [-1; 0; 1];
        gh = [-1 0 1];
    case 'Prewitt'
        gv = [-1 -1 -1; 0 0 0; 1 1 1];
        gh = gv.';
    case 'Sobel'
        gv = [-1 -2 -1; 0 0 0; 1 2 1];
        gh = gv.';
    otherwise   % 'Basic' and every unrecognised name
        gv = [-1 -1; 1 1];
        gh = gv.';
end

%%% Squared magnitude of the derivative filters in the frequency domain
gvf = fft2(gv,R_SIZE,C_SIZE);
ghf = fft2(gh,R_SIZE,C_SIZE);
gf2sum = conj(gvf) .* gvf + conj(ghf) .* ghf;

H_E = double(I_PAN);

% Loop invariants: MS stack augmented with a constant plane (bias term of the
% regression) and the averaging kernel used by edgetaper.
I_MS_aug  = cat(3,I_MS,ones(size(I_MS,1),size(I_MS,2)));
taper_psf = ones(half_tap,half_tap)./((half_tap)^2);

for jj = 1 : num_iter

    %%% Filter PAN to estimate alpha set
    if jj == 1
        PAN_LP = LPfilter(H_E,ratio);
    else
        % From the second pass on, use the kernel estimated so far.
        PAN_LP = imfilter(H_E,PSF_l,'replicate');
    end

    %%% Estimate alpha (stored along the third dimension for the weighting below)
    alpha_coef = reshape(estimation_alpha(I_MS_aug,PAN_LP,'global'),1,1,[]);

    It_E = sum(I_MS_aug .* repmat(alpha_coef,[size(I_MS,1) size(I_MS,2) 1]),3);

    %%% Edge taper (H_E is overwritten, so the taper accumulates over passes)
    H_E  = edgetaper(H_E,taper_psf);
    It_E = edgetaper(It_E,taper_psf);

    %%% Filter Estimation (regularised deconvolution in the frequency domain)
    H_Ef = fft2(H_E);   % computed once, used in numerator and denominator
    PSF  = real(fftshift(ifft2(conj(H_Ef) .* fft2(It_E) ./ (abs(H_Ef).^2 + lambda + mu * gf2sum))));

    %%% Thresholding
    PSF(PSF < th) = 0;

    %%% Cut using the support dimension and center (on the kernel peak)
    [~, maxIndex] = max(PSF(:));
    [rm, cm] = ind2sub(size(PSF), maxIndex);

    PSF_l = PSF(rm - half_tap : rm + half_tap - 1 + sum_tap, cm - half_tap : cm + half_tap - 1 + sum_tap);
    PSF_l = PSF_l ./ sum(PSF_l(:));   % unit gain

end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/FE-HPM/FE_HPM.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           FE_HPM fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by
%           exploiting the high pass modulation injection model and the estimated filter via deconvolution.
%
% Interface:
%           [I_Fus,PSF_l] = FE_HPM(I_MS,I_PAN,ratio,tap,lambda,mu,th,num_iter,filtername)
%
% Inputs:
%           I_MS:       MS image upsampled at PAN scale;
%           I_PAN:      PAN image;
%           ratio:      Scale ratio between MS and PAN. Pre-condition: Integer value.
%           tap:        Filter support;
%           lambda:     Coefficient for weighting the energy regularization term;
%           mu:         Coefficient for weighting the derivative regularization terms;
%           th:         Threshold on the kernel (it cuts to 0 values below threshold);
%           num_iter:   Max number of iteration (at least 3; not sensitive);
%           filtername: Kind of derivative (default: 'Basic')
%
% Outputs:
%           I_Fus:      Pansharpened image;
%           PSF_l:      Estimated point spread function.
%
% Reference:
%           [Vivone15]  G. Vivone, M. Simoes, M. Dalla Mura, R. Restaino, J. Bioucas-Dias, G. A. Licciardi, and J. Chanussot, "Pansharpening based on semiblind deconvolution",
%                       IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 4, pp. 1997-2010, 2015.
%           [Vivone20]  G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods",
%                       IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [I_Fus,PSF_l] = FE_HPM(I_MS,I_PAN,ratio,tap,lambda,mu,th,num_iter,filtername)
% FE_HPM  High-pass-modulation fusion driven by a kernel estimated with FE.
%
%   [I_Fus,PSF_l] = FE_HPM(I_MS,I_PAN,ratio,tap,lambda,mu,th,num_iter,filtername)
%   returns the fused image I_Fus (same size as I_MS) and the kernel PSF_l
%   produced by FE. filtername is optional and defaults to 'Basic'.

    % The header documents 'Basic' as the default derivative filter.
    if nargin < 9 || isempty(filtername)
        filtername = 'Basic';
    end

    I_MS = double(I_MS);
    nBands = size(I_MS,3);

    %%% Equalization: match mean and standard deviation of PAN to each band
    imageHR = repmat(double(I_PAN),[1 1 nBands]);
    for ii = 1 : nBands
        panBand = imageHR(:,:,ii);
        msBand = I_MS(:,:,ii);
        imageHR(:,:,ii) = (panBand - mean2(panBand)).*(std2(msBand)./std2(panBand)) + mean2(msBand);
    end

    %%% Kernel estimation (FE receives the original, non-equalized PAN)
    PSF_l = FE(I_MS,I_PAN,ratio,tap,lambda,mu,th,num_iter,filtername);

    %%% Low-pass PAN per band: filter, decimate by ratio, interpolate back
    PAN_LP = zeros(size(imageHR));
    for ii = 1 : nBands
        PAN_LP(:,:,ii) = imfilter(imageHR(:,:,ii),PSF_l,'replicate');
        t = imresize(PAN_LP(:,:,ii),1/ratio,'nearest');
        PAN_LP(:,:,ii) = interp23tap(t,ratio);
    end

    %%% High-pass modulation; eps keeps the division finite
    I_Fus = I_MS .* (imageHR ./ (PAN_LP + eps));

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/FR_Assessment.tex
================================================


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GLP/GS2_GLP.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           GS2_GLP fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by
%           exploiting the Gram-Schmidt (GS) mode 2 algorithm with Generalized Laplacian Pyramid (GLP) decomposition.
%
% Interface:
%           I_Fus_GS2_GLP = GS2_GLP(I_MS,I_PAN,ratio,sensor)
%
% Inputs:
%           I_MS:           MS image upsampled at PAN scale;
%           I_PAN:          PAN image;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value;
%           sensor:         String for type of sensor (e.g. 'WV2','IKONOS').
%
% Outputs:
%           I_Fus_GS2_GLP:  GS2_GLP pansharpened image.
%
% References:
%           [Aiazzi06]      B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M.
Selva, MTF-tailored multiscale fusion of high-resolution MS and Pan imagery, % Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006. % [Alparone07] L. Alparone, L. Wald, J. Chanussot, C. Thomas, P. Gamba, and L. M. Bruce, Comparison of pansharpening algorithms: Outcome % of the 2006 GRS-S Data Fusion Contest, IEEE Transactions on Geoscience and Remote Sensing, vol. 45, no. 10, pp. 3012-3021, % October 2007. % [Vivone15] G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms, % IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_GS2_GLP = GS2_GLP(I_MS,I_PAN,ratio,sensor)
% GS2_GLP  Additive detail injection with per-band regression gains.
%   Each band receives the PAN detail (PAN minus its low-pass version) scaled
%   by cov(PAN_LP,MS)/var(PAN_LP) computed over the whole band.

    msUp = double(I_MS);
    numBands = size(msUp,3);
    panStack = repmat(double(I_PAN),[1 1 numBands]);

    % One filter per band, as returned by genMTF.
    mtfKernels = genMTF(ratio, sensor, size(I_MS,3));

    %%% Low-pass PAN: filter, decimate by ratio, interpolate back
    panLow = zeros(size(I_MS));
    for b = 1 : numBands
        panLow(:,:,b) = imfilter(panStack(:,:,b),real(mtfKernels(:,:,b)),'replicate');
        decimated = imresize(panLow(:,:,b),1/ratio,'nearest');
        panLow(:,:,b) = interp23tap(decimated,ratio);
    end

    %%% Detail Extraction
    detail = panStack - panLow;

    %%% Coefficients and fusion, band by band
    I_Fus_GS2_GLP = zeros(size(msUp));
    for b = 1 : numBands
        msBand = msUp(:,:,b);
        lowBand = panLow(:,:,b);
        covMat = cov(lowBand(:),msBand(:));
        gain = covMat(1,2)/var(lowBand(:));
        I_Fus_GS2_GLP(:,:,b) = msBand + detail(:,:,b) .* gain;
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GLP/MTF_GLP.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           MTF_GLP fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by
%           exploiting the Modulation Transfer Function - Generalized Laplacian Pyramid (MTF-GLP) algorithm.
%
% Interface:
%           I_Fus_MTF_GLP = MTF_GLP(I_MS,I_PAN,sensor,ratio)
%
% Inputs:
%           I_MS:           MS image upsampled at PAN scale;
%           I_PAN:          PAN image;
%           sensor:         String for type of sensor (e.g. 'WV2','IKONOS');
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           I_Fus_MTF_GLP:  MTF_GLP pansharpened image.
%
% References:
%           [Aiazzi02]      B. Aiazzi, L. Alparone, S. Baronti, and A. Garzelli, Context-driven fusion of high spatial and spectral resolution images based on
%                           oversampled multiresolution analysis, IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp.
2300-2312, October % 2002. % [Aiazzi06] B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, MTF-tailored multiscale fusion of high-resolution MS and Pan imagery, % Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006. % [Vivone14a] G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, Contrast and error-based fusion schemes for multispectral % image pansharpening, IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014. % [Vivone15a] G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms, % IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015. % [Alparone17] L. Alparone, A. Garzelli, and G. Vivone, "Intersensor statistical matching for pansharpening: Theoretical issues and practical solutions", % IEEE Transactions on Geoscience and Remote Sensing, vol. 55, no. 8, pp. 4682-4695, 2017. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % %
%
% Version: 2
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_MTF_GLP = MTF_GLP(I_MS,I_PAN,sensor,ratio)
% MTF_GLP  Additive injection of the PAN detail with unit gain.
%   PAN is first equalized to each MS band, then its low-pass version is
%   subtracted and the difference is added to the band.

    I_MS = double(I_MS);
    bandCount = size(I_MS,3);

    %%% Equalization: the scale uses the std of the Gaussian-filtered PAN
    panEq = repmat(double(I_PAN),[1 1 bandCount]);
    for k = 1 : bandCount
        panBand = panEq(:,:,k);
        msBand = I_MS(:,:,k);
        scale = std2(msBand)./std2(LPfilterGauss(panBand,ratio));
        panEq(:,:,k) = (panBand - mean2(panBand)).*scale + mean2(msBand);
    end

    % One filter per band, as returned by genMTF.
    kernels = genMTF(ratio, sensor, size(I_MS,3));

    %%% Low-pass PAN: filter, decimate by ratio, interpolate back
    panLow = zeros(size(I_MS));
    for k = 1 : bandCount
        panLow(:,:,k) = imfilter(panEq(:,:,k),real(kernels(:,:,k)),'replicate');
        coarse = imresize(panLow(:,:,k),1/ratio,'nearest');
        panLow(:,:,k) = interp23tap(coarse,ratio);
    end

    %%% Fusion
    I_Fus_MTF_GLP = I_MS + panEq - panLow;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GLP/MTF_GLP_FS.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           MTF_GLP_FS fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by
%           exploiting the Modulation Transfer Function - Generalized Laplacian Pyramid (MTF-GLP) and a new Full Resolution Regression-based injection model.
%
% Interface:
%           I_Fus_MTF_GLP_FS = MTF_GLP_FS(I_MS,I_PAN,sensor,ratio)
%
% Inputs:
%           I_MS:               MS image upsampled at PAN scale;
%           I_PAN:              PAN image;
%           sensor:             String for type of sensor (e.g. 'WV2','IKONOS');
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           I_Fus_MTF_GLP_FS:   Pansharpened image.
%
% Reference:
%           [Vivone18]          G. Vivone, R. Restaino,and J. Chanussot, "Full scale regression-based injection coefficients for panchromatic sharpening,"
%                               IEEE Transactions on Image Processing, vol. 27, no. 7, pp. 3418-3431, Jul. 2018.
%           [Vivone20]          G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J.
Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods",
%                               IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_MTF_GLP_FS = MTF_GLP_FS(I_MS,I_PAN,sensor,ratio)
% MTF_GLP_FS  Additive injection with a full-scale regression gain per band.
%   The gain of band k is cov(MS_k,PAN) / cov(PAN_LP_k,PAN).

    pan = double(I_PAN);
    I_MS = double(I_MS);
    panVec = pan(:);

    % One filter per band, as returned by genMTF.
    kernels = genMTF(ratio, sensor, size(I_MS,3));

    I_Fus_MTF_GLP_FS = zeros(size(I_MS));

    for k = 1 : size(I_MS,3)

        %%% Low resolution PAN image: filter, decimate, interpolate back
        panLow = imfilter(pan,real(kernels(:,:,k)),'replicate');
        coarse = imresize(panLow,1/ratio,'nearest');
        panLow = interp23tap(coarse,ratio);

        %%% Injection coefficient for band k
        msBand = I_MS(:,:,k);
        covMsPan = cov(msBand(:), panVec);
        covLowPan = cov(panLow(:), panVec);
        gainFS = covMsPan(1,2)./covLowPan(1,2);

        %%% Fusion rule
        I_Fus_MTF_GLP_FS(:,:,k) = msBand + gainFS .* (pan - panLow);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GLP/MTF_GLP_HPM.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           MTF_GLP_HPM fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by
%           exploiting the Modulation Transfer Function - Generalized Laplacian Pyramid (MTF-GLP) with High Pass Modulation (HPM) injection model algorithm.
%
% Interface:
%           I_Fus_MTF_GLP_HPM = MTF_GLP_HPM(I_MS,I_PAN,sensor,ratio)
%
% Inputs:
%           I_MS:               MS image upsampled at PAN scale;
%           I_PAN:              PAN image;
%           sensor:             String for type of sensor (e.g. 'WV2','IKONOS');
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           I_Fus_MTF_GLP_HPM:  MTF_GLP_HPM pansharpened image.
%
% References:
%           [Aiazzi03]          B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M.
Selva, An MTF-based spectral distortion minimizing model for Pan-sharpening % of very high resolution multispectral images of urban areas, in Proceedings of URBAN 2003: 2nd GRSS/ISPRS Joint Workshop on % Remote Sensing and Data Fusion over Urban Areas, 2003, pp. 90-94. % [Aiazzi06] B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, MTF-tailored multiscale fusion of high-resolution MS and Pan imagery, % Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006. % [Vivone14a] G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, Contrast and error-based fusion schemes for multispectral % image pansharpening, IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014. % [Vivone15] G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms, % IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015. % [Alparone17] L. Alparone, A. Garzelli, and G. Vivone, "Intersensor statistical matching for pansharpening: Theoretical issues and practical solutions", % IEEE Transactions on Geoscience and Remote Sensing, vol. 55, no. 8, pp. 4682-4695, 2017. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_MTF_GLP_HPM = MTF_GLP_HPM(I_MS,I_PAN,sensor,ratio)
% MTF_GLP_HPM  Multiplicative (high-pass modulation) injection.
%   Every MS band is multiplied by the ratio between the equalized PAN and
%   its low-pass version.

    I_MS = double(I_MS);
    bandCount = size(I_MS,3);

    %%% Equalization: the scale uses the std of the Gaussian-filtered PAN
    panEq = repmat(double(I_PAN),[1 1 bandCount]);
    for k = 1 : bandCount
        panBand = panEq(:,:,k);
        msBand = I_MS(:,:,k);
        scale = std2(msBand)./std2(LPfilterGauss(panBand,ratio));
        panEq(:,:,k) = (panBand - mean2(panBand)).*scale + mean2(msBand);
    end

    % One filter per band, as returned by genMTF.
    kernels = genMTF(ratio, sensor, size(I_MS,3));

    %%% Low-pass PAN: filter, decimate by ratio, interpolate back
    panLow = zeros(size(I_MS));
    for k = 1 : bandCount
        panLow(:,:,k) = imfilter(panEq(:,:,k),real(kernels(:,:,k)),'replicate');
        coarse = imresize(panLow(:,:,k),1/ratio,'nearest');
        panLow(:,:,k) = interp23tap(coarse,ratio);
    end

    %%% Fusion; eps keeps the division finite
    modulation = panEq ./ (panLow + eps);
    I_Fus_MTF_GLP_HPM = I_MS .* modulation;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GLP/MTF_GLP_HPM_Haze_min.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Gaussian Laplacian Pyramid with high pass modulation injection model haze corrected.
%
% Interface:
%           I_Fus_MTF_GLP_HPM = MTF_GLP_HPM_Haze_min(I_MS,I_PAN,sensor,ratio,decimation)
%
% Inputs:
%           I_MS:               MS image upsampled at PAN scale;
%           I_PAN:              PAN image;
%           sensor:             String for type of sensor (e.g. 'WV2','IKONOS');
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value;
%           decimation:         Flag decimation (1: decimated PAN_LP).
%
% Outputs:
%           I_Fus_MTF_GLP_HPM:  Pansharpened image.
%
% References:
%           [Lolli17]           S. Lolli, L. Alparone, A. Garzelli, and G. Vivone, "Haze correction for contrast-based multispectral pansharpening",
%                               IEEE Geoscience and Remote Sensing Letters, vol. 14, no. 12, pp. 2255-2259, 2017.
%           [Garzelli18]        A. Garzelli, B. Aiazzi, L. Alparone, S. Lolli, and G.
Vivone,
%                               "Multispectral Pansharpening with Radiative Transfer-Based Detail-Injection Modeling for Preserving Changes in Vegetation Cover",
%                               MDPI Remote Sensing, vol. 10, no. 8, pp. 1 - 18, 2018.
%           [Vivone20]          G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods",
%                               IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_MTF_GLP_HPM = MTF_GLP_HPM_Haze_min(I_MS,I_PAN,sensor,ratio,decimation)
% MTF_GLP_HPM_Haze_min  High-pass modulation with a per-band haze offset.
%
%   I_MS, I_PAN   MS stack and PAN image; both are converted to double before
%                 any statistic is computed, so integer inputs are accepted;
%   sensor,ratio  forwarded to MTF; ratio also drives LPfilterGauss and the
%                 optional decimation;
%   decimation    when true, the low-pass PAN is decimated by ratio and
%                 interpolated back with interp23tap.
%
%   Fusion rule: (MS - L) .* (PAN - Lp) ./ (PAN_LP - Lp) + L, where L holds
%   the per-band haze values and Lp their combination through the regression
%   weights returned by estimation_alpha.

    % Convert first: prctile does not accept integer arrays, and cat() of a
    % double with an integer array would yield an integer stack.
    I_MS = double(I_MS);
    imageHR = double(I_PAN);

    nRows = size(I_MS,1);
    nCols = size(I_MS,2);
    nBands = size(I_MS,3);

    %%% Haze value of each band
    minMS = zeros(1,1,nBands);
    if nBands == 4
        % Scaled 1st percentile; band order taken as B, G, R, NIR.
        prc = 1;
        hazeScale = [0.95 0.45 0.40 0.05];
        for ii = 1 : 4
            band = I_MS(:,:,ii);
            minMS(1,1,ii) = hazeScale(ii) * prctile(band(:),prc);
        end
    else
        for ii = 1 : nBands
            minMS(1,1,ii) = min(min(I_MS(:,:,ii)));
        end
    end

    %%% Haze value of PAN from the regression of low-pass PAN on [1, MS]
    I_PAN_LR = LPfilterGauss(imageHR,ratio);
    w = estimation_alpha(cat(3,ones(size(I_PAN_LR)),I_MS),I_PAN_LR,'global');
    wp = w' * [1;squeeze(minMS)];

    L = repmat(minMS, [nRows nCols]);
    Lp = wp .* ones([nRows nCols]);

    %%% Low-pass PAN (no histogram equalization is applied in this variant)
    imageHR = repmat(imageHR,[1 1 nBands]);
    PAN_LP = MTF(imageHR,sensor,ratio);

    if decimation
        for ii = 1 : nBands
            t = imresize(PAN_LP(:,:,ii),1/ratio,'nearest');
            PAN_LP(:,:,ii) = interp23tap(t,ratio);
        end
    end

    %%% Haze-corrected high-pass modulation; eps keeps the division finite
    P_PL = (imageHR - Lp) ./ (PAN_LP - Lp + eps);
    MS_L = I_MS - L;
    I_Fus_MTF_GLP_HPM = MS_L .* P_PL + L;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GLP/MTF_GLP_HPM_R.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % A Regression-Based High-Pass Modulation Pansharpening Approach (Global Version) % % Interface: % I_Fus_MTF_GLP_HPM_R = MTF_GLP_HPM_R(I_MS,I_PAN,sensor,ratio) % % Inputs: % I_MS: MS image upsampled at PAN scale; % I_PAN: PAN image; % sensor: String for type of sensor (e.g. 'WV2','IKONOS'); % ratio: Scale ratio between MS and PAN. Pre-condition: Integer value. % % Outputs: % I_Fus_MTF_GLP_HPM_R: Pansharpened image. % % Reference: % [Vivone18] G. Vivone, R. Restaino, and J. Chanussot, "A regression-based high-pass modulation pansharpening approach," % IEEE Transactions on Geoscience and Remote Sensing, vol. 56, no. 2, pp. 984-996, Feb. 2018. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315. 
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_MTF_GLP_HPM_R = MTF_GLP_HPM_R(I_MS,I_PAN,sensor,ratio)
% MTF_GLP_HPM_R  High-pass modulation with a regression-derived offset.
%   For each band, a slope g = cov(MS,PAN_LP)/var(PAN_LP) gives the offset
%   cb = mean(MS)/g - mean(PAN), added to numerator and denominator of the
%   modulation ratio.

    pan = double(I_PAN);
    I_MS = double(I_MS);
    panMean = mean(pan(:));

    % One filter per band, as returned by genMTF.
    kernels = genMTF(ratio, sensor, size(I_MS,3));

    I_Fus_MTF_GLP_HPM_R = zeros(size(I_MS));

    for k = 1 : size(I_MS,3)

        %%% Low resolution PAN image: filter, decimate, interpolate back
        panLow = imfilter(pan,real(kernels(:,:,k)),'replicate');
        coarse = imresize(panLow,1/ratio,'nearest');
        panLow = interp23tap(coarse,ratio);

        %%% Regression coefficients
        msBand = I_MS(:,:,k);
        covMat = cov(msBand(:),panLow(:));
        slope = covMat(1,2)./covMat(2,2);
        offset = mean(msBand(:))./slope - panMean;

        %%% Fusion rule; eps keeps the division finite
        I_Fus_MTF_GLP_HPM_R(:,:,k) = msBand .* (pan + offset) ./ (panLow + offset + eps);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GS/GS.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           GS fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by
%           exploiting the Gram-Schmidt (GS) transformation.
%
% Interface:
%           I_Fus_GS = GS(I_MS,I_PAN)
%
% Inputs:
%           I_MS:       MS image upsampled at PAN scale;
%           I_PAN:      PAN image.
%
% Outputs:
%           I_Fus_GS:   GS pansharpened image.
%
% References:
%           [Laben00]   C. A. Laben and B. V. Brower, Process for enhancing the spatial resolution of multispectral imagery using pan-sharpening, Eastman
%                       Kodak Company, Tech. Rep. US Patent # 6,011,875, 2000.
%           [Aiazzi07]  B. Aiazzi, S. Baronti, and M. Selva, Improving component substitution Pansharpening through multivariate regression of MS+Pan
%                       data, IEEE Transactions on Geoscience and Remote Sensing, vol. 45, no. 10, pp. 3230-3239, October 2007.
%           [Vivone15]  G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L.
Wald, A Critical Comparison Among Pansharpening Algorithms,
%                       IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Vivone20]  G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods",
%                       IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_GS = GS(I_MS,I_PAN)
% GS  Component substitution with the band average as intensity.
%   The zero-mean intensity is replaced by the PAN image matched to it in
%   mean and standard deviation; the difference is injected into each
%   zero-mean band with gain cov(I0,band)/var(I0), and the original band
%   mean is restored at the end.

    msUp = double(I_MS);
    pan = double(I_PAN);
    nBands = size(I_MS,3);

    %%% Remove means from the MS bands (the means are reused at the end)
    bandMeans = zeros(1,nBands);
    ms0 = zeros(size(I_MS));
    for b = 1 : nBands
        bandMeans(b) = mean2(msUp(:,:,b));
        ms0(:,:,b) = msUp(:,:,b) - bandMeans(b);
    end

    %%% Intensity and its zero-mean version
    inten = mean(msUp,3);
    inten0 = inten - mean2(inten);

    %%% PAN matched to the intensity statistics
    pan = (pan - mean2(pan)) .* (std2(inten0)./std2(pan)) + mean2(inten0);

    %%% Detail Extraction
    delta = pan - inten0;
    intenVar = var(inten0(:));

    %%% Fusion and final mean equalization, band by band
    I_Fus_GS = zeros([size(I_MS,1) size(I_MS,2) size(I_MS,3)]);
    for b = 1 : nBands
        band0 = ms0(:,:,b);
        covMat = cov(inten0(:),band0(:));
        gain = covMat(1,2)/intenVar;
        fused = band0 + delta .* gain;
        I_Fus_GS(:,:,b) = fused - mean2(fused) + bandMeans(b);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GS/GSA.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % GSA fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by % exploiting the Gram-Schmidt Adaptive (GSA) algorithm. % % Interface: % I_Fus_GSA = GSA(I_MS,I_PAN,I_MS_LR,ratio) % % Inputs: % I_MS: MS image upsampled at PAN scale; % I_PAN: PAN image; % I_MS_LR: MS image; % ratio: Scale ratio between MS and PAN. Pre-condition: Integer value. % % Outputs: % I_Fus_GSA: GSA pansharpened image. % % References: % [Aiazzi07] B. Aiazzi, S. Baronti, and M. Selva, Improving component substitution Pansharpening through multivariate regression of MS+Pan % data, IEEE Transactions on Geoscience and Remote Sensing, vol. 45, no. 10, pp. 3230-3239, October 2007. % [Vivone15] G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms, % IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_GSA = GSA(I_MS,I_PAN,I_MS_LR,ratio)
% GSA  Component substitution with a regression-based intensity.
%   The intensity weights come from estimation_alpha, fed with the zero-mean
%   low-resolution MS bands and the zero-mean PAN after LPfilterPlusDec. The
%   PAN-minus-intensity detail is injected into each zero-mean band with gain
%   cov(I0,band)/var(I0); the original band mean is restored at the end.

    msUp = double(I_MS);
    pan = double(I_PAN);
    msLow = double(I_MS_LR);
    nBands = size(I_MS,3);

    %%% Remove means from the upsampled MS bands (means reused at the end)
    bandMeans = zeros(1,nBands);
    ms0 = zeros(size(I_MS));
    for b = 1 : nBands
        bandMeans(b) = mean2(msUp(:,:,b));
        ms0(:,:,b) = msUp(:,:,b) - bandMeans(b);
    end

    %%% Remove means from the low-resolution MS bands
    msLow0 = zeros(size(I_MS_LR));
    for b = 1 : size(I_MS_LR,3)
        msLow0(:,:,b) = msLow(:,:,b) - mean2(msLow(:,:,b));
    end

    %%% Intensity
    pan0 = pan - mean2(pan);
    panLow0 = LPfilterPlusDec(pan0,ratio);
    weights(1,1,:) = estimation_alpha(cat(3,msLow0,ones(size(I_MS_LR,1),size(I_MS_LR,2))),panLow0,'global');
    inten = sum(cat(3,ms0,ones(size(I_MS,1),size(I_MS,2))) .* repmat(weights,[size(I_MS,1) size(I_MS,2) 1]),3);

    %%% Remove mean from the intensity
    inten0 = inten - mean2(inten);
    intenVar = var(inten0(:));

    %%% Detail Extraction (zero-mean PAN minus zero-mean intensity)
    delta = pan0 - inten0;

    %%% Fusion and final mean equalization, band by band
    I_Fus_GSA = zeros([size(I_MS,1) size(I_MS,2) size(I_MS,3)]);
    for b = 1 : nBands
        band0 = ms0(:,:,b);
        covMat = cov(inten0(:),band0(:));
        gain = covMat(1,2)/intenVar;
        fused = band0 + delta .* gain;
        I_Fus_GSA(:,:,b) = fused - mean2(fused) + bandMeans(b);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GS/GS_Segm.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%
GS_Segm fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by
%           exploiting the segmentation-based version of the Gram-Schmidt algorithm.
%
% Interface:
%           PanSharpenedImage = GS_Segm(I_MS,I_PAN,I_LR_input,S)
%
% Inputs:
%           I_MS:                MS image upsampled at PAN scale
%           I_PAN:               PAN image
%           I_LR_input:          Low Resolution PAN Image
%           S:                   Segmentation
%
% Outputs:
%           PanSharpenedImage:   Pansharpened image
%
% Reference:
%           [Restaino17]    R. Restaino, M. Dalla Mura, G. Vivone, J. Chanussot, Context-Adaptive Pansharpening Based on Image Segmentation,
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 55, no. 2, pp. 753-766, February 2017.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods",
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function PanSharpenedImage = GS_Segm(I_MS,I_PAN,I_LR_input,S)
% GS_Segm  Additive injection with one regression gain per segment and band.
%   For every label in S and every band, the gain is
%   cov(I_LR,MS)/var(I_LR) over the pixels of that segment; the fused band is
%   MS + gain .* (PAN - I_LR).

    I_MS = double(I_MS);
    nBands = size(I_MS,3);
    I_PAN = repmat(double(I_PAN), [1, 1, nBands]);
    I_LR_input = double(I_LR_input);

    % A single-band low-resolution image is shared by all bands.
    if size(I_LR_input, 3) == 1
        I_LR_input = repmat(I_LR_input, [1, 1, nBands]);
    end
    if size(I_LR_input, 3) ~= size(I_PAN, 3)
        error('I_LP should have the same number of bands as PAN');
    end

    % Segment masks do not depend on the band: build them once.
    labels = unique(S);
    nSegments = length(labels);
    masks = cell(1,nSegments);
    for s = 1 : nSegments
        masks{s} = S == labels(s);
    end

    % Per-pixel gain map: constant inside each segment.
    gainMap = zeros(size(I_MS));
    for b = 1 : nBands
        msBand = I_MS(:,:,b);
        lowBand = I_LR_input(:,:,b);
        bandGain = zeros(size(lowBand));
        for s = 1 : nSegments
            inSeg = masks{s};
            covMat = cov(lowBand(inSeg),msBand(inSeg));
            bandGain(inSeg) = covMat(1,2)/var(lowBand(inSeg));
        end
        gainMap(:,:,b) = bandGain;
    end

    detailsPan = I_PAN - I_LR_input;
    PanSharpenedImage = gainMap .* detailsPan + I_MS;

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/MF/MF_HG_Pansharpen.m ================================================ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % Morphological Pyramid Decomposition using Half-Gradient. % % Interface: % I_Fus_MF_HG = MF_HG_Pansharpen(I_MS,I_PAN,ratio) % % Inputs: % I_MS: MS image upsampled at PAN scale; % I_PAN: PAN image; % ratio: Scale ratio between MS and PAN. Pre-condition: Integer value. % % Outputs: % I_Fus_MF_HG: Morphological Half Gradient (HG) pansharpened image. % % Reference: % [Restaino16] R. Restaino, G. Vivone, M. Dalla Mura, and J. Chanussot, Fusion of Multispectral and Panchromatic Images Based on Morphological Operators, % IEEE Transactions on Image Processing, vol. 25, no. 6, pp. 2882-2895, Jun. 2016. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315. 
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_MF_HG = MF_HG_Pansharpen(I_MS,I_PAN,ratio)
% MF_HG_Pansharpen  High-pass modulation with a morphological low-pass PAN.
%   PAN is equalized to each MS band, decomposed with Pyr_Dec, and every band
%   is multiplied by the ratio between the first and the last pyramid level.

    msUp = double(I_MS);
    bandCount = size(msUp,3);

    % Equalization: match mean and standard deviation of PAN to each band
    panEq = repmat(double(I_PAN),[1 1 bandCount]);
    for k = 1 : bandCount
        panBand = panEq(:,:,k);
        msBand = msUp(:,:,k);
        panEq(:,:,k) = (panBand - mean2(panBand)).*(std2(msBand)./std2(panBand)) + mean2(msBand);
    end

    % Structuring element (3x3 cross)
    crossSE = [0 1 0; 1 1 1; 0 1 0];

    % Interpolation method used when pyramid levels are resized
    resizeMethod = 'bilinear';

    % Number of levels
    numLevels = ceil(log2(ratio))+1;

    % Pyramid construction
    pyramid = Pyr_Dec(panEq,crossSE,numLevels,resizeMethod);

    % Fusion; eps keeps the division finite
    panLow = pyramid(:,:,:,numLevels);
    I_Fus_MF_HG = msUp .* (pyramid(:,:,:,1)./(panLow+eps));

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/MF/Pyr_Dec.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Morphological Pyramid Decomposition using Half-Gradient.
%
% Interface:
%           P = Pyr_Dec(Im,textse,lev,int_meth)
%
% Inputs:
%           Im:         Image to decompose;
%           textse:     Structuring Element;
%           lev:        Number of decomposition levels;
%           int_meth:   Interpolation method.
%
% Outputs:
%           P:          Morphological Pyramid using Half-Gradient.
%
% References:
%           [Vivone14]  G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, Contrast and error-based fusion schemes for multispectral
%                       image pansharpening, IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15]  G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms,
%                       IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Restaino16] R. Restaino, G. Vivone, M. Dalla Mura, and J.
Chanussot, Fusion of Multispectral and Panchromatic Images Based on Morphological Operators,
%                       IEEE Transactions on Image Processing, vol. 25, no. 6, pp. 2882-2895, Jun. 2016.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function P = Pyr_Dec(Im,textse,lev,int_meth)
% Pyr_Dec  Morphological pyramid; every level is resized back to the size of Im.
%
%   Im        image stack (rows x cols x bands);
%   textse    structuring element passed to imdilate / imerode;
%   lev       number of levels, the first one being Im itself;
%   int_meth  interpolation method passed to imresize.
%
%   P(:,:,:,k) is level k at full resolution. If a resized level contains
%   non-finite values, all levels are replaced by copies of Im and the
%   decomposition stops.

    P(:,:,:,1) = Im;

    % Spatial size of each level, needed to undo the decimations one by one.
    Sizes = zeros(max(lev,1),2);
    Sizes(1,:) = [size(Im,1), size(Im,2)];

    current = Im;
    first = true;

    for ii = 2 : lev

        % Half Gradient: smooth with the half-difference of the two gradients
        PD = imdilate(current,textse);
        PE = imerode(current,textse);
        rho_minus = current - PE;
        rho_plus = PD - current;
        D = rho_minus - rho_plus;
        PS = current - 0.5*D;
        % PS = 0.5*squeeze(PD+PE); %equivalently

        % Downsampling by 2 (the first level keeps the even samples,
        % the following ones the odd samples)
        if first
            current = PS(2:2:end,2:2:end,:);
            first = false;
        else
            current = PS(1:2:end,1:2:end,:);
        end

        % Store rows AND columns: storing the row count twice made
        % non-square images pass through wrongly shaped intermediate sizes.
        Sizes(ii,:) = [size(current,1) size(current,2)];

        % Upsample step by step back to the size of Im
        % (imresize only resizes the first two dimensions of a stack).
        resized = current;
        for ir = ii : -1 : 2
            resized = imresize(resized,[Sizes(ir-1,1) Sizes(ir-1,2)],int_meth);
        end

        if ~all(isfinite(resized(:)))
            % Fall back to a flat pyramid made of copies of the input.
            P(:,:,:,1:lev) = repmat(P(:,:,:,1),1,1,1,lev);
            break
        else
            P(:,:,:,ii) = resized;
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PRACS/PRACS.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           PRACS fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by
%           exploiting the Partial Replacement Adaptive CS (PRACS) algorithm.
%
% Interface:
%           I_Fus_PRACS = PRACS(I_MS,I_PAN,ratio)
%
% Inputs:
%           I_MS:           MS image upsampled at PAN scale;
%           I_PAN:          PAN image;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           I_Fus_PRACS:    PRACS pansharpened image.
%
% References:
%           [Choi11]        J. Choi, K. Yu, and Y. Kim, A new adaptive component-substitution-based satellite image fusion by using partial replacement, IEEE
%                           Transactions on Geoscience and Remote Sensing, vol. 49, no. 1, pp. 295-309, January 2011.
%           [Vivone15]      G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms,
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods",
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_PRACS = PRACS(I_MS,I_PAN,ratio)
% PRACS  Partial-replacement component substitution.
%   Steps: match each MS band to PAN, regress a low-resolution PAN on the
%   matched bands to get an initial intensity, blend PAN and each matched
%   band according to their correlation with that intensity, regress again
%   per band, and inject the weighted, locally adjusted detail.

    beta = 0.95; % for 11-bit data
    % beta = 1.95; % for 8-bit data

    I_MS = double(I_MS);
    I_PAN = double(I_PAN);
    [N,M,L] = size(I_MS);
    nPix = N*M;

    %%% Histogram matching of each MS band to Pan (negative values clipped)
    panMean = mean2(I_PAN);
    panStd = std2(I_PAN);
    msMatched = zeros(N,M,L);
    for k = 1:L
        band = I_MS(:,:,k);
        bandStd = std2(band);
        band = (band - mean2(band) + panMean/panStd*bandStd) * panStd/bandStd;
        band(band<0) = 0;
        msMatched(:,:,k) = band;
    end

    %%% Computing low-resolution Pan by bicubic decimation/interpolation
    panLow = imresize(imresize(I_PAN,1/ratio),ratio);

    %%% Regression of Pan_low vs MS (with offset)
    % The same design matrix [1, matched bands] serves both regressions.
    design = [ones(nPix,1), reshape(msMatched,[nPix,L])];
    coefInit = regress(panLow(:),design);

    %%% Initial estimate of intensity
    I_l = reshape(design * coefInit,[N,M]);

    %%% Computing mean of std. devs. of the original bands
    bandStds = zeros(1,L);
    for k = 1:L
        bandStds(k) = std2(I_MS(:,:,k));
    end
    meanStd = mean(bandStds);

    %%% Band-dependent processing
    I_Fus_PRACS = zeros(N,M,L);
    for k = 1:L
        matched = msMatched(:,:,k);
        msBand = I_MS(:,:,k);

        %%% Partial Replacement: blend Pan and matched band
        rho = corr2(I_l(:),matched(:));
        I_h = reshape(rho*I_PAN(:)+(1-rho)*matched(:),[N,M]);

        %%% Low-resolution I_h by bicubic decimation/interpolation
        I_h_low = imresize(imresize(I_h,1/ratio),ratio);

        %%% Regression of I_h_low vs MS (with offset) and band intensity
        coefBand = regress(I_h_low(:),design);
        I_l_prime = reshape(design * coefBand,[N,M]);

        %%% Detail image (mean difference removed)
        delta = I_h - I_l_prime - (mean2(I_h)-mean2(I_l_prime));

        %%% Weight
        weight = beta .* corr2(I_l_prime(:),msBand(:))*std(msBand(:))/meanStd;

        %%% Local instability adjustment parameter
        adjust = reshape(1-abs(1-corr2(I_l(:),msBand(:))*msBand(:)./I_l_prime(:)),[N,M]);

        %%% Pansharpened band
        I_Fus_PRACS(:,:,k) = msBand + weight * adjust .* delta;
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/PWMBF.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Model-based fusion using PCA and wavelets.
%
% Interface:
%           Z = PWMBF(Pan,Low,ratio,r,wavelet,degrade)
%
% Inputs:
%           Pan :       Panchromatic image (square, N x N x Q);
%           Low:        Low spatial resolution MS image;
%           ratio:      Scale ratio between Pan and Low;
%           r:          Number of principal components (must not exceed the number of bands of Low);
%           wavelet:    flag; if true the estimation is carried out on the 'rwt' coefficients;
%           degrade:    flag; if true Pan and Low are first reduced by 1/ratio.
%
% Output:
%           Z:          Pansharpened image;
%
% References:
%           [Palsson15] F. Palsson, J.R. Sveinsson, M.O. Ulfarsson, J.A. Benediktsson, "Model-based fusion of multi-and hyperspectral images using PCA and wavelets",
%                       IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2652-2663, May 2015.
%           [Vivone20]  G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods",
%                       IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Z = PWMBF(Pan,Low,ratio,r,wavelet,degrade)

% Make the wavelet routines (daubcqf, mrdwt, mirdwt) reachable. The original
% location <pwd>/rwt/bin is kept (added last, so it keeps precedence), and the
% rwt/bin folder next to this file is tried as well so that the function does
% not depend on the current directory.
rwt_dirs = {fullfile(fileparts(mfilename('fullpath')),'rwt','bin'), sprintf('%s/rwt/bin',pwd)};
for idir = 1:numel(rwt_dirs)
    if exist(rwt_dirs{idir},'dir')
        addpath(rwt_dirs{idir})
    end
end

% Wavelet parameters
L=4;
type='rwt';

Low=double(Low);
Pan=double(Pan);
N=size(Pan,1);
Q=size(Pan,3);
nb=size(Low,3);
if(r>nb)
    error('Number of PCs greater than number of bands');
end

X=Pan;
Ylow=Low;
if(degrade)
    X=imresize(Pan,1/ratio);
    Ylow=imresize(Low,1/ratio);
    % Side length of the degraded Pan. The original used N/4, which is only
    % correct when ratio == 4.
    N=size(X,1);
end

% Upsample Y
Y=imresize(Ylow,ratio,'bicubic');
% Degrade X
Xtilde=imresize(imresize(X,1/ratio,'bilinear'),ratio,'bicubic');

% One pixel per row, one band per column
X=reshape(X,[N^2 Q]);
Xtilde=reshape(Xtilde,[N^2 Q]);
Y=reshape(Y,[N^2 nb]);

% PCA transform
[F, D, R]=svd(Y,'econ');
G=F*D;
U=R;

wfilter=daubcqf(4,'min');
if wavelet
    x=compute_PhiTX(Xtilde,L,wfilter,type);
    x0=compute_PhiTX(X,L,wfilter,type);
    y=compute_PhiTX(G(:,1:r),L,wfilter,type);
    % The first N^2 rows are the low-pass part; it is kept as is
    yl=y(1:N^2,:);
    zh=zeros(3*L*N^2,r);
    for p=1:r
        for j=1:3*L
            % j-th detail subband of the degraded Pan, the Pan and the p-th PC
            xh=x(j*N^2+1:(j+1)*N^2,:);
            xh0=x0(j*N^2+1:(j+1)*N^2,:);
            yh=y(j*N^2+1:(j+1)*N^2,p);
            Cyy=yh'*yh/N^2;
            Cyx=yh'*xh/N^2;
            Cxx=xh'*xh/N^2;
            Cn=diag(mad(abs(yh))/0.6745).^2;
            inv_Cxx=inv(Cxx);
            Cy_x=Cyy-Cyx*inv_Cxx*Cyx';
            if Q>1
                CyxiCxx=Cyx*inv_Cxx;
                mu_zx=xh*CyxiCxx';
                mu_zx0=xh0*CyxiCxx';
            else
                mu_zx=repmat((Cyx*inv_Cxx)',[N^2 1]).*xh;
                mu_zx0=repmat((Cyx*inv_Cxx)',[N^2 1]).*xh0;
            end
            ymu=yh-mu_zx;
            CC=Cy_x*inv(Cy_x+Cn);
            zh((j-1)*N^2+1:N^2+(j-1)*N^2,p)=mu_zx0+ymu*CC;
        end
    end
    z=[yl;zh];
    B=compute_PhiX(z,L,wfilter,type);
    % The first r principal components are replaced by their estimates. (The
    % original also carried a branch guarded by a flag hard-coded to 0, which
    % could never run and has been dropped.)
    G(:,1:r)=B;
    Zhat=G*U';
else
    Cn=0;
    yh=G(:,1:r);
    xh=Xtilde;
    xh0=X;
    Cyy=yh'*yh/N^2;
    Cyx=yh'*xh/N^2;
    Cxx=xh'*xh/N^2;
    inv_Cxx=inv(Cxx);
    Cy_x=Cyy-Cyx*inv_Cxx*Cyx';
    CyxiCxx=Cyx*inv_Cxx;
    mu_zx=xh*CyxiCxx';
    mu_zx0=xh0*CyxiCxx';
    ymu=yh-mu_zx;
    CC=Cy_x/(Cy_x+Cn);
    B=mu_zx0+ymu*CC;
    G(:,1:r)=B;
    Zhat=G*U';
end
Z=reshape(Zhat,[N N nb]);
end
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/compute_PhiTX.m
================================================
function PhiTX=compute_PhiTX(X,L,h,type)
% compute_PhiTX  Transform every column of X with the 2-D transform named by type.
%
% Each column of X (M x T) is reshaped to a sqrt(M) x sqrt(M) image, transformed
% with L levels, and the coefficients are stacked into the matching column of
% PhiTX. h is the filter handed to the 'dwt2', 'dwt' and 'rwt' transforms.
% An unknown type raises an error.
vec = @(x) x(:);

[M,T]=size(X);
switch lower(type)
    case 'dwt2'
        PhiTX=zeros(size(X));
        for k=1:T
            PhiTX(:,k)=vec(FWT2_PO(reshape(X(:,k),[sqrt(M) sqrt(M)]),log2(sqrt(M))-L,h));
        end
    case 'dwt'
        PhiTX=zeros(size(X));
        for k=1:T
            PhiTX(:,k)=vec(mdwt(reshape(X(:,k),[sqrt(M) sqrt(M)]),h,L));
        end
    case 'rwt'
        PhiTX=zeros((3*L+1)*M,T);
        for k=1:T
            [xl xh L]=mrdwt(reshape(X(:,k),[sqrt(M) sqrt(M)]),h,L);
            PhiTX(:,k)=vec([xl xh])/2;
        end
    case 'swt'
        PhiTX=zeros((3*L+1)*M,T);
        for k=1:T
            PhiTX(:,k)=vec(myswt2(reshape(X(:,k),[sqrt(M) sqrt(M)]),L,'db4'));
        end
    case 'iso'
        PhiTX=zeros((L+1)*M,T);
        for k=1:T
            PhiTX(:,k)=vec(cell2mat(atrousdec(reshape(X(:,k),[sqrt(M) sqrt(M)]),'maxflat',L)));
        end
    case 'cwt'
        J=L;
        [Faf, Fsf] = FSfarras; % 1st stage anal. & synth. filters
        [af, sf] = dualfilt1;
        for k=1:T
            w=dualtree2D(reshape(X(:,k),[sqrt(M) sqrt(M)]),J,Faf,af);
            W=[];
            for j=1:J
                W=[W' vec(w{j}{1}{1})']';
                W=[W' vec(w{j}{1}{2})']';
                W=[W' vec(w{j}{1}{3})']';
            end
            W=[W' vec(w{J+1}{1})']';
            for j=1:J
                W=[W' vec(w{j}{2}{1})']';
                W=[W' vec(w{j}{2}{2})']';
                W=[W' vec(w{j}{2}{3})']';
            end
            W=[W' vec(w{J+1}{2})']';
            PhiTX(:,k)=W;
        end
    case 'cplxdt'
        J=L;
        [Faf, Fsf] = FSfarras; % 1st stage anal. & synth. filters
        [af, sf] = dualfilt1;
        for k=1:T
            w=cplxdual2D(reshape(X(:,k),[sqrt(M) sqrt(M)]),J,Faf,af);
            W=[];
            for j=1:J
                W=[W' vec(w{j}{1}{1}{1})']';
                W=[W' vec(w{j}{1}{1}{2})']';
                W=[W' vec(w{j}{1}{1}{3})']';
                W=[W' vec(w{j}{1}{2}{1})']';
                W=[W' vec(w{j}{1}{2}{2})']';
                W=[W' vec(w{j}{1}{2}{3})']';
            end
            W=[W' vec(w{J+1}{1}{1})']';
            W=[W' vec(w{J+1}{1}{2})']';
            for j=1:J
                W=[W' vec(w{j}{2}{1}{1})']';
                W=[W' vec(w{j}{2}{1}{2})']';
                W=[W' vec(w{j}{2}{1}{3})']';
                W=[W' vec(w{j}{2}{2}{1})']';
                W=[W' vec(w{j}{2}{2}{2})']';
                W=[W' vec(w{j}{2}{2}{3})']';
            end
            W=[W' vec(w{J+1}{2}{1})']';
            W=[W' vec(w{J+1}{2}{2})']';
            PhiTX(:,k)=W;
        end
    otherwise
        error(['Unknown method ' type]);
end
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/compute_PhiX.m
================================================
function PhiX=compute_PhiX(X,L,h,type)
% compute_PhiX  Rebuild one image per column of X from stacked coefficients.
%
% Each column of X (N x r) holds the coefficients of one image in the layout
% used for the given type; the reconstructed image is returned, vectorised, in
% the matching column of PhiX. Presumably the inverse of compute_PhiTX (it
% calls the inverse-named routines midwt, mirdwt, iswt2, ...) - confirm
% against those routines. An unknown type raises an error.
vec = @(x) x(:);
[N,r]=size(X);
switch lower(type)
    case 'dwt'
        M=N;
        PhiX=zeros(size(X));
        for k=1:r
            PhiX(:,k)=vec(midwt(reshape(X(:,k),[sqrt(M) sqrt(M)]),h,L));
        end
    case 'dwt2'
        M=N;
        PhiX=zeros(size(X));
        for k=1:r
            PhiX(:,k)=vec(IWT2_PO(reshape(X(:,k),[sqrt(M) sqrt(M)]),log2(sqrt(M))-L,h));
        end
    case 'rwt'
        M=N/(3*L+1);
        PhiX=zeros(M,r);
        for k=1:r
            PhiX(:,k)=vec(mirdwt(reshape(X(1:M,k),[sqrt(M) sqrt(M)]),reshape(X(M+1:end,k),[sqrt(M) 3*L*sqrt(M)]),h,L))*2;
        end
    case 'swt'
        M=N/(3*L+1);
        PhiX=zeros(M,r);
        for k=1:r
            PhiX(:,k)=vec(iswt2(reshape(X(:,k),[sqrt(M) sqrt(M) 3*L+1]),'db4'));
        end
    case 'iso'
        M=N/(L+1);
        PhiX=zeros(M,r);
        for k=1:r
            xc=reshape(X(:,k),[sqrt(M) (L+1)*sqrt(M)]);
            xc=mat2cell(xc,[sqrt(M)],repmat(sqrt(M),[1 L+1]));
            PhiX(:,k)=vec(atrousrec(xc,'maxflat'));
        end
    case 'cwt'
        J=L;
        [Faf, Fsf] = FSfarras; % 1st stage anal. & synth. filters
        [af, sf] = dualfilt1;
        PhiX=zeros(size(X,1)/2,size(X,2));
        n=sqrt(size(X,1)/2);
        for c=1:r
            W=X(:,c);
            j_offset=0;
            for j=1:J
                for k=1:3
                    w2{j}{1}{k}=reshape(W(j_offset+1+(k-1)*(n/2^j)^2:j_offset+k*(n/2^j)^2),[n/2^j n/2^j]);
                    w2{j}{2}{k}=reshape(W(j_offset+n^2+1+(k-1)*(n/2^j)^2:j_offset+n^2+k*(n/2^j)^2),[n/2^j n/2^j]);
                end
                j_offset=j_offset+3*(n/2^j)^2;
            end
            w2{J+1}{1}=reshape(W(n^2-(n/2^(J))^2+1:n^2),[n/2^J n/2^J]);
            w2{J+1}{2}=reshape(W(2*n^2-(n/2^(J))^2+1:2*n^2),[n/2^J n/2^J]);
            PhiX(:,c)=vec(idualtree2D(w2,J,Fsf,sf));
        end
    case 'cplxdt'
        J=L;
        [Faf, Fsf] = FSfarras; % 1st stage anal. & synth. filters
        [af, sf] = dualfilt1;
        PhiX=zeros(size(X,1)/4,size(X,2));
        n=sqrt(size(X,1)/4);
        for c=1:r
            W=X(:,c);
            j_offset=0;
            for j=1:J
                l_offset=0;
                for l=1:2
                    for k=1:3
                        w2{j}{1}{l}{k}=reshape(W(j_offset+l_offset+1+(k-1)*(n/2^j)^2:j_offset+l_offset+k*(n/2^j)^2),[n/2^j n/2^j]);
                        w2{j}{2}{l}{k}=reshape(W(j_offset+l_offset+2*n^2+1+(k-1)*(n/2^j)^2:j_offset+l_offset+2*n^2+k*(n/2^j)^2),[n/2^j n/2^j]);
                    end
                    l_offset=l_offset+3*(n/2^j)^2;
                end
                j_offset=j_offset+6*(n/2^j)^2;
            end
            w2{J+1}{1}{1}=reshape(W(2*n^2-2*(n/2^(J))^2+1:2*n^2-(n/2^(J))^2),[n/2^J n/2^J]);
            w2{J+1}{1}{2}=reshape(W(2*n^2-(n/2^(J))^2+1:2*n^2),[n/2^J n/2^J]);
            w2{J+1}{2}{1}=reshape(W(4*n^2-2*(n/2^(J))^2+1:4*n^2-(n/2^(J))^2),[n/2^J n/2^J]);
            w2{J+1}{2}{2}=reshape(W(4*n^2-(n/2^(J))^2+1:4*n^2),[n/2^J n/2^J]);
            PhiX(:,c)=vec(icplxdual2D(w2,J,Fsf,sf));
        end
    otherwise
        error(['Unknown method ' type]);
end
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/readme
================================================
test
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/AUTHORS
================================================
The primary authors of Rice Wavelet Toolbox are
and/or have been:
* Richard Baraniuk
* Hyeokho Choi
* Ramesh Neelamani
* Vinay Ribeiro
* Rebecca Hindman
* Justin Romberg
* Haitao Guo
* Felix Fernandes
* Brent Hendricks
* Ramesh Gopinath
* Markus Lang
* Jan Erik Odegard
* Dong Wei
* Joshua Jackson
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/CMakeLists.txt
================================================
# Top-level CMake project for rwt: pulls in the lib/src, doc and python
# sub-directories and, on UNIX, defines a "distclean" target.
cmake_minimum_required (VERSION 2.6)
project (rwt)
# Sub-directories that carry their own CMakeLists.txt
subdirs(lib/src)
subdirs(doc)
subdirs(python)
# Disabled: lookup of a MatlabMex package through a local cmake/ module directory
#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
#FIND_PACKAGE(MatlabMex REQUIRED)
# This section based on http://www.cmake.org/pipermail/cmake/2003-June/003953.html
IF (UNIX)
  # "distclean" target; the removal itself is done by the custom command below
  ADD_CUSTOM_TARGET (distclean @echo cleaning for source distribution)
  # Files and directories removed by distclean
  SET(DISTCLEANED
   CMakeFiles
   cmake.depends
   cmake.check_depends
   CMakeCache.txt
   cmake.check_cache
   Makefile
   *.cmake
   */CMakeCache.txt
   */CMakeFiles
   */Makefile
   */*.cmake
   */*/CMakeCache.txt
   */*/CMakeFiles
   */*/*.cmake
   */*/Makefile
   lib/src/*.a
   doc/Doxyfile
   doc/html
   doc/latex
   core core.*
   gmon.out
   */*.mex*
   */*.o
   lib/src/*.o
   python/rwtPYTHON_wrap.cxx
   python/rwt.py
   python/rwt.pyc
   python/_rwt.so
   *~
  )
  # Attached to the distclean target: runs "rm -Rf" on CMakeTmp and on
  # everything listed in DISTCLEANED
  ADD_CUSTOM_COMMAND(
    DEPENDS clean
    COMMENT "distribution clean"
    COMMAND rm
    ARGS    -Rf CMakeTmp ${DISTCLEANED}
    TARGET  distclean
  )
ENDIF(UNIX)
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/HACKING
================================================
PHREAK Look, you wanna be elite? You gotta do a righteous hack. None of this accidental shit.
CEREAL Oh yeah, you want a seriously righteous hack, you score one of those Gibsons man. You know, supercomputers they use to like, do physics, and look for oil and stuff?
-Hackers (1995) ================================================================================ = INTRODUCTION ================================================================================ There are a number of ways in which wavelet toolbox might be expanded or improved. Arbitrary dimension handling or just 3d, non-orthogonal wavelets, support for other environments, and so on. If you are seeking to implement these or any other changes, this document will be your launching point. All of this applies to the state of the code as of the 3.0 release - if this sentence hasn't been updated but the code has then you may assume that some of the rest of this text may be outdated. ================================================================================ = DEVELOPER DOCUMENTATION ================================================================================ You will likely want to begin by building the documentation files. You'll need CMake, Doxygen, and GraphViz. Hopefully you are on Linux, OSX, or some other Unix flavor. If you are using Windows as your primary development platform, take this time to stop and think about your life and where it's going. Ok, so to build the documentation run: cmake . make doc After this you should see HTML documentation in doc/html and a pdf with the same content at doc/latex/refman.pdf If you haven't used Doxygen before, here's what you need to know: put a ! after the opening /* in a comment to include that comment in the generated docs, look at the top of some existing functions to see how function parameters are shown, and note that you can use latex formulas. ================================================================================ = UNIT TESTS ================================================================================ Before changing anything you should make your way over to the tests directory from the MATLAB prompt and run: runtests Witness how all of these tests pass. 
Take care that this is still true after any changes you make. Now of course you have an IQ that must be measured using 2-byte integers and have never once introduced a bug into computer software, but these tests are important so that mortals can come along later and know whether a change they made broke some function of the software or not. There are also Python tests at python/test_rwt.py and these are mostly the same as the MATLAB tests. If you are feeling adventurous you might look into unifying these into a single script that generates both sets of tests. You should run both sets of tests before publishing any commit that could conceivably affect them. If you don't know every line of code in both platforms intimately then you should take the safe route and run both test scripts, and possibly the Octave tests as well. Unfortunately Octave lacks a lot of things that would be needed to run MATLAB xUnit. ================================================================================ = TOUR OF THE C CODE ================================================================================ As of version 3.0, all MATLAB-specific C code has been isolated to a few places. The files in the mex/ directory are MATLAB MEX wrappers for the transforms and these files are intended to be as short as possible. All the initialization code common to the different transforms is found in lib/src/init.c and some of that code is also shared with Python. The real magic of making things work across different environments is in lib/inc/rwt_platform.h - in particular the mat() macro abstracts away memory addressing so you don't have to worry about row major order and column major order. The rwt_printf, mat_offset, offset_row, and offset_col macros will be very useful if you need to change any of the code that uses the mat() macro. To understand the code for the transforms themselves, start with lib/src/dwt.c which is the best documented of the transforms.
The rest of them are written and structured in a very similar fashion. The flow of the code is as follows. One of the transforms is called from MATLAB. This invokes one of the wrappers from the mex directory. The function here calls rwt_matlab_init in lib/src/init.c which calls other init functions. From here the mex wrapper calls the transform in lib/src. For example, the mdwt function for the discrete wavelet transform calls dwt() in the lib/src/dwt.c file. This function has a few helpers in the same file. It allocates memory necessary for the transform in dwt_allocate(), calculates the high and low pass coefficients in dwt_coefficients(), performs the convolution in dwt_convolution, and frees the allocated memory in the dwt_free() function. In the case of Python, a python wrapper function in python/rwt.i calls some of the same initialization code in lib/src/init.c then decides if the input is 1D or 2D and calls a matching C wrapper function, also found in the python/rwt.i file. Finally, this wrapper function calls the transform function found in the lib/src directory. ================================================================================ = PYTHON / NUMPY ================================================================================ The HardTh, SoftTh, daubcqf, denoise, and makesig functions are implemented twice - once in MATLAB and once in Python. This was simpler than rewriting them in C. If you change any of these you will be glad to see that MATLAB and numpy are extremely similar. Here follow the differences you may need to know. MATLAB indexes start at 1 and numpy starts at 0. For three part indexes the order changes - a[b:c:d] in MATLAB code corresponds to a[b-1:d:c] in Python/numpy. You must use the ddof=1 argument to the std() function in Python. The size() function in MATLAB returns 2 numbers for 1D inputs and the same function in Python returns 1 number.
MATLAB assumes additional return values beyond the 1st should be dropped if not assigned to a separate variable - Python does not. A quick look over the SWIG/numpy documentation might lead you to think that you could use OUTPUT_ARRAY or INPLACE_FARRAY or some other macro to change how the python bindings work to be more reasonable. You are probably wrong. Probably. ================================================================================ = THE BUILD SYSTEM ================================================================================ The CMake build system was selected for its license similarity to Wavelet Toolbox itself, though this is not strictly necessary. CMake also allows for sophisticated results with relatively little work. Anything you want to do will likely require some searching and playing. You may be tempted to switch to some other more common build system, but this would probably only make things worse. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/INSTALL ================================================ ================================================================================ = MATLAB Installation Instructions = ================================================================================ 1. Make sure you have the latest source code. See the GitHub page at https://github.com/ricedsp/rwt If you have the command line git tool installed you should be able to run: git clone https://github.com/ricedsp/rwt.git 2. Properly set up your system to create MEX-files. Refer to the MATLAB documentation section "Build MEX-Files" at http://www.mathworks.com/help/matlab/matlab_external/building-mex-files.html 3. Run MATLAB and change to the "bin" subdirectory containing the .m files 4. Compile the toolbox by executing the Matlab command: compile 5. Add the toolbox "bin" subdirectory to your Matlab path. 
================================================================================ = Octave Installation Instructions = ================================================================================ Octave installation is similar to the procedure for MATLAB above. On Linux you will need the octave-dev (Debian/Ubuntu) or octave-devel (RedHat, etc.) package installed. ================================================================================ = Python Installation Instructions = ================================================================================ Python installation requires SWIG version 2.0.11 or greater and CMake. Also you should have numpy and scipy installed. To install the python bindings, execute the following commands: cd python cmake . sudo make install On OSX, CMake is available from Macports http://www.macports.org/ For Redhat Enterprise Linux, Scientific Linux, CentOS, etc. there is a package available on RepoForge http://repoforge.org/use/ ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/LICENSE ================================================ Copyright (c) 2000 RICE UNIVERSITY. All rights reserved. This software is distributed and licensed to you on a non-exclusive basis, free-of-charge. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistribution of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistribution in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY WILLIAM MARSH RICE UNIVERSITY, HOUSTON, TEXAS,
AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RICE UNIVERSITY
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
OR BUSINESS INTERRUPTIONS) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE), PRODUCT LIABILITY, OR OTHERWISE ARISING IN ANY WAY OUT OF
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

For information on commercial licenses, contact Rice University's Office of
Technology Transfer at techtran@rice.edu or (713) 348-6173
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/HardTh.m
================================================
function x = HardTh(y,thld)
%    x = HardTh(y,thld);
%
%    HARDTH zeroes every sample of y whose magnitude does not exceed the
%    threshold thld and leaves the remaining samples untouched.
%
%    Input:
%       y    : 1D or 2D signal to be thresholded
%       thld : threshold value
%
%    Output:
%       x : Hard thresholded output (x = (abs(y)>thld).*y)
%
%  HERE'S AN EASY WAY TO RUN THE EXAMPLES:
%  Cut-and-paste the example you want to run to a new file
%  called ex.m, for example. Delete out the % at the beginning
%  of each line in ex.m (Can use search-and-replace in your editor
%  to replace it with a space). Type 'ex' in matlab and hit return.
%
%
%    Example:
%       y = makesig('WernerSorrows',8);
%       thld = 1;
%       x = HardTh(y,thld)
%       x = 1.5545 5.3175 0 1.6956 -1.2678 0 1.7332 0
%
%    See also: SoftTh
%

%Author: Haitao Guo

% 0/1 mask of the samples that survive the threshold
survives = abs(y) > thld;
x = survives.*y;
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/SoftTh.m
================================================
function x = SoftTh(y,thld)
%    x = SoftTh(y,thld);
%
%    SOFTTH shrinks the magnitude of every sample of y by thld and zeroes
%    the samples whose magnitude is below thld.
%
%    Input:
%       y    : 1D or 2D signal to be thresholded
%       thld : Threshold value
%
%    Output:
%       x : Soft thresholded output (x = sign(y)(|y|-thld)_+)
%
%  HERE'S AN EASY WAY TO RUN THE EXAMPLES:
%  Cut-and-paste the example you want to run to a new file
%  called ex.m, for example. Delete out the % at the beginning
%  of each line in ex.m (Can use search-and-replace in your editor
%  to replace it with a space). Type 'ex' in matlab and hit return.
%
%
%    Example:
%       y = makesig('Doppler',8);
%       thld = 0.2;
%       x = SoftTh(y,thld)
%       x = 0 0 0 -0.0703 0 0.2001 0.0483 0
%
%    See also: HardTh
%
%    Reference:
%       "De-noising via Soft-Thresholding" Tech. Rept. Statistics,
%       Stanford, 1992. D.L. Donoho.
%

%Author: Haitao Guo

magnitude = abs(y);
survives = magnitude >= thld;   % samples that are not zeroed
shrunk = magnitude - thld;      % magnitude after shrinkage
x = sign(y).*survives.*shrunk;
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/compile.m
================================================
% COMPILE compiles the c files and generates mex files.
%
if exist('OCTAVE_VERSION', 'builtin')
  mkoctfile --mex -v -DOCTAVE_MEX_FILE ../mex/mdwt.c ../lib/src/dwt.c ../lib/src/init.c ../lib/src/platform.c -I../lib/inc -o omdwt.mex
  mkoctfile --mex -v -DOCTAVE_MEX_FILE ../mex/midwt.c ../lib/src/idwt.c ../lib/src/init.c ../lib/src/platform.c -I../lib/inc -o omidwt.mex
  mkoctfile --mex -v -DOCTAVE_MEX_FILE ../mex/mrdwt.c ../lib/src/rdwt.c ../lib/src/init.c ../lib/src/platform.c -I../lib/inc -o omrdwt.mex
  mkoctfile --mex -v -DOCTAVE_MEX_FILE ../mex/mirdwt.c ../lib/src/irdwt.c ../lib/src/init.c ../lib/src/platform.c -I../lib/inc -o omirdwt.mex
else
  x = computer();
  if (x(length(x)-1:length(x)) == '64')
    mex -v -largeArrayDims ../mex/mdwt.c ../lib/src/dwt.c ../lib/src/init.c ../lib/src/platform.c -I../lib/inc -outdir ../bin
    mex -v -largeArrayDims ../mex/midwt.c ../lib/src/idwt.c ../lib/src/init.c ../lib/src/platform.c -I../lib/inc -outdir ../bin
    mex -v -largeArrayDims ../mex/mrdwt.c ../lib/src/rdwt.c ../lib/src/init.c ../lib/src/platform.c -I../lib/inc -outdir ../bin
    mex -v -largeArrayDims ../mex/mirdwt.c ../lib/src/irdwt.c ../lib/src/init.c ../lib/src/platform.c -I../lib/inc -outdir ../bin
  else
    mex -v -compatibleArrayDims ../mex/mdwt.c ../lib/src/dwt.c ../lib/src/init.c ../lib/src/platform.c -I../lib/inc -outdir ../bin
    mex -v -compatibleArrayDims ../mex/midwt.c ../lib/src/idwt.c ../lib/src/init.c ../lib/src/platform.c -I../lib/inc -outdir ../bin
    mex -v -compatibleArrayDims ../mex/mrdwt.c ../lib/src/rdwt.c ../lib/src/init.c ../lib/src/platform.c -I../lib/inc -outdir ../bin
    mex -v -compatibleArrayDims ../mex/mirdwt.c ../lib/src/irdwt.c ../lib/src/init.c ../lib/src/platform.c -I../lib/inc -outdir ../bin
  end
end
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/daubcqf.m
================================================
function [h_0,h_1] = daubcqf(N,TYPE)
%    [h_0,h_1] = daubcqf(N,TYPE);
%
%    Function computes the Daubechies' scaling and wavelet filters
%    (normalized to sqrt(2)).
%
%    Input:
%       N    : Length of filter (must be even)
%       TYPE : Optional parameter that distinguishes the minimum phase,
%              maximum phase and mid-phase solutions ('min', 'max', or
%              'mid'). If no argument is specified (or TYPE is empty), the
%              minimum phase solution is used. Any other value is an error.
%
%    Output:
%       h_0 : Daubechies' scaling filter of the requested phase
%       h_1 : Daubechies' wavelet filter of the requested phase
%
%    Example:
%       N = 4;
%       TYPE = 'min';
%       [h_0,h_1] = daubcqf(N,TYPE)
%       h_0 = 0.4830 0.8365 0.2241 -0.1294
%       h_1 = 0.1294 0.2241 -0.8365 0.4830
%
%    Reference: "Orthonormal Bases of Compactly Supported Wavelets",
%                CPAM, Oct.89
%

%Author: Ramesh Gopinath

if(nargin < 2 || isempty(TYPE)),
  TYPE = 'min';
end;
% Whole-string comparison: the element-wise TYPE=='mid' used originally fails
% with a size-mismatch error as soon as TYPE is not exactly 3 characters long.
if(~any(strcmp(TYPE,{'min','mid','max'}))),
  error('Unknown TYPE; use ''min'', ''mid'' or ''max''.');
end;
if(rem(N,2) ~= 0),
  error('No Daubechies filter exists for ODD length');
end;
K = N/2;
a = 1;
p = 1;
q = 1;
h_0 = [1 1];
for j  = 1:K-1,
  a = -a * 0.25 * (j + K - 1)/j;
  h_0 = [0 h_0] + [h_0 0];
  p = [0 -p] + [p 0];
  p = [0 -p] + [p 0];
  q = [0 q 0] + a*p;
end;
q = sort(roots(q));
qt = q(1:K-1);             % root selection for the minimum phase solution
if(strcmp(TYPE,'mid')),
  if rem(K,2)==1,
    qt = q([1:4:N-2 2:4:N-2]);
  else
    qt = q([1 4:4:K-1 5:4:K-1 N-3:-4:K N-4:-4:K]);
  end;
end;
h_0 = conv(h_0,real(poly(qt)));
h_0 = sqrt(2)*h_0/sum(h_0); %Normalize to sqrt(2);
if(strcmp(TYPE,'max')),
  h_0 = fliplr(h_0);
end;
% The filter energy must be 1. The original tested abs(sum(h_0.^2))-1, which
% can never flag an energy that is too small.
if(abs(sum(h_0 .^ 2) - 1) > 1e-4)
  error('Numerically unstable for this value of "N".');
end;
h_1 = rot90(h_0,2);
h_1(1:2:N)=-h_1(1:2:N);
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/denoise.m
================================================
function [xd,xn,option] = denoise(x,h,type,option)
%    [xd,xn,option] = denoise(x,h,type,option);
%
%    DENOISE is a generic program for wavelet based denoising.
%    The program will denoise the signal x using the 2-band wavelet
%    system described by the filter h using either the traditional
%    discrete wavelet transform (DWT) or the linear shift invariant
%    discrete wavelet transform (also known as the undecimated DWT
%    (UDWT)).
% % Input: % x : 1D or 2D signal to be denoised % h : Scaling filter to be applied % type : Type of transform (Default: type = 0) % 0 --> Discrete wavelet transform (DWT) % 1 --> Undecimated DWT (UDWT) % option : Default settings is marked with '*': % *type = 0 --> option = [0 3.0 0 0 0 0] % type = 1 --> option = [0 3.6 0 1 0 0] % option(1) : Whether to threshold low-pass part % 0 --> Don't threshold low pass component % 1 --> Threshold low pass component % option(2) : Threshold multiplier, c. The threshold is % computed as: % thld = c*MAD(noise_estimate)). % The default values are: % c = 3.0 for the DWT based denoising % c = 3.6 for the UDWT based denoising % option(3) : Type of variance estimator % 0 --> MAD (mean absolute deviation) % 1 --> STD (classical numerical std estimate) % option(4) : Type of thresholding % 2 --> Soft thresholding % 1 --> Hard thresholding % option(5) : Number of levels, L, in wavelet decomposition. By % setting this to the default value '0' a maximal % decomposition is used. % option(6) : Actual threshold to use (setting this to % anything but 0 will mean that option(3) % is ignored) % % Output: % xd : Estimate of noise free signal % xn : The estimated noise signal (x-xd) % option : A vector of actual parameters used by the % program. The vector is configured the same way as % the input option vector with one added element % option(7) = type. % % HERE'S AN EASY WAY TO RUN THE EXAMPLES: % Cut-and-paste the example you want to run to a new file % called ex.m, for example. Delete out the % at the beginning % of each line in ex.m (Can use search-and-replace in your editor % to replace it with a space). Type 'ex' in matlab and hit return. 
% % Example 1: % h = daubcqf(6); [s,N] = makesig('Doppler'); n = randn(1,N); % x = s + n/10; % (approximately 10dB SNR) % figure;plot(x);hold on;plot(s,'r'); % % %Denoise x with the default method based on the DWT % [xd,xn,opt1] = denoise(x,h); % figure;plot(xd);hold on;plot(s,'r'); % % %Denoise x using the undecimated (LSI) wavelet transform % [yd,yn,opt2] = denoise(x,h,1); % figure;plot(yd);hold on;plot(s,'r'); % % Example 2: (on an image) % h = daubcqf(6); load lena; % noisyLena = lena + 25 * randn(size(lena)); % figure; colormap(gray); imagesc(lena); title('Original Image'); % figure; colormap(gray); imagesc(noisyLena); title('Noisy Image'); % Denoise lena with the default method based on the DWT % [denoisedLena,xn,opt1] = denoise(noisyLena,h); % figure; colormap(gray); imagesc(denoisedLena); title('denoised Image'); % % % See also: mdwt, midwt, mrdwt, mirdwt, SoftTh, HardTh, setopt % %Author: Jan Erik Odegard if(nargin < 2) error('You need to provide at least 2 inputs: x and h'); end; if(nargin < 3), type = 0; option = []; elseif(nargin < 4) option = []; end; if(isempty(type)), type = 0; end; if(type == 0), default_opt = [0 3.0 0 2 0 0]; elseif(type == 1), default_opt = [0 3.6 0 1 0 0]; else error(['Unknown denoising method',10,... 
'If it is any good we need to have a serious talk :-)']); end; option = setopt(option,default_opt); [mx,nx] = size(x); dim = min(mx,nx); if(dim == 1), n = max(mx,nx); else n = dim; end; if(option(5) == 0), L = floor(log2(n)); else L = option(5); end; if(type == 0), % Denoising by DWT xd = mdwt(x,h,L); if (option(6) == 0), tmp = xd(floor(mx/2)+1:mx,floor(nx/2)+1:nx); if(option(3) == 0), thld = option(2)*median(abs(tmp(:)))/.67; elseif(option(3) == 1), thld = option(2)*std(tmp(:)); else error('Unknown threshold estimator, Use either MAD or STD'); end; else thld = option(6); end; if(dim == 1) ix = 1:n/(2^L); ykeep = xd(ix); else ix = 1:mx/(2^L); jx = 1:nx/(2^L); ykeep = xd(ix,jx); end; if(option(4) == 2), xd = SoftTh(xd,thld); elseif(option(4) == 1), xd = HardTh(xd,thld); else error('Unknown threshold rule. Use either Soft (2) or Hard (1)'); end; if (option(1) == 0), if(dim == 1), xd(ix) = ykeep; else xd(ix,jx) = ykeep; end; end; xd = midwt(xd,h,L); elseif(type == 1), % Denoising by UDWT [xl,xh] = mrdwt(x,h,L); if(dim == 1), c_offset = 1; else c_offset = 2*nx + 1; end; if (option(6) == 0), tmp = xh(:,c_offset:c_offset+nx-1); if(option(3) == 0), thld = option(2)*median(abs(tmp(:)))/.67; elseif(option(3) == 1), thld = option(2)*std(tmp(:)); else error('Unknown threshold estimator, Use either MAD or STD'); end; else thld = option(6); end; if(option(4) == 2), xh = SoftTh(xh,thld); if(option(1) == 1), xl = SoftTh(xl,thld); end; elseif(option(4) == 1), xh = HardTh(xh,thld); if(option(1) == 1), xl = HardTh(xl,thld); end; else error('Unknown threshold rule. Use either Soft (2) or Hard (1)'); end; xd = mirdwt(xl,xh,h,L); else % Denoising by unknown method error(['Unknown denoising method',10,... 
'If it is any good we need to have a serious talk :-)']); end; option(6) = thld; option(7) = type; xn = x - xd;
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/makesig.m
================================================
function [x,N] = makesig(SigName,N)
% [x,N] = makesig(SigName,N) Creates artificial test signal identical to the
%     standard test signals proposed and used by D. Donoho and I. Johnstone
%     in WaveLab (- a matlab toolbox developed by Donoho et al. at the
%     statistics department of Stanford University).
%
%    Input:  SigName - Name of the desired signal (Default 'AllSig')
%                        'AllSig' (Returns a matrix with all the signals,
%                                  one signal per row, in the order below)
%                        'HeaviSine'
%                        'Bumps'
%                        'Blocks'
%                        'Doppler'
%                        'Ramp'
%                        'Cusp'
%                        'Sing'
%                        'HiSine'
%                        'LoSine'
%                        'LinChirp'
%                        'TwoChirp'
%                        'QuadChirp'
%                        'MishMash'
%                        'WernerSorrows' (Heisenberg)
%                        'Leopold' (Kronecker)
%            N       - Length in samples of the desired signal (Default 512)
%
%    Output: x   - vector/matrix of test signals
%                  (empty if SigName matches none of the names above)
%            N   - length of signal returned
%
%    See also:
%
%    References:
%            WaveLab can be accessed at
%            www_url: http://playfair.stanford.edu/~wavelab/
%            Also see various articles by D.L. Donoho et al. at
%            web_url: http://playfair.stanford.edu/
%
%Author: Jan Erik Odegard
%This m-file is a copy of the code provided with WaveLab
%customized to be consistent with RWT.

if(nargin < 1)
  SigName = 'AllSig';
  N = 512;
elseif(nargin == 1)
  N = 512;
end;

% Sample grid t = 1/N, 2/N, ..., 1 shared by every signal.
t = (1:N) ./N;
x = [];
y = [];

% Each stanza below fills y only when its signal was requested (or 'AllSig'),
% appends it as a new row of x and resets y, so a single named signal yields
% one row and 'AllSig' yields one row per signal.
if(strcmp(SigName,'HeaviSine') | strcmp(SigName,'AllSig')),
  y = 4.*sin(4*pi.*t);
  y = y - sign(t - .3) - sign(.72 - t);
end;
x = [x;y];
y = [];
if(strcmp(SigName,'Bumps') | strcmp(SigName,'AllSig')),
  pos = [ .1 .13 .15 .23 .25 .40 .44 .65 .76 .78 .81];
  hgt = [ 4 5 3 4 5 4.2 2.1 4.3 3.1 5.1 4.2];
  wth = [.005 .005 .006 .01 .01 .03 .01 .01 .005 .008 .005];
  y = zeros(size(t));
  for j =1:length(pos)
    y = y + hgt(j)./( 1 + abs((t - pos(j))./wth(j))).^4;
  end
end;
x = [x;y];
y = [];
if(strcmp(SigName,'Blocks') | strcmp(SigName,'AllSig')),
  pos = [ .1 .13 .15 .23 .25 .40 .44 .65 .76 .78 .81];
  hgt = [4 (-5) 3 (-4) 5 (-4.2) 2.1 4.3 (-3.1) 2.1 (-4.2)];
  y = zeros(size(t));
  for j=1:length(pos)
    y = y + (1 + sign(t-pos(j))).*(hgt(j)/2) ;
  end
end;
x = [x;y];
y = [];
if(strcmp(SigName,'Doppler') | strcmp(SigName,'AllSig')),
  y = sqrt(t.*(1-t)).*sin((2*pi*1.05) ./(t+.05));
end;
x = [x;y];
y = [];
if(strcmp(SigName,'Ramp') | strcmp(SigName,'AllSig')),
  y = t - (t >= .37);
end;
x = [x;y];
y = [];
if(strcmp(SigName,'Cusp') | strcmp(SigName,'AllSig')),
  y = sqrt(abs(t - .37));
end;
x = [x;y];
y = [];
if(strcmp(SigName,'Sing') | strcmp(SigName,'AllSig')),
  % The singularity sits at (k+.5)/N, i.e. between two grid samples.
  k = floor(N * .37);
  y = 1 ./abs(t - (k+.5)/N);
end;
x = [x;y];
y = [];
if(strcmp(SigName,'HiSine') | strcmp(SigName,'AllSig')),
  y = sin( pi * (N * .6902) .* t);
end;
x = [x;y];
y = [];
if(strcmp(SigName,'LoSine') | strcmp(SigName,'AllSig')),
  y = sin( pi * (N * .3333) .* t);
end;
x = [x;y];
y = [];
if(strcmp(SigName,'LinChirp') | strcmp(SigName,'AllSig')),
  y = sin(pi .* t .* ((N .* .125) .* t));
end;
x = [x;y];
y = [];
if(strcmp(SigName,'TwoChirp') | strcmp(SigName,'AllSig')),
  y = sin(pi .* t .* (N .* t)) + sin((pi/3) .* t .* (N .* t));
end;
x = [x;y];
y = [];
if(strcmp(SigName,'QuadChirp') | strcmp(SigName,'AllSig')),
  y = sin( (pi/3) .* t .* (N .* t.^2));
end;
x = [x;y];
y = [];
if(strcmp(SigName,'MishMash') | strcmp(SigName,'AllSig')),
  % QuadChirp + LinChirp + HiSine
  y = sin( (pi/3) .* t .* (N .* t.^2)) ;
  y = y + sin( pi * (N * .6902) .* t);
  y = y + sin(pi .* t .* (N .* .125 .* t));
end;
x = [x;y];
y = [];
if(strcmp(SigName,'WernerSorrows') | strcmp(SigName,'AllSig')),
  y = sin( pi .* t .* (N/2 .* t.^2)) ;
  y = y + sin( pi * (N * .6902) .* t);
  y = y + sin(pi .* t .* (N .* t));
  % Same bump positions/heights/widths as the 'Bumps' signal.
  pos = [ .1 .13 .15 .23 .25 .40 .44 .65 .76 .78 .81];
  hgt = [ 4 5 3 4 5 4.2 2.1 4.3 3.1 5.1 4.2];
  wth = [.005 .005 .006 .01 .01 .03 .01 .01 .005 .008 .005];
  for j =1:length(pos)
    y = y + hgt(j)./( 1 + abs((t - pos(j))./wth(j))).^4;
  end
end;
x = [x;y];
y = [];
if(strcmp(SigName,'Leopold') | strcmp(SigName,'AllSig')),
  % Result of a comparison, i.e. a logical row with a single 1.
  y = (t == floor(.37 * N)/N); % Kronecker
end;
x = [x;y];
y = [];

%    disp(sprintf('MakeSignal: I don*t recognize << %s>>',SigName))
%    disp('Allowable SigNames are:')
%    disp('AllSig'),
%    disp('HeaviSine'),
%    disp('Bumps'),
%    disp('Blocks'),
%    disp('Doppler'),
%    disp('Ramp'),
%    disp('Cusp'),
%    disp('Crease'),
%    disp('Sing'),
%    disp('HiSine'),
%    disp('LoSine'),
%    disp('LinChirp'),
%    disp('TwoChirp'),
%    disp('QuadChirp'),
%    disp('MishMash'),
%    disp('WernerSorrows'),
%    disp('Leopold'),
%end
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/mdwt.m
================================================
function [y,L] = mdwt(x,h,L)
%    [y,L] = mdwt(x,h,L);
%
%    Function computes the discrete wavelet transform y for a 1D or 2D input
%    signal x using the scaling filter h.
%
%    Input:
%       x : finite length 1D or 2D signal (implicitly periodized)
%       h : scaling filter
%       L : number of levels. In the case of a 1D signal, length(x) must be
%           divisible by 2^L; in the case of a 2D signal, the row and the
%           column dimension must be divisible by 2^L. If no argument is
%           specified, a full DWT is returned for maximal possible L.
%
%    Output:
%       y : the wavelet transform of the signal
%           (see example to understand the coefficients)
%       L : number of decomposition levels
%
%    1D Example:
%       x = makesig('LinChirp',8);
%       h = daubcqf(4,'min');
%       L = 2;
%       [y,L] = mdwt(x,h,L)
%
%    1D Example's output and explanation:
%
%       y = [1.1097 0.8767 0.8204 -0.5201 -0.0339 0.1001 0.2201 -0.1401]
%       L = 2
%
%    The coefficients in output y are arranged as follows
%
%       y(1) and y(2) : Scaling coefficients (lowest frequency)
%       y(3) and y(4) : Band pass wavelet coefficients
%       y(5) to y(8)  : Finest scale wavelet coefficients (highest frequency)
%
%    2D Example:
%
%       load test_image
%       h = daubcqf(4,'min');
%       L = 1;
%       [y,L] = mdwt(test_image,h,L);
%
%    2D Example's output and explanation:
%
%       The coefficients in y are arranged as follows.
%
%              .------------------.
%              |         |        |
%              |    4    |   2    |
%              |         |        |
%              |   L,L   |   H,L  |
%              |         |        |
%              --------------------
%              |         |        |
%              |    3    |   1    |
%              |         |        |
%              |   L,H   |  H,H   |
%              |         |        |
%              `------------------'
%
%       where
%            1 : High pass vertically and high pass horizontally
%            2 : Low pass vertically and high pass horizontally
%            3 : High pass vertically and low  pass horizontally
%            4 : Low pass vertically and Low pass horizontally
%                (scaling coefficients)
%
%    This M-file is only a fallback wrapper: under Octave it forwards to
%    omdwt, otherwise it raises an error asking for the toolbox to be
%    compiled.
%
%    See also: midwt, mrdwt, mirdwt
%
%Author: Markus Lang

if exist('OCTAVE_VERSION', 'builtin')
  % NOTE(review): presumably forces a floating-point value for the compiled
  % routine (e.g. for logical input such as makesig('Leopold')) - confirm.
  x = x * 1.0;
  % Restrict the lookup to workspace variables so that a file or function
  % named "L" on the path is not mistaken for the optional argument.
  if (exist('L', 'var'))
    [y,L] = omdwt(x,h,L);
  else
    [y,L] = omdwt(x,h);
  end
else
  error('You must compile wavelet toolbox before use')
end
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/midwt.m
================================================
function [y,L] = midwt(x,h,L)
%    [x,L] = midwt(y,h,L);
%
%    Function computes the inverse discrete wavelet transform x for a 1D or
%    2D input signal y using the scaling filter h.
%
%    Note: the description below uses y for the input coefficients and x for
%    the reconstructed signal, whereas the function signature names them the
%    other way round (input x, output y).
%
%    Input:
%       y : finite length 1D or 2D input signal (implicitly periodized)
%           (see function mdwt to find the structure of y)
%       h : scaling filter
%       L : number of levels. In the case of a 1D signal, length(x) must be
%           divisible by 2^L; in the case of a 2D signal, the row and the
%           column dimension must be divisible by 2^L.  If no argument is
%           specified, a full inverse DWT is returned for maximal possible
%           L.
%
%    Output:
%       x : periodic reconstructed signal
%       L : number of decomposition levels
%
%    1D Example:
%       xin = makesig('LinChirp',8);
%       h = daubcqf(4,'min');
%       L = 1;
%       [y,L] = mdwt(xin,h,L);
%       [x,L] = midwt(y,h,L)
%
%    1D Example's output:
%
%       x = 0.0491 0.1951 0.4276 0.7071 0.9415 0.9808 0.6716 0.0000
%       L = 1
%
%    This M-file is only a fallback wrapper: under Octave it forwards to
%    omidwt, otherwise it raises an error asking for the toolbox to be
%    compiled.
%
%    See also: mdwt, mrdwt, mirdwt
%
%Author: Markus Lang

if exist('OCTAVE_VERSION', 'builtin')
  % Restrict the lookup to workspace variables so that a file or function
  % named "L" on the path is not mistaken for the optional argument.
  if (exist('L', 'var'))
    [y,L] = omidwt(x,h,L);
  else
    [y,L] = omidwt(x,h);
  end
else
  error('You must compile wavelet toolbox before use')
end
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/mirdwt.m
================================================
function [x,L] = mirdwt(yl,yh,h,L)
%    function [x,L] = mirdwt(yl,yh,h,L);
%
%    Function computes the inverse redundant discrete wavelet
%    transform x  for a 1D or 2D input signal. (Redundant means here
%    that the sub-sampling after each stage of the forward transform
%    has been omitted.) yl contains the lowpass and yh the highpass
%    components as computed, e.g., by mrdwt. In the case of a 2D
%    signal, the ordering in
%    yh is [lh hl hh lh hl ... ] (first letter refers to row, second
%    to column filtering).
%
%    Input:
%       yl : lowpass component
%       yh : highpass components
%       h  : scaling filter
%       L  : number of levels. In the case of a 1D signal,
%            length(yl) must  be divisible by 2^L;
%            in the case of a 2D signal, the row and
%            the column dimension must be divisible by 2^L.
%
%    Output:
%       x : finite length 1D or 2D signal
%       L : number of levels
%
%  HERE'S AN EASY WAY TO RUN THE EXAMPLES:
%  Cut-and-paste the example you want to run to a new file
%  called ex.m, for example. Delete out the % at the beginning
%  of each line in ex.m (Can use search-and-replace in your editor
%  to replace it with a space). Type 'ex' in matlab and hit return.
%
%
%    Example 1:
%    xin = makesig('Leopold',8);
%    h = daubcqf(4,'min');
%    L = 1;
%    [yl,yh,L] = mrdwt(xin,h,L);
%    [x,L] = mirdwt(yl,yh,h,L)
%    x = 0.0000 1.0000 0.0000 -0.0000 0 0 0 -0.0000
%    L = 1
%
%    Example 2:
%    load lena;
%    h = daubcqf(4,'min');
%    L = 2;
%    [ll_lev2,yh,L] = mrdwt(lena,h,L); % lena is a 256x256 matrix
%    N = 256;
%    lh_lev1 = yh(:,1:N);
%    hl_lev1 = yh(:,N+1:2*N);
%    hh_lev1 = yh(:,2*N+1:3*N);
%    lh_lev2 = yh(:,3*N+1:4*N);
%    hl_lev2 = yh(:,4*N+1:5*N);
%    hh_lev2 = yh(:,5*N+1:6*N);
%    figure; colormap(gray); imagesc(lena); title('Original Image');
%    figure; colormap(gray); imagesc(ll_lev2); title('LL Level 2');
%    figure; colormap(gray); imagesc(hh_lev2); title('HH Level 2');
%    figure; colormap(gray); imagesc(hl_lev2); title('HL Level 2');
%    figure; colormap(gray); imagesc(lh_lev2); title('LH Level 2');
%    figure; colormap(gray); imagesc(hh_lev1); title('HH Level 1');
%    figure; colormap(gray); imagesc(hl_lev1); title('HL Level 1');
%    figure; colormap(gray); imagesc(lh_lev1); title('LH Level 1');
%    [lena_Hat,L] = mirdwt(ll_lev2,yh,h,L);
%    figure; colormap(gray); imagesc(lena_Hat);
%                            title('Reconstructed Image');
%
%    This M-file is only a fallback wrapper: under Octave it forwards to
%    omirdwt, otherwise it raises an error asking for the toolbox to be
%    compiled.
%
%    See also: mdwt, midwt, mrdwt
%
%    Warning! min(size(yl))/2^L should be greater than length(h)
%
%Author: Markus Lang

if exist('OCTAVE_VERSION', 'builtin')
  % NOTE(review): presumably forces floating-point values for the compiled
  % routine - confirm.
  yl = yl * 1.0;
  yh = yh * 1.0;
  % Restrict the lookup to workspace variables so that a file or function
  % named "L" on the path is not mistaken for the optional argument.
  if (exist('L', 'var'))
    [x,L] = omirdwt(yl,yh,h,L);
  else
    [x,L] = omirdwt(yl,yh,h);
  end
else
  error('You must compile wavelet toolbox before use')
end
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/mrdwt.m
================================================
function [yl,yh,L] = mrdwt(x,h,L)
%    [yl,yh,L] = mrdwt(x,h,L);
%
%    Function computes the redundant discrete wavelet transform y
%    for a 1D  or 2D input signal. (Redundant means here that the
%    sub-sampling after each stage is omitted.) yl contains the
%    lowpass and yh the highpass components. In the case of a 2D
%    signal, the ordering in yh is
%    [lh hl hh lh hl ... ] (first letter refers to row, second to
%    column filtering).
%
%    Input:
%       x : finite length 1D or 2D signal (implicitly periodized)
%       h : scaling filter
%       L : number of levels. In the case of a 1D signal,
%           length(x) must be divisible by 2^L;
%           in the case of a 2D signal, the row and the
%           column dimension must be divisible by 2^L.
%           If no argument is
%           specified, a full DWT is returned for maximal possible L.
%
%    Output:
%       yl : lowpass component
%       yh : highpass components
%       L  : number of levels
%
%  HERE'S AN EASY WAY TO RUN THE EXAMPLES:
%  Cut-and-paste the example you want to run to a new file
%  called ex.m, for example. Delete out the % at the beginning
%  of each line in ex.m (Can use search-and-replace in your editor
%  to replace it with a space). Type 'ex' in matlab and hit return.
%
%
%    Example 1:
%    x = makesig('Leopold',8);
%    h = daubcqf(4,'min');
%    L = 1;
%    [yl,yh,L] = mrdwt(x,h,L)
%    yl =  0.8365  0.4830 0 0 0 0 -0.1294 0.2241
%    yh = -0.2241 -0.1294 0 0 0 0 -0.4830 0.8365
%    L = 1
%
%    Example 2:
%    load lena;
%    h = daubcqf(4,'min');
%    L = 2;
%    [ll_lev2,yh,L] = mrdwt(lena,h,L); % lena is a 256x256 matrix
%    N = 256;
%    lh_lev1 = yh(:,1:N);
%    hl_lev1 = yh(:,N+1:2*N);
%    hh_lev1 = yh(:,2*N+1:3*N);
%    lh_lev2 = yh(:,3*N+1:4*N);
%    hl_lev2 = yh(:,4*N+1:5*N);
%    hh_lev2 = yh(:,5*N+1:6*N);
%    figure; colormap(gray); imagesc(lena); title('Original Image');
%    figure; colormap(gray); imagesc(ll_lev2); title('LL Level 2');
%    figure; colormap(gray); imagesc(hh_lev2); title('HH Level 2');
%    figure; colormap(gray); imagesc(hl_lev2); title('HL Level 2');
%    figure; colormap(gray); imagesc(lh_lev2); title('LH Level 2');
%    figure; colormap(gray); imagesc(hh_lev1); title('HH Level 1');
%    figure; colormap(gray); imagesc(hl_lev1); title('HL Level 1');
%    figure; colormap(gray); imagesc(lh_lev1); title('LH Level 1');
%
%    This M-file is only a fallback wrapper: under Octave it forwards to
%    omrdwt, otherwise it raises an error asking for the toolbox to be
%    compiled.
%
%    See also: mdwt, midwt, mirdwt
%
%    Warning! min(size(x))/2^L should be greater than length(h)
%
%Author: Markus Lang

if exist('OCTAVE_VERSION', 'builtin')
  % NOTE(review): presumably forces a floating-point value for the compiled
  % routine (e.g. for logical input such as makesig('Leopold')) - confirm.
  x = x * 1.0;
  % Restrict the lookup to workspace variables so that a file or function
  % named "L" on the path is not mistaken for the optional argument.
  if (exist('L', 'var'))
    [yl,yh,L] = omrdwt(x,h,L);
  else
    [yl,yh,L] = omrdwt(x,h);
  end
else
  error('You must compile wavelet toolbox before use')
end
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/setopt.m
================================================
function option = setopt(opt_par,default)
%    option = setopt(opt_par,default);
%
%    SETOPT can modify a default option vector with user specified options.
%    Every entry of opt_par that is zero (or missing because opt_par is
%    shorter than default) is replaced by the corresponding default; nonzero
%    entries are kept. A zero therefore cannot be used to override a nonzero
%    default.
%
%    Input:
%       opt_par : Users desired option vector (may be empty, shorter or
%                 longer than default)
%       default : Program default option vector
%
%    Output:
%       option : New option vector, with max(length(opt_par),
%                length(default)) entries
%
%    Example:
%       opt_par = [1 2 3 4];
%       default = [1 1 1 1];
%       option = setopt(opt_par,default)
%       option = 1 2 3 4
%
%Author: Jan Erik Odegard

if (nargin < 2)
  error('You need two inputs');
end;
len = length(opt_par);
% A user vector longer than the defaults (e.g. the 7-element option vector
% returned by denoise fed back in against its 6-element defaults) used to
% fail with a dimension mismatch in the element-wise product below. Pad the
% defaults with zeros so the extra user entries pass through unchanged.
if (len > length(default))
  default(length(default)+1:len) = 0;
end;
option = zeros(size(default));
option(1:len) = opt_par(1:len);
% Fill every zero slot with its default value.
option = option + (option == 0).*default;
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.01/INSTALL
================================================
################################################################################# #File Name: INSTALL #Last Modification Date: 11/16/95 10:30:38 #Current Version: INSTALL 1.13 #File Creation Date: Wed Aug 25 09:25:06 1993 #Author: Ramesh Gopinath # #Copyright: All software, documentation, and related files in this distribution # are Copyright (c) 1993-1995 Rice University # #Permission is granted for use and non-profit distribution providing that this #notice be clearly maintained. The right to distribute any portion for profit #or as part of any commercial product is specifically reserved for the author. 
# #Change History: # ################################################################################# In order to install this distribution of wlet-tools: 1. Uncompress and extract the tar archive in the desired directory. uncompress RWT.tar.Z tar xvf RWT.tar NOTE: New subdirectories (rice-wlet-tools and rice-atr-tools) will be generated in directory where you extract the archive. 2. cd rice-wlet-tools 3. make all 4. make install 5. Append the paths to the mex, mfile and wdemo directories. That is, in .cshrc add the following lines at the end: setenv RWT_HOME YOUR/LOCAL/PATH/TO setenv RWT_PATH $RWT_HOME/rice-wlet-tools/mex:$RWT_HOME/rice-wlet-tools/mfiles:\ $RWT_HOME/rice-wlet-tools/wdemos:$RWT_HOME/rice-atr-tools/mex:\ $RWT_HOME/rice-atr-tools/mfiles:$RWT_HOME/rice-atr-tools/sardemo:\ $RWT_HOME setenv MATLABPATH $RWT_PATH':'$MATLABPATH where YOUR/LOCAL/PATH/TO is replaced with the actual path to the directory where rice-wlet-tools and rice-atr-tools are located on your system NOTE: If you do not have the environment variable MATLABPATH previously defined change the line setenv MATLABPATH $RWT_PATH':'$MATLABPATH to setenv MATLABPATH $RWT_PATH ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.01/README ================================================ ################################################################################# #File Name: README #Last Modification Date: 9/1/94 10:11:28 #Current Version: README 1.5 #File Creation Date: Wed Aug 25 09:25:06 1993 #Author: Ramesh Gopinath # #Copyright: All software, documentation, and related files in this distribution # are Copyright (c) 1993 Rice University # #Permission is granted for use and non-profit distribution providing that this #notice be clearly maintained. The right to distribute any portion for profit #or as part of any commercial product is specifically reserved for the author. 
# #Change History: # ################################################################################# This "rice-wlet-tools", version 2.01 Released - INSTALLATION: To install this distribution of wlet-tools see INSTALL. SOURCE: ftp: cml.rice.edu (128.42.62.23) /pub/software mosaic: URL http://jazz.rice.edu Associated references can be obtained from directory /pub/dsp/papers and /pub/reports EMAIL: For bug reports and questions send email to webmaster-dsp@ece.rice.edu CONDITIONS FOR USE: This software is Copyright (C) Rice University 1993. You have the right to use, free of charge, with the following terms and conditions: (1) You can redistribute this software in source form. If you redistribute this software in compiled form you will include the source code. (2) You can distribute your own applications that link this software if you include the source code for this software. (3) You own full rights to any output files you generate with this software. (4) You can make modifications to this software and use it for in-house use only. Under no circumstances can modified software be redistributed. (5) If you make any modifications to this software you will send the changes by email to webmaster-dsp@ece.rice.edu (6) The DSP group at Rice University shall be credited should this software be used in in any form or written about in any publication. (7) This software is provided "as is", without warranty by Rice University. In no event shall Rice University be liable for any loss or for any indirect, special, punitive, exemplary, incidental, or consequential damages arising from the use, possession or performance of this software. 
--------- ALTERNATIVE WAY OF GETTING TO SOFTWARE AND REPORTS (THIS MIGHT BE DISCONTINUED SINCE IT IS NOT ROBUST): It can also be obtained (usually) with the following command on unix systems: %telnet dsp.rice.edu 5555 |sed '1,3d' | csh -fbs software OR %telnet 128.42.4.62 5555 |sed '1,3d' | csh -fbs software You probably want to add alias riceget "telnet 128.42.4.62 5555 |sed '1,3d' | csh -fsb" so that you can access the distribution (which will hopefully be updated periodically) %riceget OPTIONS where options is a list of options. %riceget help would return all options currently available. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.01/doc/index.html ================================================ Rice Wavelet Toolbox Documentation

Rice Wavelet Toolbox Documentation Version 2.01


A Y in the M-file and/or the MEX-file column indicates whether the given function is implemented as a matlab M-file, a MEX-file or both. An X in the Depend column will indicate that although the function itself is not a MEX-file it depends on subroutines written in C and compiled as MEX-files for significant speedup on 2D problems in particular.
Function Description M-file MEX-file Depend
denoise Nonlinear wavelet denoising Y X
dwt Computes the 1D and 2D discrete wavelet transform Y Y
hoeleder Estimate of the Hoelder exponent for a given scaling function Y
makesig Generates the 'Donoho' test signals Y


The latest version of the Rice Wavelet Toolbox is available in Version 2.3
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.3/INSTALL ================================================ Installation instructions ------------------------- In order to install this distribution of Rice Wavelet Tools version 2.3 released - 1. Properly set up your system to create MEX-files. Please refer to the "Matlab Application Program Guide" to set up your Matlab installation and C compiler so that you can compile C MEX-files on your system. All reference documentation is available on the MathWorks web page: www.mathworks.com 2. Make a toolbox directory and uncompress/extract all the files. For example, in the unix environment, gunzip rwt.tar.gz tar xvf rwt.tar 3. Run MATLAB and change to the toolbox directory containing the files. 4. Compile the toolbox by executing the Matlab command compile 5. Add the toolbox directory to your Matlab path. 6. For further instructions, please refer to the README file. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.3/INSTALL_PRECOMPILED ================================================ Installation instructions ------------------------- In order to install this distribution of Rice Wavelet Tools version 2.3 released - 1. Make a toolbox directory and uncompress/extract all the files. For example, in the unix environment, gunzip rwt.tar.gz tar xvf rwt.tar 2. Add the toolbox directory to your Matlab path. 3. For further instructions, please refer to the README file. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.3/LICENSE ================================================ This "rice-wlet-tools", version 2.3 Released - CONDITIONS FOR USE: Copyright (c) 2000 RICE UNIVERSITY. All rights reserved. This software is distributed and licensed to you on a non-exclusive basis, free-of-charge. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistribution of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistribution in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. All advertising materials mentioning features or use of this software must display the following acknowledgment: This product includes software developed by Rice University, Houston, Texas and its contributors. 4. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY WILLIAM MARSH RICE UNIVERSITY, HOUSTON, TEXAS, AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RICE UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTIONS) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE), PRODUCT LIABILITY, OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
For information on commercial licenses, contact Rice University's Office of Technology Transfer at techtran@rice.edu or (713) 348-6173 ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.3/README ================================================ Rice Wavelet Tools version 2.3 Released - NEWER VERSION: A newer version of this toolbox is available at http://www.dsp.rice.edu/software/rwt.shtml INSTALLATION: To install this distribution of Rice Wavelet Tools see the INSTALL file. SOURCE: www.dsp.rice.edu/software/rwt.shtml EMAIL: For bug reports and questions, send email to webmaster-dsp@ece.rice.edu CONDITIONS FOR USE: See the LICENSE file TOOLBOX FUNCTIONS: Wavelet Transforms mdwt - Discrete orthogonal wavelet transform using the Mallat algorithm (1D and 2D) midwt - Inverse discrete orthogonal wavelet transform mrdwt - Undecimated (redundant) discrete wavelet transform (1D and 2D) mirdwt - Inverse undecimated discrete wavelet transform daubcqf - Daubechies filter coefficients Wavelet Domain Processing denoise - Denoise signals and images by thresholding wavelet coefficients HardTh - Hard thresholding SoftTh - Soft thresholding Other makesig - Create Donoho-Johnstone test signals compile - Compile the Rice Wavelet Toolbox Functions omitted in this version of toolbox can be found in version 2.01 at www.dsp.rice.edu/software/RWT2.01/RWT-2.01.tar.Z This version may not compile with Matlab 6.0 (Release 12) and above. 
This problem has been fixed in version 2.4 at www.dsp.rice.edu/software/rwt.shtml
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/doc/CMakeLists.txt
================================================
# Add a `doc` target that generates the API documentation with Doxygen.
# Nothing is defined when Doxygen is not installed.
find_package(Doxygen)
if(DOXYGEN_FOUND)
  # Expand the @VAR@ placeholders of Doxyfile.in into the build tree.
  configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in
                 ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)

  # Doxygen is run from the build directory on the generated Doxyfile.
  add_custom_target(doc
    ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    COMMENT "Generating API documentation with Doxygen" VERBATIM
  )

  # The result of find_package(LATEX) is not consulted below; it is kept so
  # that the LaTeX cache variables remain available to the project.
  find_package(LATEX)
  find_program(DOXYFILE_MAKE make)
  mark_as_advanced(DOXYFILE_MAKE)

  # Only hook the LaTeX build onto `doc` when a make program was actually
  # found; otherwise the post-build command would be the literal
  # "DOXYFILE_MAKE-NOTFOUND" and the whole `doc` target would fail.
  if(DOXYFILE_MAKE)
    # NOTE(review): Doxygen runs in ${CMAKE_CURRENT_BINARY_DIR} and the
    # Doxyfile leaves OUTPUT_DIRECTORY blank, so for out-of-source builds the
    # latex/ directory may be created under the build tree rather than under
    # ${CMAKE_CURRENT_SOURCE_DIR} - confirm against LATEX_OUTPUT in Doxyfile.in.
    add_custom_command(TARGET doc POST_BUILD
      COMMAND "${DOXYFILE_MAKE}"
      COMMENT "Running LaTeX for Doxygen documentation in ${CMAKE_CURRENT_SOURCE_DIR}/latex..."
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/latex")
  else()
    message(STATUS "make not found: the 'doc' target will skip the LaTeX build step")
  endif()
endif(DOXYGEN_FOUND)
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/doc/Doxyfile.in
================================================
# Doxyfile 1.8.3.1 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" "). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. 
Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # http://www.gnu.org/software/libiconv for the list of possible encodings. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or sequence of words) that should # identify the project. Note that if you do not use Doxywizard you need # to put quotes around the project name if it contains spaces. PROJECT_NAME = "Rice Wavelet Toolbox" # The PROJECT_NUMBER tag can be used to enter a project or revision number. # This could be handy for archiving the generated documentation or # if some version control system is used. PROJECT_NUMBER = # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer # a quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify an logo or icon that is # included in the documentation. The maximum height of the logo should not # exceed 55 pixels and the maximum width should not exceed 200 pixels. # Doxygen will copy the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. OUTPUT_DIRECTORY = # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. # Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. 
CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # The default language is English, other supported languages are: # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will # include brief member descriptions after the members that are listed in # the file and class documentation (similar to JavaDoc). # Set to NO to disable this. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend # the brief description of a member or function before the detailed description. # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator # that is used to form the text in various listings. Each string # in this list, if found as the leading text of the brief description, will be # stripped from the text and the result after processing the whole list, is # used as the annotated text. Otherwise, the brief description is used as-is. 
# If left blank, the following values are used ("$name" is automatically # replaced with the name of the entity): "The $name class" "The $name widget" # "The $name file" "is" "provides" "specifies" "contains" # "represents" "a" "an" "the" ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # Doxygen will generate a detailed section even if there is only a brief # description. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before files name in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. FULL_PATH_NAMES = YES # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. Stripping is # only done if one of the specified strings matches the left-hand part of # the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the # path to strip. Note that you specify absolute paths here, but also # relative paths, which will be relative from the directory where doxygen is # started. STRIP_FROM_PATH = @CMAKE_CURRENT_SOURCE_DIR@/.. # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of # the path mentioned in the documentation of a class, which tells # the reader which header file to include in order to use a class. # If left blank only the name of the header file containing the class # definition is used. Otherwise one should specify the include paths that # are normally passed to the compiler using the -I flag. 
STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter # (but less readable) file names. This can be useful if your file system # doesn't support long names like on DOS, Mac, or CD-ROM. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a JavaDoc-style # comment as the brief description. If set to NO, the JavaDoc # comments will behave just like regular Qt-style comments # (thus requiring an explicit @brief command for a brief description.) JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then Doxygen will # interpret the first line (until the first dot) of a Qt-style # comment as the brief description. If set to NO, the comments # will behave just like regular Qt-style comments (thus requiring # an explicit \brief command for a brief description.) QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen # treat a multi-line C++ special comment block (i.e. a block of //! or /// # comments) as a brief description. This used to be the default behaviour. # The new default is to treat a multi-line C++ comment block as a detailed # description. Set this tag to YES if you prefer the old behaviour instead. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented # member inherits the documentation from any documented member that it # re-implements. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce # a new page for each member. If set to NO, the documentation of a member will # be part of the file/class/namespace that contains it. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. 
An alias has the form "name=value". # For example adding "sideeffect=\par Side Effects:\n" will allow you to # put the command \sideeffect (or @sideeffect) in the documentation, which # will result in a user-defined paragraph with heading "Side Effects:". # You can put \n's in the value part of an alias to insert newlines. ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding # "class=itcl::class" will allow you to use the command class in the # itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for # Java. For instance, namespaces will be presented as packages, qualified # scopes will look different, etc. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources only. Doxygen will then generate output that is more tailored for # Fortran. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for # VHDL. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, # and language is one of the parsers supported by doxygen: IDL, Java, # Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, # C++. 
For instance to make doxygen treat .inc files as Fortran files (default # is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note # that for custom extensions you also need to set FILE_PATTERNS otherwise the # files are not read by doxygen. EXTENSION_MAPPING = # If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all # comments according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you # can mix doxygen, HTML, and XML commands with Markdown formatting. # Disable only in case of backward compatibility issues. MARKDOWN_SUPPORT = YES # When enabled doxygen tries to link words that correspond to documented classes, # or namespaces to their corresponding documentation. Such a link can be # prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. # func(std::string) {}). This also makes the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. # Doxygen will parse them like normal C++ but will assume all classes use public # instead of private inheritance when no explicit protection keyword is present.
SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES (the # default) will make doxygen replace the get and set methods by a property in # the documentation. This will only work if the methods are indeed getting or # setting a simple type. If this is not the case, or you want to show the # methods anyway, you should set this option to NO. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a # subgroup of that type (e.g. under the Public Functions section). Set it to # NO to prevent subgrouping. Alternatively, this can be done per class using # the \nosubgrouping command. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and # unions are shown inside the group in which they are included (e.g. using # @ingroup) instead of on a separate page (for HTML and Man pages) or # section (for LaTeX and RTF). INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and # unions with only public data fields will be shown inline in the documentation # of the scope in which they are defined (i.e. file, namespace, or group # documentation), provided this scope is documented. If set to NO (the default), # structs, classes, and unions are shown on a separate page (for HTML and Man # pages) or section (for LaTeX and RTF). 
INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum # is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically # be useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. TYPEDEF_HIDES_STRUCT = NO # The SYMBOL_CACHE_SIZE determines the size of the internal cache used to # determine which symbols to keep in memory and which to flush to disk. # When the cache is full, less often used symbols will be written to disk. # For small to medium size projects (<1000 input files) the default value is # probably good enough. For larger projects a too small cache size can cause # doxygen to be busy swapping symbols to and from disk most of the time # causing a significant performance penalty. # If the system has enough physical memory increasing the cache will improve the # performance by keeping more symbols in memory. Note that the value works on # a logarithmic scale so increasing the size by one will roughly double the # memory usage. The cache size is given by this formula: # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols. SYMBOL_CACHE_SIZE = 0 # Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be # set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given # their name and scope. Since this can be an expensive process and often the # same symbol appears multiple times in the code, doxygen keeps a cache of # pre-resolved symbols. If the cache is too small doxygen will become slower. # If the cache is too large, memory is wasted.
The cache size is given by this # formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. # Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. EXTRACT_PRIVATE = NO # If the EXTRACT_PACKAGE tag is set to YES all members with package or internal # scope will be included in the documentation. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. # If set to NO (the default) only methods in the interface are included. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base # name of the file that contains the anonymous namespace. By default # anonymous namespaces are hidden. 
EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. # If set to NO (the default) these members will be included in the # various overviews, but no documentation section is generated. # This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. # If set to NO (the default) these classes will be included in the various # overviews. This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. # If set to NO (the default) these declarations will be included in the # documentation. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any # documentation blocks found inside the body of a function. # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen # will show members with their full class and namespace scopes in the # documentation. 
If set to YES the scope will be hidden. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. SHOW_INCLUDE_FILES = YES # If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen # will list include files with double quotes in the documentation # rather than with sharp brackets. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] # is inserted in the documentation for inline members. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen # will sort the (detailed) documentation of file and class members # alphabetically by member name. If set to NO the members will appear in # declaration order. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the # brief documentation of file, namespace and class members alphabetically # by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen # will sort the (brief and detailed) documentation of class members so that # constructors and destructors are listed first. If set to NO (the default) # the constructors will appear in the respective orders defined by # SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. # This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the # hierarchy of group names into alphabetical order. If set to NO (the default) # the group names will appear in their defined order. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be # sorted by fully-qualified names, including namespaces. 
If set to # NO (the default), the class list will be sorted only by class name, # not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the # alphabetical list. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to # do proper type resolution of all parameters of a function it will reject a # match between the prototype and the implementation of a member function even # if there is only one candidate or it is obvious which candidate to choose # by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen # will still accept a match between prototype and implementation in such cases. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test # commands in the documentation. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if section-label ... \endif # and \cond section-label ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines # the initial value of a variable or macro consists of for it to appear in # the documentation. If the initializer consists of more lines than specified # here it will be hidden. 
Use a value of 0 to hide initializers completely. # The appearance of the initializer of individual variables and macros in the # documentation can be controlled using \showinitializer or \hideinitializer # command in the documentation regardless of this setting. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). The default is YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the # Namespaces page. # This will remove the Namespaces entry from the Quick Index # and from the Folder Tree View (if specified). The default is YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command <command> <input-file>, where <command> is the value of # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file # provided by doxygen. Whatever the program writes to standard output # is used as the file version. See the manual for examples. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. # You can optionally specify a file name after the option, if omitted # DoxygenLayout.xml will be used as the name of the layout file.
LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files # containing the references data. This must be a list of .bib files. The # .bib extension is automatically appended if omitted. Using this command # requires the bibtex tool to be installed. See also # http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style # of the bibliography can be controlled using LATEX_BIB_STYLE. To use this # feature you need bibtex and perl available in the search path. Do not use # file names with spaces, bibtex cannot handle them. CITE_BIB_FILES = #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. If left blank NO is used. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank # NO is used. WARNINGS = YES # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings # for undocumented members. If EXTRACT_ALL is set to YES then this flag will # automatically be disabled. WARN_IF_UNDOCUMENTED = YES # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some # parameters in a documented function, or documenting parameters that # don't exist or using markup commands wrongly. WARN_IF_DOC_ERROR = YES # The WARN_NO_PARAMDOC option can be enabled to get warnings for # functions that are documented, but have no documentation for their parameters # or return value. If set to NO (the default) doxygen will only warn about # wrong or incomplete parameter documentation, but not about the absence of # documentation. 
WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that # doxygen can produce. The string should contain the $file, $line, and $text # tags, which will be replaced by the file and line number from which the # warning originated and the warning text. Optionally the format may contain # $version, which will be replaced by the version of the file (if it could # be obtained via FILE_VERSION_FILTER) WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning # and error messages should be written. If left blank the output is written # to stderr. WARN_LOGFILE = #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = @CMAKE_CURRENT_SOURCE_DIR@/../src @CMAKE_CURRENT_SOURCE_DIR@/../lib/src @CMAKE_CURRENT_SOURCE_DIR@/../lib/inc @CMAKE_CURRENT_SOURCE_DIR@/../mex # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is # also the default input encoding. Doxygen uses libiconv (or the iconv built # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for # the list of possible encodings. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. 
If left # blank the following patterns are tested: # *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh # *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py # *.f90 *.f *.for *.vhd *.vhdl FILE_PATTERNS = # The RECURSIVE tag can be used to specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. Note that the wildcards are matched # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see # the \include command).
EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank all files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude # commands irrespective of the value of the RECURSIVE tag. # Possible values are YES and NO. If left blank NO is used. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or # directories that contain images that are included in the documentation (see # the \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command <filter> <input-file>, where <filter> # is the value of the INPUT_FILTER tag, and <input-file> is the name of an # input file. Doxygen will then use the output that the filter program writes # to standard output. # If FILTER_PATTERNS is specified, this tag will be # ignored. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. # Doxygen will compare the file name with each pattern and apply the # filter if there is a match. # The filters are a list of the form: # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further # info on how filters are used. If FILTER_PATTERNS is empty or if # none of the patterns match the file name, INPUT_FILTER is applied. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will be used to filter the input files when producing source # files to browse (i.e. when SOURCE_BROWSER is set to YES). FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern.
A pattern will override the setting for FILTER_PATTERNS (if any) # and it is also possible to disable source filtering for a specific pattern # using *.ext= (so without naming a filter). This option only has effect when # FILTER_SOURCE_FILES is enabled. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page (index.html). # This can be useful if you have a project on for instance GitHub and want to reuse # the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. SOURCE_BROWSER = YES # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = YES # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C, C++ and Fortran comments will always remain visible. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES # then for each documented function all documented # functions referencing it will be listed. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed.
REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. # Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES # If the USE_HTAGS tag is set to YES then the references to source code # will point to the HTML generated by the htags(1) tool instead of doxygen # built-in source browser. The htags tool is part of GNU's global source # tagging system (see http://www.gnu.org/software/global/global.html). You # will need version 4.8.6 or higher. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. ALPHABETICAL_INDEX = YES # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. # The IGNORE_PREFIX tag can be used to specify one or more prefixes that # should be ignored while generating the index headers. 
IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. Note that when using a custom header you are responsible # for the proper inclusion of any scripts and style sheets that doxygen # needs, which is dependent on the configuration options used. # It is advised to generate a default header using "doxygen -w html # header.html footer.html stylesheet.css YourConfigFile" and then modify # that header. Note that the header is subject to change so you typically # have to redo this when upgrading to a newer version of doxygen or when # changing the value of configuration settings such as GENERATE_TREEVIEW! HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. It can be used to # fine-tune the look of the HTML output. If left blank doxygen will # generate a default style sheet. 
Note that it is recommended to use # HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this # tag will in the future become obsolete. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify an additional # user-defined cascading style sheet that is included after the standard # style sheets created by doxygen. Using this option one can overrule # certain style aspects. This is preferred over using HTML_STYLESHEET # since it does not replace the standard style sheet and is therefore more # robust against future updates. Doxygen will copy the style sheet file to # the output directory. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that # the files will be copied as-is; there are no commands or markers available. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. # Doxygen will adjust the colors in the style sheet and background images # according to this color. Hue is specified as an angle on a colorwheel, # see http://en.wikipedia.org/wiki/Hue for more information. # For instance the value 0 represents red, 60 is yellow, 120 is green, # 180 is cyan, 240 is blue, 300 purple, and 360 is red again. # The allowed range is 0 to 359. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of # the colors in the HTML output. For a value of 0 the output will use # grayscales only. A value of 255 will produce the most vivid colors. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to # the luminance component of the colors in the HTML output.
Values below # 100 gradually make the output lighter, whereas values above 100 make # the output darker. The value divided by 100 is the actual gamma applied, # so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2, # and 100 does not change the gamma. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting # this to NO can help when comparing the output of multiple runs. HTML_TIMESTAMP = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of # entries shown in the various tree structured indices initially; the user # can expand and collapse entries dynamically later on. Doxygen will expand # the tree to such a level that at most the specified number of entries are # visible (unless a fully collapsed tree already exceeds this amount). # So setting the number of entries to 1 will produce a fully collapsed tree by # default. 0 is a special value representing an infinite number of entries # and will result in a fully expanded tree by default. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files # will be generated that can be used as input for Apple's Xcode 3 # integrated development environment, introduced with OSX 10.5 (Leopard). # To create a documentation set, doxygen will generate a Makefile in the # HTML output directory. Running make will produce the docset in that # directory and running "make install" will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find # it at startup. # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information.
GENERATE_DOCSET = NO # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the # feed. A documentation feed provides an umbrella under which multiple # documentation sets from a single provider (such as a company or product suite) # can be grouped. DOCSET_FEEDNAME = "Doxygen generated docs" # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that # should uniquely identify the documentation set bundle. This should be a # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen # will append .docset to the name. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely # identify the documentation publisher. This should be a reverse domain-name # style string, e.g. com.mycompany.MyDocSet.documentation. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES, additional index files # will be generated that can be used as input for tools like the # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) # of the generated HTML documentation. GENERATE_HTMLHELP = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can # be used to specify the file name of the resulting .chm file. You # can add a path in front of the file if the result should not be # written to the html output directory. CHM_FILE = # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can # be used to specify the location (absolute path including file name) of # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run # the HTML help compiler on the generated index.hhp. HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that # it should be included in the master .chm file (NO).
GENERATE_CHI = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING # is used to encode HtmlHelp index (hhk), content (hhc) and project file # content. CHM_INDEX_ENCODING = # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag # controls whether a binary table of contents is generated (YES) or a # normal table of contents (NO) in the .chm file. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members # to the contents of the HTML help documentation and to the tree view. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated # that can be used as input for Qt's qhelpgenerator to generate a # Qt Compressed Help (.qch) of the generated HTML documentation. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can # be used to specify the file name of the resulting .qch file. # The path specified is relative to the HTML output folder. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#namespace QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the virtual folder to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#virtual-folders QHP_VIRTUAL_FOLDER = doc # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to # add. For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see # # Qt Help Project / Custom Filters. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's # filter section matches.
# # Qt Help Project / Filter Attributes. QHP_SECT_FILTER_ATTRS = # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can # be used to specify the location of Qt's qhelpgenerator. # If non-empty doxygen will try to run qhelpgenerator on the generated # .qhp file. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files # will be generated, which together with the HTML files, form an Eclipse help # plugin. To install this plugin and make it available under the help contents # menu in Eclipse, the contents of the directory containing the HTML and XML # files needs to be copied into the plugins directory of eclipse. The name of # the directory within the plugins directory should be the same as # the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before # the help appears. GENERATE_ECLIPSEHELP = NO # A unique identifier for the eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have # this name. ECLIPSE_DOC_ID = org.doxygen.Project # The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) # at top of each HTML page. The value NO (the default) enables the index and # the value YES disables it. Since the tabs have the same information as the # navigation tree you can set this option to YES if you already set # GENERATE_TREEVIEW to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. # If the tag value is set to YES, a side panel will be generated # containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). # Windows users are probably better off using the HTML help feature.
# Since the tree basically has the same information as the tab index you # could consider setting DISABLE_INDEX to YES when enabling this option. GENERATE_TREEVIEW = NO # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values # (range [0,1..20]) that doxygen will group on one line in the generated HTML # documentation. Note that a value of 0 will completely suppress the enum # values from appearing in the overview section. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. TREEVIEW_WIDTH = 250 # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open # links to external symbols imported via tag files in a separate window. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of Latex formulas included # as images in the HTML documentation. The default is 10. Note that # when you change the font size after a successful doxygen run you need # to manually remove any form_*.png images from the HTML output directory # to force them to be regenerated. FORMULA_FONTSIZE = 14 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are # not supported properly for IE 6.0, but are supported on all modern browsers. # Note that when changing this option you need to delete any form_*.png files # in the HTML output before the changes have effect. FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax # (see http://www.mathjax.org) which uses client side Javascript for the # rendering instead of using prerendered bitmaps. Use this if you do not # have LaTeX installed or if you want formulas to look prettier in the HTML # output. When enabled you may also need to install MathJax separately and # configure the path to it using the MATHJAX_RELPATH option.
USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output. Supported types are HTML-CSS, NativeMML (i.e. MathML) and # SVG. The default value is HTML-CSS, which is slower, but has the best # compatibility. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the # HTML output directory using the MATHJAX_RELPATH option. The destination # directory should contain the MathJax.js script. For instance, if the mathjax # directory is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to # the MathJax Content Delivery Network so you can quickly see the result without # installing MathJax. # However, it is strongly recommended to install a local # copy of MathJax from http://www.mathjax.org before deployment. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension # names that should be enabled during MathJax rendering. MATHJAX_EXTENSIONS = # When the SEARCHENGINE tag is enabled doxygen will generate a search box # for the HTML output. The underlying search engine uses javascript # and DHTML and should work on any modern browser. Note that when using # HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets # (GENERATE_DOCSET) there is already a search function so this one should # typically be disabled. For large projects the javascript based search engine # can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. SEARCHENGINE = YES # When the SERVER_BASED_SEARCH tag is enabled the search engine will be # implemented using a web server instead of a web client using Javascript. # There are two flavours of web server based search depending on the # EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for # searching and an index file used by the script.
When EXTERNAL_SEARCH is # enabled the indexing and searching needs to be provided by external tools. # See the manual for details. SERVER_BASED_SEARCH = NO # When EXTERNAL_SEARCH is enabled doxygen will no longer generate the PHP # script for searching. Instead the search results are written to an XML file # which needs to be processed by an external indexer. Doxygen will invoke an # external search engine pointed to by the SEARCHENGINE_URL option to obtain # the search results. Doxygen ships with an example indexer (doxyindexer) and # search engine (doxysearch.cgi) which are based on the open source search engine # library Xapian. See the manual for configuration details. EXTERNAL_SEARCH = NO # The SEARCHENGINE_URL should point to a search engine hosted by a web server # which will return the search results when EXTERNAL_SEARCH is enabled. # Doxygen ships with an example search engine (doxysearch) which is based on # the open source search engine library Xapian. See the manual for configuration # details. SEARCHENGINE_URL = # When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed # search data is written to a file for indexing by an external tool. With the # SEARCHDATA_FILE tag the name of this file can be specified. SEARCHDATA_FILE = searchdata.xml # When SERVER_BASED_SEARCH AND EXTERNAL_SEARCH are both enabled the # EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is # useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple # projects and redirect the results back to the right project. EXTERNAL_SEARCH_ID = # The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen # projects other than the one defined by this configuration file, but that are # all added to the same external search index. Each project needs to have a # unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id # to a relative location where the documentation can be found.
# The format is: EXTRA_SEARCH_MAPPINGS = id1=loc1 id2=loc2 ... EXTRA_SEARCH_MAPPINGS = #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. GENERATE_LATEX = YES # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. If left blank `latex' will be used as the default command name. # Note that when enabling USE_PDFLATEX this option is only used for # generating bitmaps for formulas in the HTML output, but not in the # Makefile that is written to the output directory. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to # generate index for LaTeX. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact # LaTeX documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_LATEX = NO # The PAPER_TYPE tag can be used to set the paper type that is used # by the printer. Possible values are: a4, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = a4 # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX # packages that should be included in the LaTeX output. EXTRA_PACKAGES = # The LATEX_HEADER tag can be used to specify a personal LaTeX header for # the generated latex document. The header should contain everything until # the first chapter. If it is left blank doxygen will generate a # standard header.
Notice: only use this tag if you know what you are doing! LATEX_HEADER = # The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for # the generated latex document. The footer should contain everything after # the last chapter. If it is left blank doxygen will generate a # standard footer. Notice: only use this tag if you know what you are doing! LATEX_FOOTER = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated # is prepared for conversion to pdf (using ps2pdf). The pdf file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = YES # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of # plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode # command to the generated LaTeX files. This will instruct LaTeX to keep # running if errors occur, instead of asking the user for help. # This option is also used when generating formulas in HTML. LATEX_BATCHMODE = NO # If LATEX_HIDE_INDICES is set to YES then doxygen will not # include the index chapters (such as File Index, Compound Index, etc.) # in the output. LATEX_HIDE_INDICES = NO # If LATEX_SOURCE_CODE is set to YES then doxygen will include # source code with syntax highlighting in the LaTeX output. # Note that which sources are shown also depends on other settings # such as SOURCE_BROWSER. LATEX_SOURCE_CODE = NO # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See # http://en.wikipedia.org/wiki/BibTeX for more info.
LATEX_BIB_STYLE = plain #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output # The RTF output is optimized for Word 97 and may not look very pretty with # other RTF readers or editors. GENERATE_RTF = NO # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `rtf' will be used as the default path. RTF_OUTPUT = rtf # If the COMPACT_RTF tag is set to YES Doxygen generates more compact # RTF documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_RTF = NO # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated # will contain hyperlink fields. The RTF file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using WORD or other # programs which support those fields. # Note: wordpad (write) and others do not support links. RTF_HYPERLINKS = NO # Load style sheet definitions from file. Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an rtf document. # Syntax is similar to doxygen's config file. RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO # The MAN_OUTPUT tag is used to specify where the man pages will be put. 
# If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- # If the GENERATE_XML tag is set to YES Doxygen will # generate an XML file that captures the structure of # the code including all documentation. GENERATE_XML = NO # The XML_OUTPUT tag is used to specify where the XML pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `xml' will be used as the default path. XML_OUTPUT = xml # The XML_SCHEMA tag can be used to specify an XML schema, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_SCHEMA = # The XML_DTD tag can be used to specify an XML DTD, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_DTD = # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that # enabling this will significantly increase the size of the XML output. 
XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will # generate an AutoGen Definitions (see autogen.sf.net) file # that captures the structure of the code including all # documentation. Note that this feature is still experimental # and incomplete at the moment. GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- # If the GENERATE_PERLMOD tag is set to YES Doxygen will # generate a Perl module file that captures the structure of # the code including all documentation. Note that this # feature is still experimental and incomplete at the # moment. GENERATE_PERLMOD = NO # If the PERLMOD_LATEX tag is set to YES Doxygen will generate # the necessary Makefile rules, Perl scripts and LaTeX code to be able # to generate PDF and DVI output from the Perl module output. PERLMOD_LATEX = NO # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be # nicely formatted so it can be parsed by a human reader. # This is useful # if you want to understand what is going on. # On the other hand, if this # tag is set to NO the size of the Perl module output will be much smaller # and Perl will parse it just the same. PERLMOD_PRETTY = YES # The names of the make variables in the generated doxyrules.make file # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. # This is useful so different doxyrules.make files included by the same # Makefile don't overwrite each other's variables. 
PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = NO # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = NO # If the SEARCH_INCLUDES tag is set to YES (the default) the include files # pointed to by INCLUDE_PATH will be searched when a #include is found. SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by # the preprocessor. INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the # directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator.
PREDEFINED = # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition that # overrules the definition found in the source code. EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all references to function-like macros # that are alone on a line, have an all uppercase name, and do not end with a # semicolon, because these will confuse the parser if not removed. SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- # The TAGFILES option can be used to specify one or more tagfiles. For each # tag file the location of the external documentation should be added. The # format of a tag file without this location is as follows: # # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # # TAGFILES = file1=loc1 "file2 = loc2" ... # where "loc1" and "loc2" can be relative or absolute paths # or URLs. Note that each tag file must have a unique name (where the name does # NOT include the path). If a tag file is not located in the directory in which # doxygen is run, you must also specify the path to the tagfile here. TAGFILES = # When a file name is specified after GENERATE_TAGFILE, doxygen will create # a tag file that is based on the input files it reads. GENERATE_TAGFILE = # If the ALLEXTERNALS tag is set to YES all external classes will be listed # in the class index. If set to NO only the inherited external classes # will be listed. ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed # in the modules index. 
If set to NO, only the current project's groups will # be listed. EXTERNAL_GROUPS = YES # The PERL_PATH should be the absolute path and name of the perl script # interpreter (i.e. the result of `which perl'). PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option also works with HAVE_DOT disabled, but it is recommended to # install and use dot, since it yields more powerful graphs. CLASS_DIAGRAMS = YES # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the # documentation. The MSCGEN_PATH tag allows you to specify the directory where # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. MSCGEN_PATH = # If set to YES, the inheritance and collaboration graphs will hide # inheritance and usage relations if the target is undocumented # or is not a class. HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz, a graph visualization # toolkit from AT&T and Lucent Bell Labs. The other options in this section # have no effect if this option is set to NO (the default) HAVE_DOT = YES # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is # allowed to run in parallel. When set to 0 (the default) doxygen will # base this on the number of processors available in the system.
You can set it # explicitly to a value larger than 0 to get control over the balance # between CPU load and processing speed. DOT_NUM_THREADS = 0 # By default doxygen will use the Helvetica font for all dot files that # doxygen generates. When you want a differently looking font you can specify # the font name using DOT_FONTNAME. You need to make sure dot is able to find # the font, which can be done by putting it in a standard location or by setting # the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the # directory containing the font. DOT_FONTNAME = Helvetica # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. # The default size is 10pt. DOT_FONTSIZE = 10 # By default doxygen will tell dot to use the Helvetica font. # If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to # set the path where dot can find it. DOT_FONTPATH = # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force the # CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes. COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct groups dependencies GROUP_GRAPHS = YES # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. UML_LOOK = NO # If the UML_LOOK tag is enabled, the fields and methods are shown inside # the class node. 
If there are many fields or methods and many nodes the # graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS # threshold limits the number of items for each type to make the size more # manageable. Set this to 0 for no limit. Note that the threshold may be # exceeded by 50% before the limit is enforced. UML_LIMIT_NUM_FIELDS = 10 # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. TEMPLATE_RELATIONS = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files. INCLUDE_GRAPH = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = YES # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable call graphs # for selected functions only using the \callgraph command. CALL_GRAPH = YES # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then # doxygen will generate a caller dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. CALLER_GRAPH = YES # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will generate a graphical hierarchy of all classes instead of a textual one. 
GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. The dependency relations are determined by the #include # relations between the files in the directories. DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are svg, png, jpg, or gif. # If left blank png will be used. If you choose svg you need to set # HTML_FILE_EXTENSION to xhtml in order to make the SVG files # visible in IE 9+ (other browsers do not have this requirement). DOT_IMAGE_FORMAT = png # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to # enable generation of interactive SVG images that allow zooming and panning. # Note that this requires a modern browser other than Internet Explorer. # Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you # need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files # visible. Older versions of IE do not have SVG support. INTERACTIVE_SVG = NO # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path. DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). DOTFILE_DIRS = # The MSCFILE_DIRS tag can be used to specify one or more directories that # contain msc files that are included in the documentation (see the # \mscfile command). MSCFILE_DIRS = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box. 
Note that if the # number of direct children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lie further from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. This is disabled by default, because dot on Windows does not # seem to support this out of the box. Warning: Depending on the platform used, # enabling this option may lead to badly anti-aliased labels on the edges of # a graph (i.e. they become hard to read). DOT_TRANSPARENT = NO # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default. DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs. 
DOT_CLEANUP = YES ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/inc/rwt_init.h ================================================ /*! \file rwt_init.h \brief Header for matlab init functions in init.c */ #ifndef RWT_INIT_H_ #define RWT_INIT_H_ #include "rwt_platform.h" #if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE) #include "mex.h" #ifndef OCTAVE_MEX_FILE #include "matrix.h" #endif typedef struct { size_t nrows; /*!< The number of rows in the input matrix. Output matrix will match. */ size_t ncols; /*!< The number of columns in the input matrix. Output matrix will match. */ int levels; /*!< L, the number of levels for the transform. */ int ncoeff; /*!< Length of h / the number of scaling coefficients */ double *scalings; /*!< Wavelet scaling coefficients */ } rwt_init_params; typedef enum {NORMAL_DWT, REDUNDANT_DWT, INVERSE_DWT, INVERSE_REDUNDANT_DWT} transform_t; #endif #ifdef __cplusplus extern "C" { #endif #if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE) rwt_init_params rwt_matlab_init(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[], transform_t dwtType); #else int rwt_find_levels(size_t m, size_t n); int rwt_check_levels(int levels, size_t rows, size_t cols); #endif #ifdef __cplusplus } #endif #endif /* RWT_INIT_H_ */ ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/inc/rwt_platform.h ================================================ /*! \file rwt_platform.h \brief Abstract away environment differences and provide some common macros */ #ifndef RWT_PLATFORM_H #define RWT_PLATFORM_H #include #include #include /*! For MATLAB we address 2d inputs and outputs in column-major order */ /*! For Python we address 2d inputs and outputs in row-major order */ /*! The offset macros are for debugging */ /*! 
The parameters for the mat() macro are: * a - the base pointer to the matrix of values * i - index of the target row * j - index of the target column * m - the number of rows * n - the number of columns */ #if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE) #define COLUMN_MAJOR_ORDER 1 #include "mex.h" #ifndef OCTAVE_MEX_FILE #include "matrix.h" #endif #define mat(a, i, j, m, n) (*(a + (m*(j)+i))) #define mat_offset(a, i, j, m, n) (m*(j)+i) #define offset_row(offset, m, n) (offset % m) #define offset_col(offset, m, n) ((offset - (offset % m)) / m) #define rwt_printf(fmt, ...) mexPrintf(fmt, ##__VA_ARGS__) #define rwt_errormsg(msg) mexErrMsgTxt(msg) #else #define ROW_MAJOR_ORDER 1 #define mat(a, i, j, m, n) (*(a + (n*(i)+j))) #define mat_offset(a, i, j, m, n) (n*(i)+j) #define offset_row(offset, m, n) ((offset - (offset % n)) / n) #define offset_col(offset, m, n) (offset % n) #define rwt_printf(fmt, ...) printf(fmt, ##__VA_ARGS__) #define rwt_errormsg(msg) printf("\033[91m%s\033[0m\n", msg); #endif #ifndef max #define max(A,B) (A > B ? A : B) #endif #ifndef min #define min(A,B) (A < B ? A : B) #endif #define even(x) ((x & 1) ? 0 : 1) #ifdef __cplusplus extern "C" { #endif void *rwt_malloc(size_t size); void *rwt_calloc(size_t num, size_t size); void rwt_free(void *ptr); #ifdef __cplusplus } #endif #endif ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/inc/rwt_transforms.h ================================================ /*! \file rwt_transforms.h \brief Function prototypes for the transform implementations */ #ifndef TRANSFORMS_H_ #define TRANSFORMS_H_ #include #ifdef __cplusplus extern "C" { #endif /*! dwt and rdwt take an input x and store the result in y or yl and yh * idwt and irdwt take an input y or yl and yh and store the result in x * In all cases it is expected that the output array has already been * allocated prior to calling the transform function. 
*/ void dwt(double *x, size_t nrows, size_t ncols, double *h, int ncoeff, int levels, double *y); void idwt(double *x, size_t nrows, size_t ncols, double *h, int ncoeff, int levels, double *y); void rdwt(double *x, size_t nrows, size_t ncols, double *h, int ncoeff, int levels, double *yl, double *yh); void irdwt(double *x, size_t nrows, size_t ncols, double *h, int ncoeff, int levels, double *yl, double *yh); #ifdef __cplusplus } #endif #endif /* TRANSFORMS_H_ */ ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/src/CMakeLists.txt ================================================ include_directories ("${PROJECT_SOURCE_DIR}/lib/inc") add_library(dwt dwt.c) add_library(idwt idwt.c) add_library(irdwt irdwt.c) add_library(rdwt rdwt.c) add_library(platform platform.c) ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/src/dwt.c ================================================ /*! \file dwt.c \brief Implementation of the discrete wavelet transform */ #include "rwt_platform.h" /*! 
* Perform convolution for dwt * * @param x_in input signal values * @param lx the length of x_in * @param coeff_low the low pass coefficients * @param coeff_high the high pass coefficients * @param ncoeff_minus_one one less than the number of scaling coefficients * @param x_out_low low pass results * @param x_out_high high pass results * * For the convolution we will calculate the output of the lowpass and highpass filters in parallel * * Normally we can describe the calculation of a convolution as * \f$ (\textbf{w} * \textbf{z})_k = \frac{1}{N} \sum\limits_{l=0}^{2N-1} w_{k-l} \cdot z_{l} \f$ * * Our actual implementation resembles this * */ void dwt_convolution(double *x_in, size_t lx, double *coeff_low, double *coeff_high, int ncoeff_minus_one, double *x_out_low, double *x_out_high) { size_t i, j, ind; double x0, x1; for (i=lx; i1) { for (idx_columns=0; idx_columns -1; k--) { x_in_low[k] = x_in_low[lx+k]; x_in_high[k] = x_in_high[lx+k]; } ind = 0; for (i=0; i<(lx); i++) { x0 = 0; x1 = 0; tj = 0; for (j=0; j<=ncoeff_halved_minus_one; j++) { x0 = x0 + (x_in_low[i+j] * coeff_low[ncoeff_minus_one-1-tj]) + (x_in_high[i+j] * coeff_high[ncoeff_minus_one-1-tj]); x1 = x1 + (x_in_low[i+j] * coeff_low[ncoeff_minus_one-tj]) + (x_in_high[i+j] * coeff_high[ncoeff_minus_one-tj]); tj += 2; } x_out[ind++] = x0; x_out[ind++] = x1; } } /*! 
* Allocate memory for idwt * * @param m the number of rows of the input matrix * @param n the number of columns of the input matrix * @param ncoeff the number of scaling coefficients * @param x_dummy * @param y_dummy_low * @param y_dummy_high * @param coeff_low * @param coeff_high * */ void idwt_allocate(size_t m, size_t n, int ncoeff, double **x_dummy, double **y_dummy_low, double **y_dummy_high, double **coeff_low, double **coeff_high) { *x_dummy = (double *) rwt_calloc(max(m,n), sizeof(double)); *y_dummy_low = (double *) rwt_calloc(max(m,n)+ncoeff/2-1, sizeof(double)); *y_dummy_high = (double *) rwt_calloc(max(m,n)+ncoeff/2-1, sizeof(double)); *coeff_low = (double *) rwt_calloc(ncoeff, sizeof(double)); *coeff_high = (double *) rwt_calloc(ncoeff, sizeof(double)); } /*! * Free memory we allocated for idwt * * @param x_dummy * @param y_dummy_low * @param y_dummy_high * @param coeff_low * @param coeff_high * */ void idwt_free(double **x_dummy, double **y_dummy_low, double **y_dummy_high, double **coeff_low, double **coeff_high) { rwt_free(*x_dummy); rwt_free(*y_dummy_low); rwt_free(*y_dummy_high); rwt_free(*coeff_low); rwt_free(*coeff_high); } /*! * Put the scaling coeffients into a form ready for use in the convolution function * * @param ncoeff length of h / the number of scaling coefficients * @param h the wavelet scaling coefficients * @param coeff_low same as h * @param coeff_high reversed h, even values are sign flipped * */ void idwt_coefficients(int ncoeff, double *h, double **coeff_low, double **coeff_high) { int i; for (i=0; i1) current_rows = nrows/sample_f; else current_rows = 1; current_cols = ncols/sample_f; for (i=0; i<(nrows*ncols); i++) x[i] = y[i]; /* main loop */ for (current_level=levels; current_level >= 1; current_level--) { row_cursor = current_rows/2; column_cursor = current_cols/2; /* go by columns in case of a 2D signal*/ if (nrows>1) { for (idx_cols=0; idx_cols #if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE) /*! 
* Checks for correct # of input variables based on type of transform. * * @param nrhs number of items on right hand side of matlab call * @param transform_type * */ int rwt_check_parameter_count(int nrhs, transform_t transform_type) { if (transform_type == INVERSE_REDUNDANT_DWT) { if (nrhs > 4) { rwt_errormsg("There are at most 4 input parameters allowed!"); return 1; } if (nrhs < 3) { rwt_errormsg("There are at least 3 input parameters required!"); return 1; } } else { if (nrhs > 3) { rwt_errormsg("There are at most 3 input parameters allowed!"); return 1; } if (nrhs < 2) { rwt_errormsg("There are at least 2 input parameters required!"); return 1; } } return 0; } /*! * For the inverse redundant transform check that the dimensions of the low and high inputs match * * @param prhs * @param params * */ int rwt_check_yl_matches_yh(const mxArray *prhs[], size_t nrows, size_t ncols, int levels) { size_t mh = mxGetM(prhs[1]); size_t nh = mxGetN(prhs[1]); if (min(nrows, ncols) > 1) { if ((nrows != mh) | (3 * ncols * levels != nh)) { return 0; } } else { if ((nrows != mh) | (ncols * levels != nh)) { return 0; } } return 1; } #endif /*! * Find L, the number of levels * * @param m the number of rows in the input * @param n the number of columns in the input * * L is the exponent of the largest power of 2 that is a factor of all input dimensions * */ int rwt_find_levels(size_t m, size_t n) { size_t i, j, L; i = n ; j = 0; while (even(i)) { i = (i >> 1); j++; } L = m; i = 0; while (even(L)) { L = (L >> 1); i++; } if (min(m, n) == 1) L = max(i, j); else L = min(i, j); if (L == 0) { rwt_errormsg("Maximum number of levels is zero; no decomposition can be performed!"); return -1; } else return L; } /*! 
* Check that length is divisble by 2^L * * @param length the number of rows or number of columns * @param L the number of levels * */ int rwt_check_dimensions(size_t length, int L) { double test = (double) length / pow(2.0, (double) L); if ((test - floor(test)) > 0.0) { return -1; } return 0; } /*! * Sanity check the levels parameter * * @param levels the number of levels specified or calculated for the input * @param rows the number of rows of input * @param cols the number of columns of input * */ int rwt_check_levels(int levels, size_t rows, size_t cols) { if (levels < 1) { rwt_errormsg("The number of levels, L, must be a positive integer"); return -1; } /*! Check that both the rows and columns are divisible by 2^L */ if ((rows > 1 && rwt_check_dimensions(rows, levels)) || (cols > 1 && rwt_check_dimensions(cols, levels))) { rwt_errormsg("All dimensions must be divisible by 2^L"); return -1; } return 0; } #if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE) /*! * Parse input from MATLAB and do some sanity checking * * @param nlhs number of items on left hand side of matlab call * @param plhs pointer to left hand side data structure * @param nrhs number of items on right hand side of matlab call * @param prhs pointer to right hand side data structure * @param transform_type which transform are we setting up to do * */ rwt_init_params rwt_matlab_init(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[], transform_t transform_type) { rwt_init_params params; int argNumL; /*! Check for correct # of input parameters */ if (rwt_check_parameter_count(nrhs, transform_type) != 0) return params; /*! Check that we don't have more than two dimensions in the input since that is currently unsupported. */ if (mxGetNumberOfDimensions(prhs[0]) > 2) { rwt_errormsg("Matrix must have fewer than 3 dimensions!"); return params; } /*! Get the number of rows and columns in the input matrix. 
*/ params.nrows = mxGetM(prhs[0]); params.ncols = mxGetN(prhs[0]); if (params.nrows == 0 && params.ncols == 0) { rwt_errormsg("The input matrix cannot be empty"); return params; } /*! Read the number of levels, L, from the input values if it was given, otherwise calculate L. Sanity check L */ argNumL = (transform_type == INVERSE_REDUNDANT_DWT) ? 3 : 2; if ((argNumL + 1) == nrhs) params.levels = (int) *mxGetPr(prhs[argNumL]); else params.levels = rwt_find_levels(params.nrows, params.ncols); if (rwt_check_levels(params.levels, params.nrows, params.ncols)) { return params; } /*! Read the scaling coefficients, h, from the input and find their length, ncoeff. * In the case of the redundant transform, the scalings are found one further position to the right, * and also we check for matching dimensions in the low and high inputs */ if (transform_type == INVERSE_REDUNDANT_DWT) { params.scalings = mxGetPr(prhs[2]); params.ncoeff = max(mxGetM(prhs[2]), mxGetN(prhs[2])); if (!rwt_check_yl_matches_yh(prhs, params.nrows, params.ncols, params.levels)) { rwt_errormsg("Dimensions of first two input matrices not consistent!"); return params; } } else { params.scalings = mxGetPr(prhs[1]); params.ncoeff = max(mxGetM(prhs[1]), mxGetN(prhs[1])); } /*! Create the first item in the output array as a double matrix with the same dimensions as the input. */ plhs[0] = mxCreateDoubleMatrix(params.nrows, params.ncols, mxREAL); return params; } #endif ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/src/irdwt.c ================================================ /*! 
\file irdwt.c \brief Implementation of the inverse redundant discrete wavelet transform */ #include "rwt_platform.h" void irdwt_convolution(double *x_out, size_t lx, double *coeff_low, double *coeff_high, int ncoeff, double *x_in_low, double *x_in_high) { int k; size_t i, j; double x0; for (k=ncoeff-2; k > -1; k--) { x_in_low[k] = x_in_low[lx+k]; x_in_high[k] = x_in_high[lx+k]; } for (i=0; i= 1; current_level--) { /* actual (level dependent) column offset */ if (nrows==1) column_cursor = ncols*(current_level-1); else column_cursor = 3*ncols*(current_level-1); column_cursor_plus_n = column_cursor + ncols; column_cursor_plus_double_n = column_cursor_plus_n + ncols; /* go by columns in case of a 2D signal*/ if (nrows>1) { row_blocks_per_column = nrows/current_rows; /* # of row blocks per column */ for (idx_cols=0; idx_cols1) y_dummy_high_high[i+ncoeff_minus_one] = mat(x_high, idx_rows, idx_cols, nrows, ncols); else y_dummy_high_high[i+ncoeff_minus_one] = mat(y_high, idx_rows, idx_cols + column_cursor, nrows, three_n_L); } /* perform filtering lowpass/highpass */ irdwt_convolution(x_dummy_low, current_cols, coeff_low, coeff_high, ncoeff, y_dummy_low_low, y_dummy_high_high); /* restore dummy variables in matrices */ idx_cols = -sample_f + n_c; for (i=0; i1) { n_rb = nrows/current_rows; /* # of row blocks per column */ for (idx_cols=0; idx_cols %} /**********************************************************************/ /* The following code originally appeared in * enthought/kiva/agg/src/numeric.i written by Eric Jones. It was * translated from C++ to C by John Hunter. Bill Spotz has modified * it to fix some minor bugs, upgrade from Numeric to numpy (all * versions), add some comments and functionality, and convert from * direct code insertion to SWIG fragments. */ %fragment("NumPy_Macros", "header") { /* Macros to extract array attributes. 
*/
%#define is_array(a)            ((a) && PyArray_Check((PyArrayObject *)a))
%#define array_type(a)          (int)(PyArray_TYPE(a))
%#define array_numdims(a)       (((PyArrayObject *)a)->nd)
%#define array_dimensions(a)    (((PyArrayObject *)a)->dimensions)
%#define array_size(a,i)        (((PyArrayObject *)a)->dimensions[i])
%#define array_data(a)          (((PyArrayObject *)a)->data)
%#define array_is_contiguous(a) (PyArray_ISCONTIGUOUS(a))
%#define array_is_native(a)     (PyArray_ISNOTSWAPPED(a))
%#define array_is_fortran(a)    (PyArray_ISFORTRAN(a))
}

/**********************************************************************/

%fragment("NumPy_Utilities", "header")
{
  /* Given a PyObject, return a string describing its type.
   * The checks run in the order written and the first match wins, so a
   * callable object is reported as "callable" before any later category
   * is tried.
   */
  const char* pytype_string(PyObject* py_obj) {
    if (py_obj == NULL          ) return "C NULL value";
    if (py_obj == Py_None       ) return "Python None" ;
    if (PyCallable_Check(py_obj)) return "callable"    ;
    if (PyString_Check(  py_obj)) return "string"      ;
    if (PyInt_Check(     py_obj)) return "int"         ;
    if (PyFloat_Check(   py_obj)) return "float"       ;
    if (PyDict_Check(    py_obj)) return "dict"        ;
    if (PyList_Check(    py_obj)) return "list"        ;
    if (PyTuple_Check(   py_obj)) return "tuple"       ;
    if (PyModule_Check(  py_obj)) return "module"      ;
%#if PY_MAJOR_VERSION < 3
    if (PyFile_Check(    py_obj)) return "file"        ;
    if (PyInstance_Check(py_obj)) return "instance"    ;
%#endif

    return "unkown type";
  }

  /* Given a NumPy typecode, return a string describing the type.
   * Any typecode outside 0..23 (negative values included) maps to the
   * final "unknown" entry instead of indexing outside the table.
   */
  const char* typecode_string(int typecode) {
    static const char* type_names[25] = {"bool", "byte", "unsigned byte",
                                         "short", "unsigned short", "int",
                                         "unsigned int", "long", "unsigned long",
                                         "long long", "unsigned long long",
                                         "float", "double", "long double",
                                         "complex float", "complex double",
                                         "complex long double", "object",
                                         "string", "unicode", "void", "ntypes",
                                         "notype", "char", "unknown"};
    return (typecode >= 0 && typecode < 24) ? type_names[typecode] : type_names[24];
  }

  /* Make sure input has correct numpy type.  Allow character and byte
   * to match.  Also allow int and long to match.  This is deprecated.
* You should use PyArray_EquivTypenums() instead.
   * Returns whatever PyArray_EquivTypenums() returns for the two typecodes.
   */
  int type_match(int actual_type, int desired_type) {
    return PyArray_EquivTypenums(actual_type, desired_type);
  }
}

/**********************************************************************/

%fragment("NumPy_Object_to_Array", "header",
          fragment="NumPy_Backward_Compatibility",
          fragment="NumPy_Macros",
          fragment="NumPy_Utilities")
{
  /* Given a PyObject pointer, cast it to a PyArrayObject pointer if
   * legal.  If not, set the python error string appropriately and
   * return NULL.
   *
   * "Legal" means the input is an array and either typecode is
   * NPY_NOTYPE or the array's type is equivalent to typecode.  The
   * returned pointer is the input itself; no new object is created
   * by this routine.
   */
  PyArrayObject* obj_to_array_no_conversion(PyObject* input, int typecode) {
    PyArrayObject* ary = NULL;
    if (is_array(input) && (typecode == NPY_NOTYPE ||
                            PyArray_EquivTypenums(array_type(input), typecode))) {
      ary = (PyArrayObject*) input;
    }
    /* No parentheses are written around the condition here: this relies on
     * is_array() being a macro whose expansion is itself parenthesized. */
    else if is_array(input) {
      /* An array, but of the wrong element type. */
      const char* desired_type = typecode_string(typecode);
      const char* actual_type  = typecode_string(array_type(input));
      PyErr_Format(PyExc_TypeError,
                   "Array of type '%s' required. Array of type '%s' given",
                   desired_type, actual_type);
      ary = NULL;
    }
    else {
      /* Not an array at all: report the Python type that was passed. */
      const char * desired_type = typecode_string(typecode);
      const char * actual_type  = pytype_string(input);
      PyErr_Format(PyExc_TypeError,
                   "Array of type '%s' required. A '%s' was given",
                   desired_type, actual_type);
      ary = NULL;
    }
    return ary;
  }

  /* Convert the given PyObject to a NumPy array with the given
   * typecode.  On success, return a valid PyArrayObject* with the
   * correct type.  On failure, the python error string will be set and
   * the routine returns NULL.
*/
  PyArrayObject* obj_to_array_allow_conversion(PyObject* input, int typecode,
                                               int* is_new_object) {
    PyArrayObject* ary = NULL;
    PyObject* py_obj;
    if (is_array(input) && (typecode == NPY_NOTYPE ||
                            PyArray_EquivTypenums(array_type(input),typecode))) {
      /* Already an array of an acceptable type: hand the input back as is. */
      ary = (PyArrayObject*) input;
      *is_new_object = 0;
    }
    else {
      py_obj = PyArray_FROMANY(input, typecode, 0, 0, NPY_DEFAULT);
      /* If NULL, the PyArray_FROMANY call is expected to have set the python
       * error value.  Note that *is_new_object is set to 1 below even when
       * the conversion returned NULL. */
      ary = (PyArrayObject*) py_obj;
      *is_new_object = 1;
    }
    return ary;
  }

  /* Given a PyArrayObject, check to see if it is contiguous.  If so,
   * return the input pointer and flag it as not a new object.  If it is
   * not contiguous, create a new PyArrayObject using the original data,
   * flag it as a new object and return the pointer.
   * min_dims and max_dims are passed straight through to
   * PyArray_ContiguousFromObject().
   */
  PyArrayObject* make_contiguous(PyArrayObject* ary, int* is_new_object,
                                 int min_dims, int max_dims) {
    PyArrayObject* result;
    if (array_is_contiguous(ary)) {
      result = ary;
      *is_new_object = 0;
    }
    else {
      result = (PyArrayObject*) PyArray_ContiguousFromObject((PyObject*)ary,
                                                             array_type(ary),
                                                             min_dims,
                                                             max_dims);
      /* Set unconditionally, i.e. also when the call above returned NULL. */
      *is_new_object = 1;
    }
    return result;
  }

  /* Given a PyArrayObject, check to see if it is Fortran-contiguous.
   * If so, return the input pointer and flag it as not a new object
   * (*is_new_object is set to 0).  If it is not Fortran-contiguous,
   * create a new PyArrayObject using the original data, flag it as a
   * new object and return the pointer.
   * min_dims and max_dims are accepted but not used by this routine.
   */
  PyArrayObject* make_fortran(PyArrayObject* ary, int* is_new_object,
                              int min_dims, int max_dims) {
    PyArrayObject* result;
    if (array_is_fortran(ary)) {
      result = ary;
      *is_new_object = 0;
    }
    else {
      /* NOTE(review): the extra reference on the descriptor is presumably
       * taken because PyArray_FromArray consumes one - confirm against the
       * NumPy C-API documentation. */
      Py_INCREF(ary->descr);
      result = (PyArrayObject*) PyArray_FromArray(ary, ary->descr, NPY_FORTRAN);
      *is_new_object = 1;
    }
    return result;
  }

  /* Convert a given PyObject to a contiguous PyArrayObject of the
   * specified type.  If the input object is not a contiguous
   * PyArrayObject, a new one will be created and the new object flag
   * will be set.
*/
  PyArrayObject* obj_to_array_contiguous_allow_conversion(PyObject* input,
                                                          int typecode,
                                                          int* is_new_object) {
    int is_new1 = 0;
    int is_new2 = 0;
    PyArrayObject* ary2;
    PyArrayObject* ary1 = obj_to_array_allow_conversion(input, typecode,
                                                        &is_new1);
    if (ary1) {
      ary2 = make_contiguous(ary1, &is_new2, 0, 0);
      /* Both steps produced a new object: the intermediate one from the
       * first step is no longer needed, so its reference is released. */
      if ( is_new1 && is_new2) {
        Py_DECREF(ary1);
      }
      ary1 = ary2;
    }
    /* The result counts as new if either step created an object. */
    *is_new_object = is_new1 || is_new2;
    return ary1;
  }

  /* Convert a given PyObject to a Fortran-ordered PyArrayObject of the
   * specified type.  If the input object is not a Fortran-ordered
   * PyArrayObject, a new one will be created and the new object flag
   * will be set.
   */
  PyArrayObject* obj_to_array_fortran_allow_conversion(PyObject* input,
                                                       int typecode,
                                                       int* is_new_object) {
    int is_new1 = 0;
    int is_new2 = 0;
    PyArrayObject* ary2;
    PyArrayObject* ary1 = obj_to_array_allow_conversion(input, typecode,
                                                        &is_new1);
    if (ary1) {
      ary2 = make_fortran(ary1, &is_new2, 0, 0);
      /* Same ownership rule as in the contiguous variant: drop the
       * intermediate object when a second new object replaced it. */
      if (is_new1 && is_new2) {
        Py_DECREF(ary1);
      }
      ary1 = ary2;
    }
    *is_new_object = is_new1 || is_new2;
    return ary1;
  }

} /* end fragment */


/**********************************************************************/

%fragment("NumPy_Array_Requirements", "header",
          fragment="NumPy_Backward_Compatibility",
          fragment="NumPy_Macros")
{
  /* Test whether a python object is contiguous.  If array is
   * contiguous, return 1.  Otherwise, set the python error string and
   * return 0.
   */
  int require_contiguous(PyArrayObject* ary) {
    int contiguous = 1;
    if (!array_is_contiguous(ary)) {
      PyErr_SetString(PyExc_TypeError,
                      "Array must be contiguous. A non-contiguous array was given");
      contiguous = 0;
    }
    return contiguous;
  }

  /* Require that a numpy array is not byte-swapped.  If the array is
   * not byte-swapped, return 1.  Otherwise, set the python error string
   * and return 0.
   */
  int require_native(PyArrayObject* ary) {
    int native = 1;
    if (!array_is_native(ary)) {
      PyErr_SetString(PyExc_TypeError,
                      "Array must have native byteorder. "
                      "A byte-swapped array was given");
      native = 0;
    }
    return native;
  }

  /* Require the given PyArrayObject to have a specified number of
   * dimensions.  If the array has the specified number of dimensions,
   * return 1.  Otherwise, set the python error string and return 0.
   */
  int require_dimensions(PyArrayObject* ary, int exact_dimensions) {
    int success = 1;
    if (array_numdims(ary) != exact_dimensions) {
      PyErr_Format(PyExc_TypeError,
                   "Array must have %d dimensions. Given array has %d dimensions",
                   exact_dimensions, array_numdims(ary));
      success = 0;
    }
    return success;
  }

  /* Require the given PyArrayObject to have one of a list of specified
   * number of dimensions.  If the array has one of the specified number
   * of dimensions, return 1.  Otherwise, set the python error string
   * and return 0.
   *
   * exact_dimensions points to n acceptable dimension counts.  The
   * failure branch reads exact_dimensions[n-1], so n is expected to be
   * at least 1, and the message is assembled in fixed 255-byte buffers.
   */
  int require_dimensions_n(PyArrayObject* ary, int* exact_dimensions, int n) {
    int success = 0;
    int i;
    char dims_str[255] = "";
    char s[255];
    /* Stop at the first acceptable dimension count. */
    for (i = 0; i < n && !success; i++) {
      if (array_numdims(ary) == exact_dimensions[i]) {
        success = 1;
      }
    }
    if (!success) {
      /* Build the list "a, b,  or c" of accepted counts for the message. */
      for (i = 0; i < n-1; i++) {
        sprintf(s, "%d, ", exact_dimensions[i]);
        strcat(dims_str,s);
      }
      sprintf(s, " or %d", exact_dimensions[n-1]);
      strcat(dims_str,s);
      PyErr_Format(PyExc_TypeError,
                   "Array must have %s dimensions. Given array has %d dimensions",
                   dims_str, array_numdims(ary));
    }
    return success;
  }

  /* Require the given PyArrayObject to have a specified shape.  If the
   * array has the specified shape, return 1.  Otherwise, set the python
   * error string and return 0.
*/
  /* size holds n expected extents; an entry of -1 matches any extent and
   * is printed as "*" in the error message. */
  int require_size(PyArrayObject* ary, npy_intp* size, int n) {
    int i;
    int success = 1;
    int len;
    char desired_dims[255] = "[";
    char s[255];
    char actual_dims[255] = "[";
    for(i=0; i < n;i++) {
      if (size[i] != -1 &&  size[i] != array_size(ary,i)) {
        success = 0;
      }
    }
    if (!success) {
      for (i = 0; i < n; i++) {
        if (size[i] == -1) {
          sprintf(s, "*,");
        }
        else
        {
          sprintf(s, "%ld,", (long int)size[i]);
        }
        strcat(desired_dims,s);
      }
      /* Overwrite the last character (the trailing comma) with the closing bracket. */
      len = strlen(desired_dims);
      desired_dims[len-1] = ']';
      for (i = 0; i < n; i++) {
        sprintf(s, "%ld,", (long int)array_size(ary,i));
        strcat(actual_dims,s);
      }
      len = strlen(actual_dims);
      actual_dims[len-1] = ']';
      PyErr_Format(PyExc_TypeError,
                   "Array must have shape of %s. Given array has shape of %s",
                   desired_dims, actual_dims);
    }
    return success;
  }

  /* Require the given PyArrayObject to be FORTRAN ordered.  If the
   * PyArrayObject is already FORTRAN ordered, do nothing.  Else,
   * set the FORTRAN ordering flag and recompute the strides.
   * The array is modified in place and the routine always returns 1.
   */
  int require_fortran(PyArrayObject* ary) {
    int success = 1;
    int nd = array_numdims(ary);
    int i;
    if (array_is_fortran(ary)) return success;
    /* Set the FORTRAN ordered flag */
    ary->flags = NPY_FARRAY;
    /* Recompute the strides: the first stride takes the value of the old
     * last stride, each following stride is the previous stride times the
     * previous extent. */
    ary->strides[0] = ary->strides[nd-1];
    for (i=1; i < nd; ++i)
      ary->strides[i] = ary->strides[i-1] * array_size(ary,i-1);
    return success;
  }
}

/* Combine all NumPy fragments into one for convenience */
%fragment("NumPy_Fragments", "header",
          fragment="NumPy_Backward_Compatibility",
          fragment="NumPy_Macros",
          fragment="NumPy_Utilities",
          fragment="NumPy_Object_to_Array",
          fragment="NumPy_Array_Requirements") { }

/* End John Hunter translation (with modifications by Bill Spotz)
 */

/* %numpy_typemaps() macro
 *
 * This macro defines a family of 41 typemaps that allow C arguments
 * of the form
 *
 *     (DATA_TYPE IN_ARRAY1[ANY])
 *     (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1)
 *     (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1)
 *
 *     (DATA_TYPE IN_ARRAY2[ANY][ANY])
 *     (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
 *     (DIM_TYPE DIM1,
DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2) * (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) * (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2) * * (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY]) * (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3) * (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_ARRAY3) * (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3) * (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_FARRAY3) * * (DATA_TYPE INPLACE_ARRAY1[ANY]) * (DATA_TYPE* INPLACE_ARRAY1, DIM_TYPE DIM1) * (DIM_TYPE DIM1, DATA_TYPE* INPLACE_ARRAY1) * * (DATA_TYPE INPLACE_ARRAY2[ANY][ANY]) * (DATA_TYPE* INPLACE_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) * (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_ARRAY2) * (DATA_TYPE* INPLACE_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) * (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_FARRAY2) * * (DATA_TYPE INPLACE_ARRAY3[ANY][ANY][ANY]) * (DATA_TYPE* INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3) * (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_ARRAY3) * (DATA_TYPE* INPLACE_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3) * (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_FARRAY3) * * (DATA_TYPE ARGOUT_ARRAY1[ANY]) * (DATA_TYPE* ARGOUT_ARRAY1, DIM_TYPE DIM1) * (DIM_TYPE DIM1, DATA_TYPE* ARGOUT_ARRAY1) * * (DATA_TYPE ARGOUT_ARRAY2[ANY][ANY]) * * (DATA_TYPE ARGOUT_ARRAY3[ANY][ANY][ANY]) * * (DATA_TYPE** ARGOUTVIEW_ARRAY1, DIM_TYPE* DIM1) * (DIM_TYPE* DIM1, DATA_TYPE** ARGOUTVIEW_ARRAY1) * * (DATA_TYPE** ARGOUTVIEW_ARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) * (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_ARRAY2) * (DATA_TYPE** ARGOUTVIEW_FARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) * (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_FARRAY2) * * (DATA_TYPE** ARGOUTVIEW_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3) * (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_ARRAY3) * (DATA_TYPE** 
ARGOUTVIEW_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3) * (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_FARRAY3) * * where "DATA_TYPE" is any type supported by the NumPy module, and * "DIM_TYPE" is any int-like type suitable for specifying dimensions. * The difference between "ARRAY" typemaps and "FARRAY" typemaps is * that the "FARRAY" typemaps expect FORTRAN ordering of * multidimensional arrays. In python, the dimensions will not need * to be specified (except for the "DATA_TYPE* ARGOUT_ARRAY1" * typemaps). The IN_ARRAYs can be a numpy array or any sequence that * can be converted to a numpy array of the specified type. The * INPLACE_ARRAYs must be numpy arrays of the appropriate type. The * ARGOUT_ARRAYs will be returned as new numpy arrays of the * appropriate type. * * These typemaps can be applied to existing functions using the * %apply directive. For example: * * %apply (double* IN_ARRAY1, int DIM1) {(double* series, int length)}; * double prod(double* series, int length); * * %apply (int DIM1, int DIM2, double* INPLACE_ARRAY2) * {(int rows, int cols, double* matrix )}; * void floor(int rows, int cols, double* matrix, double f); * * %apply (double IN_ARRAY3[ANY][ANY][ANY]) * {(double tensor[2][2][2] )}; * %apply (double ARGOUT_ARRAY3[ANY][ANY][ANY]) * {(double low[2][2][2] )}; * %apply (double ARGOUT_ARRAY3[ANY][ANY][ANY]) * {(double upp[2][2][2] )}; * void luSplit(double tensor[2][2][2], * double low[2][2][2], * double upp[2][2][2] ); * * or directly with * * double prod(double* IN_ARRAY1, int DIM1); * * void floor(int DIM1, int DIM2, double* INPLACE_ARRAY2, double f); * * void luSplit(double IN_ARRAY3[ANY][ANY][ANY], * double ARGOUT_ARRAY3[ANY][ANY][ANY], * double ARGOUT_ARRAY3[ANY][ANY][ANY]); */ %define %numpy_typemaps(DATA_TYPE, DATA_TYPECODE, DIM_TYPE) /************************/ /* Input Array Typemaps */ /************************/ /* Typemap suite for (DATA_TYPE IN_ARRAY1[ANY]) */ 
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE IN_ARRAY1[ANY])
{
  /* Accept an existing NumPy array or any Python sequence. */
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE IN_ARRAY1[ANY])
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* The required length is the declared bound of the C array argument. */
  npy_intp size[1] = { $1_dim0 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE, &is_new_object);
  if (!array || !require_dimensions(array, 1) || !require_size(array, size, 1)) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}
%typemap(freearg)
  (DATA_TYPE IN_ARRAY1[ANY])
{
  /* Release the array only when the "in" typemap flagged it as a new object. */
  if (is_new_object$argnum && array$argnum) { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1)
{
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 is the "any extent" wildcard understood by require_size(). */
  npy_intp size[1] = { -1 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE, &is_new_object);
  if (!array || !require_dimensions(array, 1) || !require_size(array, size, 1)) SWIG_fail;
  /* The C length argument is filled in from the array itself. */
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
}
%typemap(freearg)
  (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1)
{
  if (is_new_object$argnum && array$argnum) { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1)
{
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in, fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* Same as the suite above, with the length argument first. */
  npy_intp size[1] = {-1};
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE, &is_new_object);
  if (!array || !require_dimensions(array, 1) || !require_size(array, size, 1)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 =
(DATA_TYPE*) array_data(array);
}
%typemap(freearg)
  (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1)
{
  /* Release the array only when the "in" typemap flagged it as a new object. */
  if (is_new_object$argnum && array$argnum) { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DATA_TYPE IN_ARRAY2[ANY][ANY])
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE IN_ARRAY2[ANY][ANY])
{
  /* Accept an existing NumPy array or any Python sequence. */
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE IN_ARRAY2[ANY][ANY])
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* Both extents are fixed by the declared bounds of the C array argument. */
  npy_intp size[2] = { $1_dim0, $1_dim1 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE, &is_new_object);
  if (!array || !require_dimensions(array, 2) || !require_size(array, size, 2)) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}
%typemap(freearg)
  (DATA_TYPE IN_ARRAY2[ANY][ANY])
{
  if (is_new_object$argnum && array$argnum) { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
{
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 is the "any extent" wildcard understood by require_size(). */
  npy_intp size[2] = { -1, -1 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE, &is_new_object);
  if (!array || !require_dimensions(array, 2) || !require_size(array, size, 2)) SWIG_fail;
  /* The C dimension arguments are filled in from the array itself. */
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
}
%typemap(freearg)
  (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
{
  if (is_new_object$argnum && array$argnum) { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2)
{
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in, fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 is the "any extent" wildcard understood by require_size(). */
  npy_intp size[2] = { -1, -1 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE, &is_new_object);
  if (!array || !require_dimensions(array, 2) || !require_size(array, size, 2)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DATA_TYPE*) array_data(array);
}
%typemap(freearg)
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2)
{
  /* Release the array only when the "in" typemap flagged it as a new object. */
  if (is_new_object$argnum && array$argnum) { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
{
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  npy_intp size[2] = { -1, -1 };
  /* Convert with the Fortran-order helper before require_fortran() is
   * applied to the result. */
  array = obj_to_array_fortran_allow_conversion($input, DATA_TYPECODE, &is_new_object);
  if (!array || !require_dimensions(array, 2) || !require_size(array, size, 2)
      || !require_fortran(array)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
}
%typemap(freearg)
  (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
{
  if (is_new_object$argnum && array$argnum) { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2)
{
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in, fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  npy_intp size[2] = { -1, -1 };
  /* Use the Fortran-order conversion helper, exactly as the data-first
   * IN_FARRAY2 suite does.  The contiguous helper used previously left
   * the array in C order, so the require_fortran() call that follows
   * rewrote its flags and strides in place. */
  array = obj_to_array_fortran_allow_conversion($input,
DATA_TYPECODE, &is_new_object);
  if (!array || !require_dimensions(array, 2) || !require_size(array, size, 2)
      || !require_fortran(array)) SWIG_fail;
  /* The C dimension arguments are filled in from the array itself. */
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DATA_TYPE*) array_data(array);
}
%typemap(freearg)
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2)
{
  /* Release the array only when the "in" typemap flagged it as a new object. */
  if (is_new_object$argnum && array$argnum) { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY])
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY])
{
  /* Accept an existing NumPy array or any Python sequence. */
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY])
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* All three extents are fixed by the declared bounds of the C array. */
  npy_intp size[3] = { $1_dim0, $1_dim1, $1_dim2 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE, &is_new_object);
  if (!array || !require_dimensions(array, 3) || !require_size(array, size, 3)) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}
%typemap(freearg)
  (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY])
{
  if (is_new_object$argnum && array$argnum) { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
 *                    DIM_TYPE DIM3)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
{
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 is the "any extent" wildcard understood by require_size(). */
  npy_intp size[3] = { -1, -1, -1 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE, &is_new_object);
  if (!array || !require_dimensions(array, 3) || !require_size(array, size, 3)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
  $4 = (DIM_TYPE) array_size(array,2);
}
%typemap(freearg)
  (DATA_TYPE* IN_ARRAY3,
DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
{
  /* Release the array only when the "in" typemap flagged it as a new object. */
  if (is_new_object$argnum && array$argnum) { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3,
 *                    DATA_TYPE* IN_ARRAY3)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_ARRAY3)
{
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in, fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_ARRAY3)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 is the "any extent" wildcard understood by require_size(). */
  npy_intp size[3] = { -1, -1, -1 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE, &is_new_object);
  if (!array || !require_dimensions(array, 3) || !require_size(array, size, 3)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DIM_TYPE) array_size(array,2);
  $4 = (DATA_TYPE*) array_data(array);
}
%typemap(freearg)
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_ARRAY3)
{
  if (is_new_object$argnum && array$argnum) { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
 *                    DIM_TYPE DIM3)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
{
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  npy_intp size[3] = { -1, -1, -1 };
  array = obj_to_array_fortran_allow_conversion($input, DATA_TYPECODE, &is_new_object);
  /* Logical ||, not bitwise |: require_fortran() modifies the array and
   * must not run once an earlier check has already failed. */
  if (!array || !require_dimensions(array, 3) || !require_size(array, size, 3)
      || !require_fortran(array)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
  $4 = (DIM_TYPE) array_size(array,2);
}
%typemap(freearg)
  (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE
DIM3)
{
  /* Release the array only when the "in" typemap flagged it as a new object. */
  if (is_new_object$argnum && array$argnum) { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3,
 *                    DATA_TYPE* IN_FARRAY3)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_FARRAY3)
{
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in, fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_FARRAY3)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  npy_intp size[3] = { -1, -1, -1 };
  /* Use the Fortran-order conversion helper, exactly as the data-first
   * IN_FARRAY3 suite does.  The contiguous helper used previously left
   * the array in C order, so the require_fortran() call below rewrote
   * its flags and strides in place. */
  array = obj_to_array_fortran_allow_conversion($input, DATA_TYPECODE, &is_new_object);
  if (!array || !require_dimensions(array, 3) || !require_size(array, size, 3)
      || !require_fortran(array)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DIM_TYPE) array_size(array,2);
  $4 = (DATA_TYPE*) array_data(array);
}
%typemap(freearg)
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_FARRAY3)
{
  if (is_new_object$argnum && array$argnum) { Py_DECREF(array$argnum); }
}

/***************************/
/* In-Place Array Typemaps */
/***************************/

/* Typemap suite for (DATA_TYPE INPLACE_ARRAY1[ANY])
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE INPLACE_ARRAY1[ANY])
{
  /* Only a NumPy array whose type number is equivalent to DATA_TYPECODE
   * qualifies; plain sequences are not accepted here. */
  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input), DATA_TYPECODE);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE INPLACE_ARRAY1[ANY])
  (PyArrayObject* array=NULL)
{
  /* The required length is the declared bound of the C array argument. */
  npy_intp size[1] = { $1_dim0 };
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array || !require_dimensions(array,1) || !require_size(array, size, 1)
      || !require_contiguous(array) || !require_native(array)) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}

/* Typemap suite for (DATA_TYPE* INPLACE_ARRAY1, DIM_TYPE DIM1)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE* INPLACE_ARRAY1, DIM_TYPE DIM1)
{
  $1 = is_array($input) &&
PyArray_EquivTypenums(array_type($input), DATA_TYPECODE);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE* INPLACE_ARRAY1, DIM_TYPE DIM1)
  (PyArrayObject* array=NULL, int i=1)
{
  /* The initial value of i is irrelevant: the loop below resets it to 0. */
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array || !require_dimensions(array,1) || !require_contiguous(array)
      || !require_native(array)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  /* The length passed to C is the product of all array extents. */
  $2 = 1;
  for (i=0; i < array_numdims(array); ++i) $2 *= array_size(array,i);
}

/* Typemap suite for (DIM_TYPE DIM1, DATA_TYPE* INPLACE_ARRAY1)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DATA_TYPE* INPLACE_ARRAY1)
{
  /* Only a NumPy array whose type number is equivalent to DATA_TYPECODE
   * qualifies; plain sequences are not accepted here. */
  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input), DATA_TYPECODE);
}
%typemap(in, fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DATA_TYPE* INPLACE_ARRAY1)
  (PyArrayObject* array=NULL, int i=0)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array || !require_dimensions(array,1) || !require_contiguous(array)
      || !require_native(array)) SWIG_fail;
  /* The length passed to C is the product of all array extents. */
  $1 = 1;
  for (i=0; i < array_numdims(array); ++i) $1 *= array_size(array,i);
  $2 = (DATA_TYPE*) array_data(array);
}

/* Typemap suite for (DATA_TYPE INPLACE_ARRAY2[ANY][ANY])
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE INPLACE_ARRAY2[ANY][ANY])
{
  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input), DATA_TYPECODE);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE INPLACE_ARRAY2[ANY][ANY])
  (PyArrayObject* array=NULL)
{
  /* Both extents are fixed by the declared bounds of the C array argument. */
  npy_intp size[2] = { $1_dim0, $1_dim1 };
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array || !require_dimensions(array,2) || !require_size(array, size, 2)
      || !require_contiguous(array) || !require_native(array)) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}

/* Typemap suite for (DATA_TYPE* INPLACE_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE* INPLACE_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
{
  $1 =
is_array($input) && PyArray_EquivTypenums(array_type($input), DATA_TYPECODE);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE* INPLACE_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
  (PyArrayObject* array=NULL)
{
  /* The array must pass the rank, contiguity and byte-order checks as
   * given; no size[] constraint is applied in this suite. */
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array || !require_dimensions(array,2) || !require_contiguous(array)
      || !require_native(array)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_ARRAY2)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_ARRAY2)
{
  /* Only a NumPy array whose type number is equivalent to DATA_TYPECODE
   * qualifies; plain sequences are not accepted here. */
  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input), DATA_TYPECODE);
}
%typemap(in, fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_ARRAY2)
  (PyArrayObject* array=NULL)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array || !require_dimensions(array,2) || !require_contiguous(array)
      || !require_native(array)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DATA_TYPE*) array_data(array);
}

/* Typemap suite for (DATA_TYPE* INPLACE_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE* INPLACE_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
{
  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input), DATA_TYPECODE);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE* INPLACE_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
  (PyArrayObject* array=NULL)
{
  /* Same checks as INPLACE_ARRAY2, followed by require_fortran(). */
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array || !require_dimensions(array,2) || !require_contiguous(array)
      || !require_native(array) || !require_fortran(array)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE*
INPLACE_FARRAY2) */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_FARRAY2)
{
  /* Only a NumPy array whose type number is equivalent to DATA_TYPECODE
   * qualifies; plain sequences are not accepted here. */
  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input), DATA_TYPECODE);
}
%typemap(in, fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_FARRAY2)
  (PyArrayObject* array=NULL)
{
  /* Rank, contiguity and byte-order checks, followed by require_fortran(). */
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array || !require_dimensions(array,2) || !require_contiguous(array)
      || !require_native(array) || !require_fortran(array)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DATA_TYPE*) array_data(array);
}

/* Typemap suite for (DATA_TYPE INPLACE_ARRAY3[ANY][ANY][ANY])
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE INPLACE_ARRAY3[ANY][ANY][ANY])
{
  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input), DATA_TYPECODE);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE INPLACE_ARRAY3[ANY][ANY][ANY])
  (PyArrayObject* array=NULL)
{
  /* All three extents are fixed by the declared bounds of the C array. */
  npy_intp size[3] = { $1_dim0, $1_dim1, $1_dim2 };
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array || !require_dimensions(array,3) || !require_size(array, size, 3)
      || !require_contiguous(array) || !require_native(array)) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}

/* Typemap suite for (DATA_TYPE* INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
 *                    DIM_TYPE DIM3)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE* INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
{
  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input), DATA_TYPECODE);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE* INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
  (PyArrayObject* array=NULL)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array || !require_dimensions(array,3) || !require_contiguous(array)
      || !require_native(array)) SWIG_fail;
  $1 = (DATA_TYPE*)
array_data(array);
  /* The C dimension arguments are filled in from the array itself. */
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
  $4 = (DIM_TYPE) array_size(array,2);
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3,
 *                    DATA_TYPE* INPLACE_ARRAY3)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_ARRAY3)
{
  /* Only a NumPy array whose type number is equivalent to DATA_TYPECODE
   * qualifies; plain sequences are not accepted here. */
  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input), DATA_TYPECODE);
}
%typemap(in, fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_ARRAY3)
  (PyArrayObject* array=NULL)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array || !require_dimensions(array,3) || !require_contiguous(array)
      || !require_native(array)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DIM_TYPE) array_size(array,2);
  $4 = (DATA_TYPE*) array_data(array);
}

/* Typemap suite for (DATA_TYPE* INPLACE_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
 *                    DIM_TYPE DIM3)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DATA_TYPE* INPLACE_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
{
  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input), DATA_TYPECODE);
}
%typemap(in, fragment="NumPy_Fragments")
  (DATA_TYPE* INPLACE_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
  (PyArrayObject* array=NULL)
{
  /* Same checks as INPLACE_ARRAY3, followed by require_fortran(). */
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array || !require_dimensions(array,3) || !require_contiguous(array)
      || !require_native(array) || !require_fortran(array)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
  $4 = (DIM_TYPE) array_size(array,2);
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3,
 *                    DATA_TYPE* INPLACE_FARRAY3)
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_FARRAY3)
{
  $1 =
is_array($input) && PyArray_EquivTypenums(array_type($input), DATA_TYPECODE);
}
%typemap(in, fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_FARRAY3)
  (PyArrayObject* array=NULL)
{
  /* Rank, contiguity and byte-order checks, followed by require_fortran(). */
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array || !require_dimensions(array,3) || !require_contiguous(array)
      || !require_native(array) || !require_fortran(array)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DIM_TYPE) array_size(array,2);
  $4 = (DATA_TYPE*) array_data(array);
}

/*************************/
/* Argout Array Typemaps */
/*************************/

/* Typemap suite for (DATA_TYPE ARGOUT_ARRAY1[ANY])
 */
%typemap(in,numinputs=0, fragment="NumPy_Backward_Compatibility,NumPy_Macros")
  (DATA_TYPE ARGOUT_ARRAY1[ANY])
  (PyObject * array = NULL)
{
  /* numinputs=0: no Python argument is consumed.  A new array with the
   * declared C extent is allocated and its buffer handed to the C code. */
  npy_intp dims[1] = { $1_dim0 };
  array = PyArray_SimpleNew(1, dims, DATA_TYPECODE);
  if (!array) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}
%typemap(argout)
  (DATA_TYPE ARGOUT_ARRAY1[ANY])
{
  /* Append the array allocated by the "in" typemap to the Python result. */
  $result = SWIG_Python_AppendOutput($result,array$argnum);
}

/* Typemap suite for (DATA_TYPE* ARGOUT_ARRAY1, DIM_TYPE DIM1)
 */
%typemap(in,numinputs=1, fragment="NumPy_Fragments")
  (DATA_TYPE* ARGOUT_ARRAY1, DIM_TYPE DIM1)
  (PyObject * array = NULL)
{
  /* numinputs=1: the single Python argument is the requested length. */
  npy_intp dims[1];
  if (!PyInt_Check($input))
  {
    const char* typestring = pytype_string($input);
    PyErr_Format(PyExc_TypeError, "Int dimension expected. 
'%s' given.", typestring);
    SWIG_fail;
  }
  /* Allocate a new 1-D array of the requested length for the C code to fill. */
  $2 = (DIM_TYPE) PyInt_AsLong($input);
  dims[0] = (npy_intp) $2;
  array = PyArray_SimpleNew(1, dims, DATA_TYPECODE);
  if (!array) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
}
%typemap(argout)
  (DATA_TYPE* ARGOUT_ARRAY1, DIM_TYPE DIM1)
{
  /* Append the array allocated by the "in" typemap to the Python result. */
  $result = SWIG_Python_AppendOutput($result,array$argnum);
}

/* Typemap suite for (DIM_TYPE DIM1, DATA_TYPE* ARGOUT_ARRAY1)
 */
%typemap(in,numinputs=1, fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DATA_TYPE* ARGOUT_ARRAY1)
  (PyObject * array = NULL)
{
  /* numinputs=1: the single Python argument is the requested length. */
  npy_intp dims[1];
  if (!PyInt_Check($input))
  {
    const char* typestring = pytype_string($input);
    PyErr_Format(PyExc_TypeError, "Int dimension expected. '%s' given.", typestring);
    SWIG_fail;
  }
  $1 = (DIM_TYPE) PyInt_AsLong($input);
  dims[0] = (npy_intp) $1;
  array = PyArray_SimpleNew(1, dims, DATA_TYPECODE);
  if (!array) SWIG_fail;
  $2 = (DATA_TYPE*) array_data(array);
}
%typemap(argout)
  (DIM_TYPE DIM1, DATA_TYPE* ARGOUT_ARRAY1)
{
  $result = SWIG_Python_AppendOutput($result,array$argnum);
}

/* Typemap suite for (DATA_TYPE ARGOUT_ARRAY2[ANY][ANY])
 */
%typemap(in,numinputs=0, fragment="NumPy_Backward_Compatibility,NumPy_Macros")
  (DATA_TYPE ARGOUT_ARRAY2[ANY][ANY])
  (PyObject * array = NULL)
{
  /* numinputs=0: the output shape comes from the declared C array bounds. */
  npy_intp dims[2] = { $1_dim0, $1_dim1 };
  array = PyArray_SimpleNew(2, dims, DATA_TYPECODE);
  if (!array) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}
%typemap(argout)
  (DATA_TYPE ARGOUT_ARRAY2[ANY][ANY])
{
  $result = SWIG_Python_AppendOutput($result,array$argnum);
}

/* Typemap suite for (DATA_TYPE ARGOUT_ARRAY3[ANY][ANY][ANY])
 */
%typemap(in,numinputs=0, fragment="NumPy_Backward_Compatibility,NumPy_Macros")
  (DATA_TYPE ARGOUT_ARRAY3[ANY][ANY][ANY])
  (PyObject * array = NULL)
{
  /* numinputs=0: the output shape comes from the declared C array bounds. */
  npy_intp dims[3] = { $1_dim0, $1_dim1, $1_dim2 };
  array = PyArray_SimpleNew(3, dims, DATA_TYPECODE);
  if (!array) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}
%typemap(argout)
  (DATA_TYPE ARGOUT_ARRAY3[ANY][ANY][ANY])
{
  $result = SWIG_Python_AppendOutput($result,array$argnum);
}
/*****************************/
/* Argoutview Array Typemaps */
/*****************************/

/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_ARRAY1, DIM_TYPE* DIM1)
 */
%typemap(in,numinputs=0)
  (DATA_TYPE** ARGOUTVIEW_ARRAY1, DIM_TYPE* DIM1 )
  (DATA_TYPE* data_temp , DIM_TYPE dim_temp)
{
  /* numinputs=0: no Python argument is consumed; the C output arguments
   * point at local temporaries that the wrapped function fills in. */
  $1 = &data_temp;
  $2 = &dim_temp;
}
%typemap(argout, fragment="NumPy_Backward_Compatibility")
  (DATA_TYPE** ARGOUTVIEW_ARRAY1, DIM_TYPE* DIM1)
{
  /* Build the result array directly on the returned data pointer
   * (PyArray_SimpleNewFromData) and append it to the Python result. */
  npy_intp dims[1] = { *$2 };
  PyObject * array = PyArray_SimpleNewFromData(1, dims, DATA_TYPECODE, (void*)(*$1));
  if (!array) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,array);
}

/* Typemap suite for (DIM_TYPE* DIM1, DATA_TYPE** ARGOUTVIEW_ARRAY1)
 */
%typemap(in,numinputs=0)
  (DIM_TYPE* DIM1 , DATA_TYPE** ARGOUTVIEW_ARRAY1)
  (DIM_TYPE dim_temp, DATA_TYPE* data_temp )
{
  $1 = &dim_temp;
  $2 = &data_temp;
}
%typemap(argout, fragment="NumPy_Backward_Compatibility")
  (DIM_TYPE* DIM1, DATA_TYPE** ARGOUTVIEW_ARRAY1)
{
  /* Dimension first: $1 is the extent, $2 is the data pointer. */
  npy_intp dims[1] = { *$1 };
  PyObject * array = PyArray_SimpleNewFromData(1, dims, DATA_TYPECODE, (void*)(*$2));
  if (!array) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,array);
}

/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_ARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2)
 */
%typemap(in,numinputs=0)
  (DATA_TYPE** ARGOUTVIEW_ARRAY2, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 )
  (DATA_TYPE* data_temp , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp)
{
  $1 = &data_temp;
  $2 = &dim1_temp;
  $3 = &dim2_temp;
}
%typemap(argout, fragment="NumPy_Backward_Compatibility")
  (DATA_TYPE** ARGOUTVIEW_ARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2)
{
  /* Data first: $1 is the data pointer, $2 and $3 are the extents. */
  npy_intp dims[2] = { *$2, *$3 };
  PyObject * array = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$1));
  if (!array) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,array);
}

/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_ARRAY2)
 */
%typemap(in,numinputs=0)
  (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DATA_TYPE** ARGOUTVIEW_ARRAY2)
  (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DATA_TYPE* data_temp )
{
  $1 = &dim1_temp;
  $2 = &dim2_temp;
  $3 = &data_temp;
}
%typemap(argout, fragment="NumPy_Backward_Compatibility")
  (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_ARRAY2)
{
  /* Dimensions first: $1 and $2 are the extents, $3 is the data pointer. */
  npy_intp dims[2] = { *$1, *$2 };
  PyObject * array = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$3));
  if (!array) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,array);
}

/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_FARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2)
 */
%typemap(in,numinputs=0)
  (DATA_TYPE** ARGOUTVIEW_FARRAY2, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 )
  (DATA_TYPE* data_temp , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp)
{
  /* numinputs=0: no Python argument is consumed; the C output arguments
   * point at local temporaries that the wrapped function fills in. */
  $1 = &data_temp;
  $2 = &dim1_temp;
  $3 = &dim2_temp;
}
%typemap(argout, fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements")
  (DATA_TYPE** ARGOUTVIEW_FARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2)
{
  npy_intp dims[2] = { *$2, *$3 };
  PyObject * obj = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$1));
  /* Same object viewed as PyArrayObject so require_fortran() can mark it
   * Fortran ordered; fail only if creation or that step fails. */
  PyArrayObject * array = (PyArrayObject*) obj;
  if (!array || !require_fortran(array)) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,obj);
}

/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_FARRAY2)
 */
%typemap(in,numinputs=0)
  (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DATA_TYPE** ARGOUTVIEW_FARRAY2)
  (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DATA_TYPE* data_temp )
{
  $1 = &dim1_temp;
  $2 = &dim2_temp;
  $3 = &data_temp;
}
%typemap(argout, fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements")
  (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_FARRAY2)
{
  /* Dimensions first: $1 and $2 are the extents, $3 is the data pointer. */
  npy_intp dims[2] = { *$1, *$2 };
  PyObject * obj = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$3));
  PyArrayObject * array = (PyArrayObject*) obj;
  if (!array || !require_fortran(array)) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,obj);
}

/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2,
 *                    DIM_TYPE* DIM3)
 */
%typemap(in,numinputs=0)
  (DATA_TYPE** ARGOUTVIEW_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE*
DIM3)
  (DATA_TYPE* data_temp, DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp)
{
  /* numinputs=0: no Python argument is consumed; the C output arguments
   * point at local temporaries that the wrapped function fills in. */
  $1 = &data_temp;
  $2 = &dim1_temp;
  $3 = &dim2_temp;
  $4 = &dim3_temp;
}
%typemap(argout, fragment="NumPy_Backward_Compatibility")
  (DATA_TYPE** ARGOUTVIEW_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3)
{
  /* Data first: $1 is the data pointer, $2..$4 are the extents. */
  npy_intp dims[3] = { *$2, *$3, *$4 };
  PyObject * array = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$1));
  if (!array) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,array);
}

/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3,
 *                    DATA_TYPE** ARGOUTVIEW_ARRAY3)
 */
%typemap(in,numinputs=0)
  (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_ARRAY3)
  (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DATA_TYPE* data_temp)
{
  $1 = &dim1_temp;
  $2 = &dim2_temp;
  $3 = &dim3_temp;
  $4 = &data_temp;
}
%typemap(argout, fragment="NumPy_Backward_Compatibility")
  (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_ARRAY3)
{
  /* Dimensions first: $1..$3 are the extents and $4 is the data pointer.
   * The buffer to wrap is therefore *$4; *$3 is the third extent. */
  npy_intp dims[3] = { *$1, *$2, *$3 };
  PyObject * array = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$4));
  if (!array) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,array);
}

/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2,
 *                    DIM_TYPE* DIM3)
 */
%typemap(in,numinputs=0)
  (DATA_TYPE** ARGOUTVIEW_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3)
  (DATA_TYPE* data_temp, DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp)
{
  $1 = &data_temp;
  $2 = &dim1_temp;
  $3 = &dim2_temp;
  $4 = &dim3_temp;
}
%typemap(argout, fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements")
  (DATA_TYPE** ARGOUTVIEW_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3)
{
  npy_intp dims[3] = { *$2, *$3, *$4 };
  PyObject * obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$1));
  PyArrayObject * array = (PyArrayObject*) obj;
  /* require_fortran() returns 1 on success, so failure is !require_fortran(),
   * matching the ARGOUTVIEW_FARRAY2 suites.  Without the negation this
   * typemap took SWIG_fail on every call. */
  if (!array || !require_fortran(array)) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,obj);
}
/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3,
 *                    DATA_TYPE** ARGOUTVIEW_FARRAY3)
 */
%typemap(in,numinputs=0)
  (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_FARRAY3)
  (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DATA_TYPE* data_temp)
{
  $1 = &dim1_temp;
  $2 = &dim2_temp;
  $3 = &dim3_temp;
  $4 = &data_temp;
}
%typemap(argout,
         fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements")
  (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_FARRAY3)
{
  /* The data pointer is the FOURTH argument in this ordering; the first
   * three are the dimensions.  The previous code dereferenced the third
   * argument (a dimension) and used it as the data address. */
  npy_intp dims[3] = { *$1, *$2, *$3 };
  PyObject * obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$4));
  PyArrayObject * array = (PyArrayObject*) obj;
  if (!array || require_fortran(array)) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,obj);
}

%enddef    /* %numpy_typemaps() macro */

/* *************************************************************** */

/* Concrete instances of the %numpy_typemaps() macro: Each invocation
 * below applies all of the typemaps above to the specified data type.
*/ %numpy_typemaps(signed char , NPY_BYTE , int) %numpy_typemaps(unsigned char , NPY_UBYTE , int) %numpy_typemaps(short , NPY_SHORT , int) %numpy_typemaps(unsigned short , NPY_USHORT , int) %numpy_typemaps(int , NPY_INT , int) %numpy_typemaps(unsigned int , NPY_UINT , int) %numpy_typemaps(long , NPY_LONG , int) %numpy_typemaps(unsigned long , NPY_ULONG , int) %numpy_typemaps(long long , NPY_LONGLONG , int) %numpy_typemaps(unsigned long long, NPY_ULONGLONG, int) %numpy_typemaps(float , NPY_FLOAT , int) %numpy_typemaps(double , NPY_DOUBLE , int) /* *************************************************************** * The following macro expansion does not work, because C++ bool is 4 * bytes and NPY_BOOL is 1 byte * * %numpy_typemaps(bool, NPY_BOOL, int) */ /* *************************************************************** * On my Mac, I get the following warning for this macro expansion: * 'swig/python detected a memory leak of type 'long double *', no destructor found.' * * %numpy_typemaps(long double, NPY_LONGDOUBLE, int) */ /* *************************************************************** * Swig complains about a syntax error for the following macro * expansions: * * %numpy_typemaps(complex float, NPY_CFLOAT , int) * * %numpy_typemaps(complex double, NPY_CDOUBLE, int) * * %numpy_typemaps(complex long double, NPY_CLONGDOUBLE, int) */ #endif /* SWIGPYTHON */ ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/python/rwt.i ================================================ %pythonbegin %{ from __future__ import division %} %define MODDOCSTRING "The Rice Wavelet Toolbox (RWT) is a collection of functions for 1D and 2D wavelet and filter bank design, analysis, and processing." %enddef %module(docstring=MODDOCSTRING) rwt /* The C functions for the transforms are not suitable for direct use from Python so let's rename them. 
*/ %rename(_c_dwt) dwt; %rename(_c_idwt) idwt; %rename(_c_rdwt) rdwt; %rename(_c_irdwt) irdwt; %rename(_find_levels) rwt_find_levels; %rename(_check_levels) rwt_check_levels; %{ #define SWIG_FILE_WITH_INIT #include "../lib/inc/rwt_transforms.h" #include "../lib/inc/rwt_init.h" %} %include "../lib/inc/rwt_init.h" %include "numpy.i" %init %{ import_array(); %} /* Building on the numpy SWIG macros we make wrapper functions for 1D and 2D for each transform */ void _c_dwt_1( double* INPLACE_ARRAY1, int DIM1, double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY1, int DIM1); void _c_dwt_2( double* INPLACE_ARRAY2, int DIM1, int DIM2, double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY2, int DIM1, int DIM2); void _c_idwt_1( double* INPLACE_ARRAY1, int DIM1, double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY1, int DIM1); void _c_idwt_2( double* INPLACE_ARRAY2, int DIM1, int DIM2, double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY2, int DIM1, int DIM2); void _c_rdwt_1( double* INPLACE_ARRAY1, int DIM1, double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY1, int DIM1, double* INPLACE_ARRAY1, int DIM1); void _c_rdwt_2( double* INPLACE_ARRAY2, int DIM1, int DIM2, double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY2, int DIM1, int DIM2, double* INPLACE_ARRAY2, int DIM1, int DIM2); void _c_irdwt_1(double* INPLACE_ARRAY1, int DIM1, double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY1, int DIM1, double* INPLACE_ARRAY1, int DIM1); void _c_irdwt_2(double* INPLACE_ARRAY2, int DIM1, int DIM2, double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY2, int DIM1, int DIM2, double* INPLACE_ARRAY2, int DIM1, int DIM2); %inline %{ void _c_dwt_1(double *x, int nrows, double *h, int ncoeff, int levels, double *y, int toss1) { dwt(x, nrows, 1, h, ncoeff, levels, y); } void _c_idwt_1(double *x, int nrows, double *h, int ncoeff, int levels, double *y, int toss1) { idwt(x, 
nrows, 1, h, ncoeff, levels, y); } void _c_rdwt_1(double *x, int nrows, double *h, int ncoeff, int levels, double *yl, int toss1, double *yh, int toss2) { rdwt(x, nrows, 1, h, ncoeff, levels, yl, yh); } void _c_irdwt_1(double *x, int nrows, double *h, int ncoeff, int levels, double *yl, int toss1, double *yh, int toss2) { irdwt(x, nrows, 1, h, ncoeff, levels, yl, yh); } void _c_dwt_2(double *x, int nrows, int ncols, double *h, int ncoeff, int levels, double *y, int toss1, int toss2) { dwt(x, nrows, ncols, h, ncoeff, levels, y); } void _c_idwt_2(double *x, int nrows, int ncols, double *h, int ncoeff, int levels, double *y, int toss1, int toss2) { idwt(x, nrows, ncols, h, ncoeff, levels, y); } void _c_rdwt_2(double *x, int nrows, int ncols, double *h, int ncoeff, int levels, double *yl, int toss1, int toss2, double *yh, int toss3, int toss4) { rdwt(x, nrows, ncols, h, ncoeff, levels, yl, yh); } void _c_irdwt_2(double *x, int nrows, int ncols, double *h, int ncoeff, int levels, double *yl, int toss1, int toss2, double *yh, int toss3, int toss4) { irdwt(x, nrows, ncols, h, ncoeff, levels, yl, yh); } %} %pythoncode %{ import numpy as np def _levels(x, L): dim = len(x.shape) # Determine the dimensions of our input m = x.shape[0] if (dim == 2): n = x.shape[1] else: n = 1 if (L == 0): # If the number of levels was not specified then use the max L = _find_levels(m, n) _check_levels(L, m, n) # Sanity check the number of levels return L def dwt(x, h, L = 0): """ Function computes the discrete wavelet transform y for a 1D or 2D input signal x using the scaling filter h. Input: x : finite length 1D or 2D signal (implicitly periodized) h : scaling filter L : number of levels. In the case of a 1D signal, length(x) must be divisible by 2^L; in the case of a 2D signal, the row and the column dimension must be divisible by 2^L. If no argument is specified, a full DWT is returned for maximal possible L. 
Output: y : the wavelet transform of the signal (see example to understand the coefficients) L : number of decomposition levels 1D Example: x = makesig('LinChirp', 8) h = daubcqf(4, 'min')[0] L = 2 y,L = dwt(x,h,L) 1D Example's output and explanation: y = array([1.1097,0.8767,0.8204,-0.5201,-0.0339,0.1001,0.2201,-0.1401]) L = 2 The coefficients in output y are arranged as follows y(0) and y(1) : Scaling coefficients (lowest frequency) y(2) and y(3) : Band pass wavelet coefficients y(4) to y(7) : Finest scale wavelet coefficients (highest frequency) 2D Example: load test_image h = daubcqf(4,'min')[0] L = 1 y,L = dwt(test_image,h,L) 2D Example's output and explanation: The coefficients in y are arranged as follows. .------------------. | | | | 4 | 2 | | | | | L,L | H,L | | | | -------------------- | | | | 3 | 1 | | | | | L,H | H,H | | | | `------------------' where 1 : High pass vertically and high pass horizontally 2 : Low pass vertically and high pass horizontally 3 : High pass vertically and low pass horizontally 4 : Low pass vertically and Low pass horizontally (scaling coefficients) """ if (x.dtype != 'float'): x = x * 1.0 L = _levels(x, L) y = np.ascontiguousarray(np.zeros(x.shape)) dim = len(x.shape) x = np.ascontiguousarray(x) if (dim == 1): _rwt._c_dwt_1(x, h, L, y) if (dim == 2): _rwt._c_dwt_2(x, h, L, y) return y, L def idwt(y, h, L = 0): """ Function computes the inverse discrete wavelet transform x for a 1D or 2D input signal y using the scaling filter h. Input: y : finite length 1D or 2D input signal (implicitly periodized) (see function dwt to find the structure of y) h : scaling filter L : number of levels. In the case of a 1D signal, length(y) must be divisible by 2^L; in the case of a 2D signal, the row and the column dimension must be divisible by 2^L. If no argument is specified, a full inverse DWT is returned for maximal possible L. 
Output: x : periodic reconstructed signal L : number of decomposition levels 1D Example: xin = makesig('LinChirp', 8) h = daubcqf(4, 'min')[0] L = 1 y, L = dwt(xin, h, L) x, L = idwt(y, h, L) 1D Example's output: x = array([0.0491,0.1951,0.4276,0.7071,0.9415,0.9808,0.6716,0.0000]) L = 1 """ if (y.dtype != 'float'): y = y * 1.0 L = _levels(y, L) x = np.ascontiguousarray(np.zeros(y.shape)) y = np.ascontiguousarray(y) dim = len(x.shape) if (dim == 1): _rwt._c_idwt_1(x, h, L, y) if (dim == 2): _rwt._c_idwt_2(x, h, L, y) return x, L def rdwt(x, h, L = 0): """ Function computes the redundant discrete wavelet transform y for a 1D or 2D input signal. (Redundant means here that the sub-sampling after each stage is omitted.) yl contains the lowpass and yh the highpass components. In the case of a 2D signal, the ordering in yh is [lh hl hh lh hl ... ] (first letter refers to row, second to column filtering). Input: x : finite length 1D or 2D signal (implicitly periodized) h : scaling filter L : number of levels. In the case of a 1D signal, length(x) must be divisible by 2^L; in the case of a 2D signal, the row and the column dimension must be divisible by 2^L. If no argument is specified, a full DWT is returned for maximal possible L. 
Output: yl : lowpass component yh : highpass components L : number of levels Example: x = makesig('Leopold', 8) h = daubcqf(4, 'min')[0] L = 1 yl, yh, L = rdwt(x,h,L) Example's output: yl = 0.8365 0.4830 0 0 0 0 -0.1294 0.2241 yh = -0.2241 -0.1294 0 0 0 0 -0.4830 0.8365 L = 1 """ if (x.dtype != 'float'): x = x * 1.0 L = _levels(x, L) yl = np.ascontiguousarray(np.zeros(x.shape)) dim = len(x.shape) x = np.ascontiguousarray(x) if (dim == 1): yh = np.ascontiguousarray(np.zeros(x.shape[0] * L)) _rwt._c_rdwt_1(x, h, L, yl, yh) if (dim == 2): yh = np.ascontiguousarray(np.zeros((x.shape[0], x.shape[1] * L * 3))) _rwt._c_rdwt_2(x, h, L, yl, yh) return yl, yh, L def irdwt(yl, yh, h, L = 0): """ Function computes the inverse redundant discrete wavelet transform x for a 1D or 2D input signal. (Redundant means here that the sub-sampling after each stage of the forward transform has been omitted.) yl contains the lowpass and yh the highpass components as computed, e.g., by rdwt. In the case of a 2D signal, the ordering in yh is [lh hl hh lh hl ... ] (first letter refers to row, second to column filtering). Input: yl : lowpass component yh : highpass components h : scaling filter L : number of levels. In the case of a 1D signal, length(yl) must be divisible by 2^L; in the case of a 2D signal, the row and the column dimension must be divisible by 2^L. 
Output: x : finite length 1D or 2D signal L : number of levels Example: xin = makesig('Leopold', 8) h = daubcqf(4, 'min')[0] L = 1 yl, yh, L = rdwt(xin, h, L) x, L = irdwt(yl, yh, h, L) Example Output: x = array([0.0000,1.0000,0.0000,-0.0000,0,0,0,-0.0000]) L = 1 """ if (yl.dtype != 'float'): yl = yl * 1.0 if (yh.dtype != 'float'): yh = yh * 1.0 L = _levels(yl, L) x = np.ascontiguousarray(np.zeros(yl.shape)) yl = np.ascontiguousarray(yl) yh = np.ascontiguousarray(yh) dim = len(x.shape) if (dim == 1): _rwt._c_irdwt_1(x, h, L, yl, yh) if (dim == 2): _rwt._c_irdwt_2(x, h, L, yl, yh) return x, L def daubcqf(n, dtype = 'min'): """ Function computes the Daubechies' scaling and wavelet filters (normalized to sqrt(2)). Input: n : Length of filter (must be even) dtype : Optional parameter that distinguishes the minimum phase, maximum phase and mid-phase solutions ('min', 'max', or 'mid'). If no argument is specified, the minimum phase solution is used. Output: h_0 : Minimal phase Daubechies' scaling filter h_1 : Minimal phase Daubechies' wavelet filter Example: n = 4 dtype = 'min' h_0, h_1 = daubcqf(n, dtype) Example Result: h_0 = array([0.4830, 0.8365, 0.2241, -0.1294]) h_1 = array([0.1294, 0.2241, -0.8365, 0.4830]) Reference: \"Orthonormal Bases of Compactly Supported Wavelets\", CPAM, Oct.89 """ if (n % 2 != 0): raise Exception("No Daubechies filter exists for ODD length") k = n // 2 a = p = q = 1 h_0 = np.array([1, 1]) for j in range(1, k): a = -a * 0.25 * (j + k - 1) / j h_0 = np.hstack((0, h_0)) + np.hstack((h_0, 0)) p = np.hstack((0, -p)) + np.hstack((p, 0)) p = np.hstack((0, -p)) + np.hstack((p, 0)) q = np.hstack((0, q, 0)) + a*p q = np.sort(np.roots(q)) qt = q[0:k-1] if (dtype == 'mid'): if (k % 2 == 1): qt = np.hstack((q[0:n-2:4], q[1:n-2:4])) else: qt = np.hstack((q[0], q[3:k-1:4], q[4:k-1:4], q[n-4:k:-4], q[n-5:k:-4])) h_0 = np.convolve(h_0, np.real(np.poly(qt))) h_0 = np.sqrt(2)*h_0 / sum(h_0) if (dtype == 'max'): h_0 = np.flipud(h_0) if 
(np.abs(sum(np.power(h_0, 2)) - 1) > 1e-4):
        # Stability check: the sum of squared coefficients is compared with 1.
        # The "- 1" must sit inside abs(); with it outside, a sum that is
        # too SMALL produced a negative value and was never reported.
        raise Exception("Numerically unstable for this value of n")
    # The wavelet filter is the reversed scaling filter with every other
    # coefficient (starting at index 0) negated.
    h_1 = np.copy(np.flipud(h_0))
    h_1[0:n-1:2] = -h_1[0:n-1:2]
    return h_0, h_1

def hard_th(y, thld):
    """
    Hard thresholds the input signal y with the threshold value thld.

    Input:
       y    : 1D or 2D signal to be thresholded
       thld : threshold value

    Output:
       x : Hard thresholded output (x = (abs(y)>thld) * y)

    Example:
       y = makesig('WernerSorrows', 8)
       thld = 1
       x = hard_th(y, thld)
    Example Output:
       x = array([1.5545, 5.3175, 0, 1.6956, -1.2678, 0, 1.7332, 0])
    """
    # Entries whose magnitude is not strictly above thld are zeroed.
    return (np.abs(y) > thld) * y

def soft_th(y, thld):
    """
    Soft thresholds the input signal y with the threshold value thld.

    Input:
       y    : 1D or 2D signal to be thresholded
       thld : Threshold value

    Output:
       x : Soft thresholded output (sign(y) * (a >= thld) * (a - thld)),
           where a = abs(y)

    Example:
       y = makesig('Doppler', 8)
       thld = 0.2
       x = soft_th(y, thld)
    Example Output:
       x = array([0, 0, 0, -0.0703, 0, 0.2001, 0.0483, 0])

    Reference:
       \"De-noising via Soft-Thresholding\" Tech. Rept. Statistics,
       Stanford, 1992. D.L. Donoho.
    """
    x = np.abs(y)
    # Shrink every magnitude by thld; magnitudes below thld become 0.
    return np.sign(y) * (x >= thld) * (x - thld)

def makesig(signame, n = 512):
    """
    Creates artificial test signal identical to the standard test
    signals proposed and used by D. Donoho and I. Johnstone in WaveLab
    (- a matlab toolbox developed by Donoho et al. at the statistics
    department at Stanford University).

    Input:
       signame - Name of the desired signal
                   'HeaviSine'
                   'Bumps'
                   'Blocks'
                   'Doppler'
                   'Ramp'
                   'Cusp'
                   'Sing'
                   'HiSine'
                   'LoSine'
                   'LinChirp'
                   'TwoChirp'
                   'QuadChirp'
                   'MishMash'
                   'WernerSorrows' (Heisenberg)
                   'Leopold' (Kronecker)
       n       - Length in samples of the desired signal (Default 512)

    Output:
       x - resulting test signal

    References:
       WaveLab can be accessed at
       www_url: http://playfair.stanford.edu/~wavelab/
       Also see various articles by D.L. Donoho et al.
at web_url: http://playfair.stanford.edu/ """ t = np.array(range(1, n + 1)) / float(n) if (signame == 'HeaviSine'): y = 4 * np.sin(4 * np.pi * t) return y - np.sign(t - .3) - np.sign(.72 - t) if (signame == 'Bumps'): pos = np.array([.1, .13, .15, .23, .25, .40, .44, .65, .76, .78, .81]) hgt = np.array([4, 5, 3, 4, 5, 4.2, 2.1, 4.3, 3.1, 5.1, 4.2]) wth = np.array( [.005, .005, .006, .01, .01, .03, .01, .01, .005, .008, .005]) y = np.zeros(n) for j in range(0, pos.size): y = y + hgt[j] / pow((1 + np.abs((t - pos[j]) / wth[j])), 4) return y if (signame == 'Blocks'): pos = np.array([.1, .13, .15, .23, .25, .40, .44, .65, .76, .78, .81]) hgt = np.array([4, (-5), 3, (-4), 5, (-4.2), 2.1, 4.3, (-3.1), 2.1, (-4.2)]) y = np.zeros(n) for j in range(0, pos.size): y = y + (1 + np.sign(t - pos[j])) * (hgt[j]/2) return y if (signame == 'Doppler'): return np.sqrt(t * (1-t)) * np.sin((2 * np.pi * 1.05) / (t+.05)) if (signame == 'Ramp'): return t - (t >= .37) if (signame == 'Cusp'): return np.sqrt(np.abs(t - .37)) if (signame == 'Sing'): k = np.floor(n * .37) return 1 / np.abs(t - (k + .5)/n) if (signame == 'HiSine'): return np.sin(np.pi * (n * .6902) * t) if (signame == 'LoSine'): return np.sin(np.pi * (n * .3333) * t) if (signame == 'LinChirp'): return np.sin(np.pi * t * ((n * .125) * t)) if (signame == 'TwoChirp'): return np.sin(np.pi * t * (n * t)) + np.sin((np.pi / 3) * t * (n * t)) if (signame == 'QuadChirp'): return np.sin((np.pi/3) * t * (n * pow(t,2))) if (signame == 'MishMash'): y = np.sin((np.pi/3) * t * (n * pow(t,2))) y = y + np.sin(np.pi * (n * .6902) * t) return y + np.sin(np.pi * t * (n * .125 * t)) if (signame == 'WernerSorrows'): y = np.sin(np.pi * t * (n/2 * pow(t, 2))) y = y + np.sin(np.pi * (n * .6902) * t) y = y + np.sin(np.pi * t * (n * t)) pos = np.array([.1, .13, .15, .23, .25, .40, .44, .65, .76, .78, .81]) hgt = np.array([4, 5, 3, 4, 5, 4.2, 2.1, 4.3, 3.1, 5.1, 4.2]) wth = np.array( [.005, .005, .006, .01, .01, .03, .01, .01, .005, .008, .005]) for j in 
range(0, pos.size): y = y + hgt[j] / pow((1 + np.abs((t - pos[j]) / wth[j])), 4) return y if (signame == 'Leopold'): return (t == np.floor(.37 * n)/n) * 1.0 def denoise(x, h, denoise_type = 0, option = None): """ DENOISE is a generic routine for wavelet based denoising. The routine will denoise the signal x using the 2-band wavelet system described by the filter h using either the traditional discrete wavelet transform (DWT) or the linear shift invariant discrete wavelet transform (also known as the undecimated DWT (UDWT)). Input: x : 1D or 2D signal to be denoised h : Scaling filter to be applied denoise_type : Type of transform (Default: type = 0) 0 --> Discrete wavelet transform (DWT) 1 --> Undecimated DWT (UDWT) option : Default settings are marked with '*' (option(k) below is the k-th element, i.e. option[k-1] in Python): *type = 0 --> option = [0 3.0 0 2 0 0] type = 1 --> option = [0 3.6 0 1 0 0] option(1) : Whether to threshold low-pass part 0 --> Don't threshold low pass component 1 --> Threshold low pass component option(2) : Threshold multiplier, c. The threshold is computed as: thld = c*MAD(noise_estimate). The default values are: c = 3.0 for the DWT based denoising c = 3.6 for the UDWT based denoising option(3) : Type of variance estimator 0 --> MAD (median absolute value, divided by 0.67) 1 --> STD (classical numerical std estimate) option(4) : Type of thresholding 2 --> Soft thresholding 1 --> Hard thresholding option(5) : Number of levels, L, in wavelet decomposition. By setting this to the default value '0' a maximal decomposition is used. option(6) : Actual threshold to use (setting this to anything but 0 will mean that option(3) is ignored) Output: xd : Estimate of noise free signal xn : The estimated noise signal (x-xd) option : A vector of actual parameters used by the routine. The vector is configured the same way as the input option vector with one added element option(7) = type. 
Example 1:
        from numpy.random import randn
        N = 16
        h = daubcqf(6)[0]
        s = makesig('Doppler', N)
        n = randn(N)
        x = s + n/10     # (approximately 10dB SNR)

        # Denoise x with the default method based on the DWT
        xd, xn, opt1 = denoise(x,h)

        # Denoise x using the undecimated (LSI) wavelet transform
        yd, yn, opt2 = denoise(x,h,1)

    Example 2: (on an image)
        from scipy.io import loadmat
        from numpy.random import random_sample
        lena = loadmat('../tests/lena512.mat')['lena512']
        h = daubcqf(6)[0]
        noisyLena = lena + 25 * random_sample(lena.shape)
        denoisedLena, xn, opt1 = denoise(noisyLena, h)
    """
    if (option is None and denoise_type == 0):
        option = [0, 3.0, 0, 2, 0, 0]
    if (option is None and denoise_type == 1):
        option = [0, 3.6, 0, 1, 0, 0]
    # Always work on a private copy: the option list is modified further
    # down (threshold written back, transform type appended) and the
    # caller's own sequence must not change as a side effect.
    option = list(option)

    # mx/nx are the row/column counts; a 1D input is treated as nx == 1.
    mx = x.shape[0]
    nx = 1
    if (len(x.shape) > 1):
        nx = x.shape[1]
    dim = min(mx, nx)
    n = dim
    if (dim == 1):
        n = max(mx, nx)

    # option[4] == 0 requests the maximal number of levels.  The builtin
    # int is used because the np.int alias no longer exists in NumPy.
    if (option[4] == 0):
        L = int(np.floor(np.log2(n)))
    else:
        L = option[4]

    if (denoise_type == 0):
        xd = dwt(x, h, L)[0]
        if (option[5] == 0):
            # Estimate the threshold from the finest-scale coefficients.
            if (nx > 1):
                tmp = xd[mx // 2:mx, nx // 2:nx]
            else:
                tmp = xd[mx // 2:mx]
            if (option[2] == 0):
                thld = option[1] * np.median(np.abs(tmp)) / .67
            elif (option[2] == 1):
                thld = option[1] * np.std(tmp, ddof=1)
        else:
            thld = option[5]

        # Save the low-pass (scaling) coefficients so they can be restored
        # after thresholding when option[0] == 0.
        if (dim == 1):
            ix = np.array(range(0, (n // (np.power(2, L)))))
            if (ix.size == 1):
                ix = ix[0]
            ykeep = xd[ix]
        else:
            # np.ix_ selects the full rows-by-columns block.  Indexing with
            # two plain index arrays (xd[ix, jx]) pairs them element-wise
            # and would pick only the diagonal of that block.
            low = np.ix_(np.arange(mx // (2 ** L)), np.arange(nx // (2 ** L)))
            ykeep = xd[low]

        if (option[3] == 2):
            xd = soft_th(xd, thld)
        elif (option[3] == 1):
            xd = hard_th(xd, thld)

        if (option[0] == 0):
            if (dim == 1):
                xd[ix] = ykeep
            else:
                xd[low] = ykeep
        xd = idwt(xd, h, L)[0]
    elif (denoise_type == 1):
        (xl, xh, L) = rdwt(x, h, L)
        if (dim == 1):
            c_offset = 0
        else:
            c_offset = 2 * nx
        if (option[5] == 0):
            # NOTE(review): the 2D slice is mx columns wide; for non-square
            # input nx looks like the intended width - confirm.
            if (nx > 1):
                tmp = xh[:,c_offset:c_offset+mx]
            else:
                tmp = xh[c_offset:c_offset+mx:1]
            if (option[2] == 0):
thld = option[1] * np.median(np.abs(tmp)) / .67 elif (option[2] == 1): thld = option[1] * np.std(tmp, ddof=1) else: thld = option[5] if (option[3] == 2): xh = soft_th(xh, thld) if (option[0] == 1): xl = soft_th(xl, thld) elif (option[3] == 1): xh = hard_th(xh, thld) if (option[0] == 1): xl = hard_th(xl, thld) xd = irdwt(xl, xh, h, L)[0] option[5] = (thld) option.append(denoise_type) xn = x - xd return xd, xn, option %} ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/python/test_rwt.py ================================================ #!/usr/bin/python import unittest from numpy import * from scipy.io import loadmat from rwt import * class TestRWT(unittest.TestCase): def setUp(self): pass def test_dwt(self): x = makesig('LinChirp', 8) h = daubcqf(4, 'min')[0] L = 2 y, L = dwt(x, h, L) y_corr = array([1.109692262737501,0.876661822959323,0.820391852106669,-0.520074093642583,-0.033927668247206,0.100110695461285,0.220088240246095,-0.140081604397608]) self.assertTrue(allclose(y, y_corr, 0.0001)) def test_dwt_2d(self): x = array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16.0]]) h = daubcqf(4)[0] L = 2 y, L = dwt(x, h, L) y_corr = array([[34.0000, -3.4641, 0.0000, -2.0000], [-13.8564, 0.0000, 0.0000, -2.0000], [-0.0000, 0.0000, -0.0000, -0.0000], [-8.0000, -8.0000, 0.0000, -0.0000]]) self.assertTrue(allclose(y, y_corr, 0.0000005)) def test_idwt(self): x = makesig('LinChirp', 8) h = daubcqf(4, 'min')[0] L = 2 y, L = dwt(x, h, L) x_new, L = idwt(y, h, L) self.assertTrue(allclose(x, x_new, 0.0005)) def test_idwt_2d(self): x = loadmat('../tests/lena512.mat')['lena512'] * 1.0 h = daubcqf(6)[0] L = 9 y, L = dwt(x, h, L) x_new, L = idwt(y, h, L) self.assertTrue(allclose(x, x_new, 0.0005)) def test_rdwt(self): x = makesig('Leopold', 8) h = daubcqf(4, 'min')[0] L = 1 (yl, yh, L) = rdwt(x, h, L) yl_corr = [0.8365, 0.4830, 0, 0, 0, 0, -0.1294, 0.2241] yh_corr = [-0.2241, -0.1294, 0, 0, 0, 0, -0.4830, 
0.8365] L_corr = 1 self.assertTrue(allclose(yl, yl_corr, 0.0005)) self.assertTrue(allclose(yh, yh_corr, 0.0005)) self.assertTrue(allclose(L, L_corr, 0.0005)) def test_rdwt_2(self): x = array([[1.0,3,5,2],[3,4,8,1],[3,9,2,0],[1,2,3,0]]) h = daubcqf(4, 'min')[0] yl, yh, L = rdwt(x, h, 1) yl_corr = array([ [9.0111, 10.7799, 5.8795, 4.1107], [11.1393, 8.7766, 2.5502, 4.9130], [6.9465, 5.7578, 1.6630, 2.8517], [4.8182, 7.7611, 4.9922, 2.0494]]) yh_corr = array([ [4.5724, 0.4285, -1.8828, 2.2611, 4.8714, -3.1026, -1.7978, 0.0290, -2.9620, -1.1818, -1.1295, 5.2733], [-2.4441, -2.4318, -1.4465, -1.4587, 1.8861, -4.2488, -1.9776, 4.3403, -0.0233, 0.0356, 0.9498, -0.9620], [-1.7488, -0.5870, 0.5592, -0.6026, 1.1663, -2.3550, -1.7398, 2.9285, -0.6965, 1.8583, -0.7120, -0.4498], [-0.3795, 2.5903, 2.7700, -0.1998, 4.1516, -1.2087, -1.5601, -1.3828, 3.6818, -0.7120, 0.8917, -3.8615]]) self.assertTrue(allclose(yl, yl_corr, 0.001)) self.assertTrue(allclose(yh, yh_corr, 0.001)) def test_rdwt_2L2(self): x = array([[1.0,3,5,2],[3,4,8,1],[3,9,2,0],[1,2,3,0]]) h = daubcqf(4, 'min')[0] yl, yh, L = rdwt(x, h, 2) yl_corr = array([ [11.7500, 11.7500, 11.7500, 11.7500], [11.7500, 11.7500, 11.7500, 11.7500], [11.7500, 11.7500, 11.7500, 11.7500], [11.7500, 11.7500, 11.7500, 11.7500]]) yh_corr = array([ [4.5724, 0.4285, -1.8828, 2.2611, 4.8714, -3.1026, -1.7978, 0.0290, -2.9620, -1.1818, -1.1295, 5.2733, 3.1405, 3.1405, 3.1405, 3.1405, 4.2075, 4.7877, -4.2075, -4.7877, -1.0760, 1.8816, 1.0760, -1.8816], [-2.4441, -2.4318, -1.4465, -1.4587, 1.8861, -4.2488, -1.9776, 4.3403, -0.0233, 0.0356, 0.9498, -0.9620, 1.9396, 1.9396, 1.9396, 1.9396, 4.2075, 4.7877, -4.2075, -4.7877, 4.3816, -0.9240, -4.3816, 0.9240], [-1.7488, -0.5870, 0.5592, -0.6026, 1.1663, -2.3550, -1.7398, 2.9285, -0.6965, 1.8583, -0.7120, -0.4498, -3.1405, -3.1405, -3.1405, -3.1405, 4.2075, 4.7877, -4.2075, -4.7877, 1.0760, -1.8816, -1.0760, 1.8816], [-0.3795, 2.5903, 2.7700, -0.1998, 4.1516, -1.2087, -1.5601, -1.3828, 3.6818, 
-0.7120, 0.8917, -3.8615, -1.9396, -1.9396, -1.9396, -1.9396, 4.2075, 4.7877, -4.2075, -4.7877, -4.3816, 0.9240, 4.3816, -0.9240]]) self.assertTrue(allclose(yl, yl_corr, 0.001)) self.assertTrue(allclose(yh, yh_corr, 0.001)) def test_irdwt(self): xin = makesig('Leopold',8) h = daubcqf(4, 'min')[0] Lin = 1 (yl, yh, L) = rdwt(xin, h, Lin) (x, L) = irdwt(yl, yh, h, L) self.assertTrue(allclose(x, xin, 0.0005)) def test_irdwt_2d(self): x = loadmat('../tests/lena512.mat')['lena512'] * 1.0 h = daubcqf(6)[0] L = 9 yl, yh, L = rdwt(x, h, L) x_new, L = irdwt(yl, yh, h, L) self.assertTrue(allclose(x, x_new, 0.0005)) def test_makesig_heavisine(self): x = makesig('HeaviSine', 8) y = array([4.0000, 0.0000, -6.0000, -2.0000, 2.0000, 0.0000, -4.0000, -0.0000]) self.assertTrue(allclose(x, y, 0.0005)) def test_makesig_bumps(self): x = around(makesig('Bumps', 8), 4) y = array([0.3206, 5.0527, 0.3727, 0.0129, 0.0295, 0.0489, 0.0004, 0.0000]) self.assertTrue(allclose(x, y, 0.0005)) def test_makesig_blocks(self): x = makesig('Blocks', 8) y = array([4.0000, 0.5000, 3.0000, 0.9000, 0.9000, 5.2000, -0.0000, -0.0000]) self.assertTrue(allclose(x, y, 0.0005)) def test_makesig_doppler(self): x = makesig('Doppler', 12) y = array([-0.1954, -0.3067, 0.0000, -0.4703, 0.4930, -0.2703, -0.4127, 0.1025, 0.4001, 0.3454, 0.1425, 0]) self.assertTrue(allclose(x, y, 0.0005)) def test_makesig_ramp(self): x = makesig('Ramp', 8) y = array([0.1250, 0.2500, -0.6250, -0.5000, -0.3750, -0.2500, -0.1250, 0]) self.assertTrue(allclose(x, y, 0.0005)) def test_makesig_cusp(self): x = makesig('Cusp', 8) y = array([0.4950, 0.3464, 0.0707, 0.3606, 0.5050, 0.6164, 0.7106, 0.7937]) self.assertTrue(allclose(x, y, 0.0005)) def test_makesig_sing(self): x = makesig('Sing', 8) y = array([5.3333, 16.0000, 16.0000, 5.3333, 3.2000, 2.2857, 1.7778, 1.4545]) self.assertTrue(allclose(x, y, 0.0005)) def test_makesig_hisine(self): x = makesig('HiSine', 8) y = array([0.8267, -0.9302, 0.2200, 0.6827, -0.9882, 0.4292, 0.5053, -0.9977]) 
self.assertTrue(allclose(x, y, 0.0005)) def test_makesig_losine(self): x = makesig('LoSine', 8) y = array([0.865973039158459,0.866130104544730,0.000314159260191,-0.865815888304075,-0.866287084447387,-0.000628318489377,0.865658651997088,0.866443978850937]) self.assertTrue(allclose(x, y, 0.0005)) def test_makesig_linchirp(self): x = makesig('LinChirp', 8) y = array([0.049067674327418,0.195090322016128,0.427555093430282,0.707106781186547,0.941544065183021,0.980785280403230,0.671558954847019,0.000000000000000]) self.assertTrue(allclose(x, y, 0.0001)) def test_makesig_twochirp(self): x = makesig('TwoChirp', 8) y = array([0.5132, 1.5000, 0.5412, 0.8660, -0.5132, 0, 0.5132, 0.8660]) self.assertTrue(allclose(x, y, 0.0005)) def test_makesig_quadchirp(self): x = makesig('QuadChirp', 8) y = array([0.016361731626487,0.130526192220052,0.427555093430282,0.866025403784439,0.889516075421856,-0.382683432365090,-0.621660573370077,0.866025403784439]) self.assertTrue(allclose(x, y, 0.0001)) def test_makesig_mishmash(self): x = makesig('MishMash', 8) y = array([0.8922, -0.6046, 1.0751, 2.2558, 0.8429, 1.0273, 0.5551, -0.1317]) self.assertTrue(allclose(x, y, 0.0005)) def test_makesig_wernersorrows(self): x = makesig('WernerSorrows', 8) y = array([1.5545, 5.3175, 0.8252, 1.6956, -1.2678, 0.6466, 1.7332, -0.9977]) self.assertTrue(allclose(x, y, 0.0005)) def test_makesig_leopold(self): x = makesig('Leopold', 8) y = array([0, 1, 0, 0, 0, 0, 0, 0]) self.assertTrue(allclose(x, y, 0.0005)) def test_denoise_default(self): signal = makesig('Doppler', 32) noise = 
array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h) signal_denoised_corr = array([0.0741827688375062,0.0791701902526268,0.0760842615272340,0.0750476831774179,0.111279774779568,0.163475053283544,-0.0498263815350539,0.0946073088237311,0.135126562486911,-0.0186090620958193,-0.0748812479991294,-0.103470206059426,0.0234254843251780,0.239772540836257,0.0920583398962312,-0.152180640366891,-0.116682073306156,-0.0459389850762785,-0.00245240039778375,0.0755739164104836,0.102548333512214,0.121099911744184,0.177390507921620,0.240386041553093,0.231105933317157,0.198210924493273,0.175672812990725,0.138822049613034,0.127491615387826,0.121409597186325,0.0994935320130783,0.0760019340865427]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_2d(self): x = array([[1,2,3,4],[5,6,7,8],[9,10.09,11,12],[13,13.91,15,16]]) h = daubcqf(4)[0] signal_denoised, subtracted_noise, actual_options = denoise(x, h) signal_denoised_corr = array([[1.093495801587334,2.052784169768518,3.036985129109070,4.014510779767102],[5.037416383975946,6.006178652683398,6.994963120759174,7.978382656683513],[9.047593546684929,10.003998510025589,10.977825887256145,11.94698494275469],[13.009489364401729,13.937038667522501,14.939852728547271,15.9224996584731398]]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 
0.01)) def test_denoise_udwt(self): signal = makesig('Doppler', 32) noise = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h, 1) signal_denoised_corr = array([0.126244615385152,0.0952319712425300,0.0671343607152503,0.0513902979722585,0.0430402732682634,0.0586932575131794,0.0861069751902698,0.0989949047763016,0.0908418658128637,-0.0141454670119059,-0.144791527437026,-0.0185533166035902,0.278351613782131,0.279033706376659,-0.0205012032054263,-0.212367658407976,-0.241484343697995,-0.248582298831059,-0.213374214781743,-0.101963712141109,0.0454248851310567,0.181104333949749,0.275294407293259,0.309076259882059,0.298600450385073,0.259080737796607,0.211123535801718,0.183021783525739,0.171966340866576,0.171616812586097,0.168720006300193,0.151066428184072]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_udwt_2d(self): x = array([[1,2,3,4],[5,6,7,8],[9,10.09,11,12],[13,13.91,15,16]]) h = daubcqf(4)[0] signal_denoised, subtracted_noise, actual_options = denoise(x, h, 1) signal_denoised_corr = 
array([[1.007040488866197,1.993405274521765,3.006268404030089,3.996424654030090],[4.995935171857875,6.002401216530091,7.001252328142127,8.005847881693983],[9.009508189685661,10.059981743374523,11.001190131625481,11.999030274521770],[12.987516149590270,13.944211765573623,14.991289136202310,15.998697189754166]]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_threshold_low(self): signal = makesig('Doppler', 32) noise = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h, 0, [1,3.0,0,2,0,0]) signal_denoised_corr = array([0.0187742354278351,0.0237616568429558,0.0206757281175629,0.0196391497677469,0.0558712413698966,0.108066519873873,-0.105234914944725,0.0391987754140600,0.0797180290772401,-0.0740175955054904,-0.130289781408801,-0.158878739469097,-0.0319830490844931,0.184364007426586,0.0366498064865601,-0.207589173776562,-0.172090606715827,-0.101347518485950,-0.0578609338074549,0.0201653830008125,0.0471398001025425,0.0656913783345127,0.121981974511949,0.184977508143422,0.175697399907486,0.142802391083602,0.120264279581054,0.0834135162033633,0.0720830819781554,0.0660010637766539,0.0440849986034073,0.0205934006768717]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_thresh_multiplier(self): signal = makesig('Doppler', 32) noise = 
array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h, 0, [1,3.5,0,2,0,0]) signal_denoised_corr = array([0.00563527074803461,0.0110853052404048,0.0101590193471916,0.0116789518546074,0.0354625658443208,0.0691904606426981,-0.0647010252187970,0.0393485097012034,0.0302297746478269,-0.0658230296401878,-0.0947938063374137,-0.147943151851009,-0.0355607514547514,0.143027827800490,0.0126752977970079,-0.200577663821584,-0.149059259007655,-0.0564432101940217,-0.0281365070661950,0.0201021371871464,0.0438412772787373,0.0596866399869512,0.0967101937989458,0.136451641917565,0.130716307107088,0.109146914388131,0.0925200849653435,0.0657607417363412,0.0550584910898860,0.0469636231448182,0.0277268486177313,0.00667135407398081]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_std(self): signal = makesig('Doppler', 32) noise = 
array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h, 0, [0,3.0,1,2,0,0]) signal_denoised_corr = array([0.0686926069658060,0.0706216045196474,0.0719769032529757,0.0743568305131058,0.0754251996534692,0.0763549103855611,0.0783972750744446,0.0807092136475563,0.0763109954998047,0.0693017683604205,0.0628697537191382,0.0547492531677562,0.0755519478401559,0.107931256046656,0.0859959791464885,0.0494376118339224,0.0602059364595448,0.0785077229738383,0.0791999606842265,0.0809410605777517,0.0844652184548917,0.0873749084881920,0.0911535278085727,0.0952027332951270,0.0936316016468421,0.0898878427420561,0.0866734185917041,0.0820709685744921,0.0793481432323076,0.0768306965269240,0.0727995727792393,0.0684196591566048]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_hard(self): signal = makesig('Doppler', 32) noise = 
array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h, 0, [0,3.0,0,1,0,0]) signal_denoised_corr = array([0.0977394160103721,0.0994161560983385,0.0832447407807381,0.0666983311697188,0.177420971595413,0.340230583897110,-0.354597069671295,0.0250017872275015,0.394418485343238,-0.0595745304374512,-0.452401570793399,-0.175707560852101,-0.00622320325130765,0.437867065411816,0.187485346584306,-0.241060664687049,-0.306285896120773,-0.373946536466370,-0.246165924475657,0.00210496326791051,0.0528629966064817,0.0967383656953347,0.275410693617439,0.487298926169970,0.454985253718689,0.348603331393631,0.288205743942248,0.186806596496260,0.172147260405660,0.180050851714681,0.142136445826288,0.104484725401481]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_levels(self): signal = makesig('Doppler', 32) noise = 
array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h, 0, [0,3.0,0,2,4,0]) signal_denoised_corr = array([0.164259992817262,0.156379071218712,0.142212685671703,0.125038963573761,0.150297815252073,0.191536767978636,-0.0381639580765735,0.0881092032192094,0.119629284458486,-0.0406090725365491,-0.105645426731493,-0.141820831994602,-0.0280318977202704,0.173171960129832,0.0117537437282443,-0.247115729957293,-0.206759297285911,-0.123147866042363,-0.0685808245422524,0.0255826360141400,0.0635302930397082,0.0930381970490923,0.165728084463140,0.246884147157615,0.246603211345582,0.220210934934003,0.206436991723089,0.177172675548210,0.178948997433275,0.188010177892750,0.179798128181065,0.170937023676945]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_actual_thresh(self): signal = makesig('Doppler', 32) noise = 
array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h, 0, [0,3.0,0,2,0,0.5]) signal_denoised_corr = array([0.0607099183942295,0.0654351521193524,0.0684154759800610,0.0742018934148454,0.0758845005390013,0.0769511530643110,0.0810856606730252,0.0858023375316036,0.0704706443350518,0.0472060906047587,0.0254329679518446,-0.00154590940405266,0.0598455182579352,0.156556707841878,0.0864272987162393,-0.0287835335280487,0.00606017120154721,0.0659592575432934,0.0713958080495586,0.0812891735076492,0.0953701981347179,0.107554576791239,0.123739146895592,0.141180422640726,0.137085044622601,0.124838366760086,0.114852957437233,0.0997294000571788,0.0922174665178409,0.0857758976557685,0.0737052631031342,0.0605470542090229]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_udwt_threshold_low(self): signal = makesig('Doppler', 32) noise = 
array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h, 1, [1,3,0,1,0,0]) signal_denoised_corr = array([0.135039400483741,0.117805175604609,0.0967709584177031,0.0142060292567307,-0.0239840294603812,0.323425861331697,-0.212285200125643,0.166066657685731,0.136653739821785,-0.0361708285655289,-0.244622217319313,-0.0751486112344819,0.279128997196628,0.299915294672821,0.00822389077239383,-0.232180770499244,-0.330137263335199,-0.293955318206172,-0.175538926380835,-0.0733568677543535,0.049241196655251,0.200165899490694,0.304615650610263,0.337325376378116,0.325593984310807,0.282048956150932,0.228861081870546,0.196656880842149,0.180959366486141,0.175210410022406,0.169828050229736,0.155033256209497]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_udwt_thresh_multiplier(self): signal = makesig('Doppler', 32) noise = 
array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h, 1, [1,3.5,0,1,0,0]) signal_denoised_corr = array([0.0479478506866607,0.0160653046305043,-0.012660890293452,-0.0292521383561941,-0.0383355043751224,-0.0239494802109215,0.00200042536526626,0.0135636610003902,0.00399637041195728,-0.100521378500944,-0.229923524965501,-0.102614225576592,0.195850596270724,0.197593413336102,-0.100882406775293,-0.291163630119251,-0.318524834100706,-0.324752887320235,-0.288916218874243,-0.176658530913858,-0.028536592326759,0.108409816572649,0.204063702017061,0.239170248556769,0.230108690684778,0.190119394184444,0.14091827822899,0.11174543739754,0.0991301032767805,0.0977198505254529,0.0937639547688583,0.0745251447941448]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_udwt_std(self): signal = makesig('Doppler', 32) noise = 
array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h, 1, [0,3.0,1,1,0,0]) signal_denoised_corr = array([0.0847626939447046,0.0648669375488877,0.0505127048998841,0.0431477690668965,0.0443458995091662,0.0638361516754724,0.0926698200065443,0.122716357496751,0.135591683864019,0.0377466753027189,-0.0889166586897228,-0.0310700016943258,0.16530654803759,0.237349858169585,0.0577692051497442,-0.137751577705709,-0.18354744395111,-0.188205427540335,-0.157902857480421,-0.055391323576937,0.0791892398460303,0.198068185997372,0.271471422836112,0.282275886815228,0.246689293630916,0.205546705496588,0.16546007731141,0.145130898382968,0.1471329636038,0.142472749823065,0.132163448290946,0.111958195551385]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_udwt_soft(self): signal = makesig('Doppler', 32) noise = 
array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h, 1, [0,3.0,0,2,0,0]) signal_denoised_corr = array([0.086668016749428,0.078090652632278,0.070455842749544,0.062824684205684,0.064249795534642,0.086899924318641,0.053549539548214,0.100644175366308,0.100726560037458,0.051479406046214,-0.011299945211104,0.036115394710961,0.147624998547612,0.159516308766960,0.059119062682569,-0.020817294484415,-0.042170912413038,-0.046825168298822,-0.027179285827824,0.017379645805457,0.071225126011476,0.123532780238470,0.153926034241219,0.160138755049699,0.153562168658336,0.138748019440599,0.123707805352361,0.115223425612607,0.110890877355381,0.107909648973443,0.103630954238181,0.095849084980685]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_udwt_levels(self): signal = makesig('Doppler', 32) noise = 
array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h, 1, [0,3.0,0,1,4,0]) signal_denoised_corr = array([0.137633389000662,0.120676804147327,0.0997827582151432,0.0156985740202669,-0.0251180988153785,0.319788331991522,-0.217919217670089,0.160238201773756,0.131270340429534,-0.0414158027972923,-0.249853610380694,-0.0801267408837784,0.275034335985338,0.296982831400265,0.00620014657281041,-0.234309647934845,-0.33273125185212,-0.296826946748889,-0.178550726178275,-0.0748494125178897,0.0503752660102483,0.203803428830869,0.310249668154709,0.343153832290091,0.330977383703058,0.287293930382695,0.234092474931927,0.201635010491445,0.185054027697432,0.178142873294961,0.171851794429319,0.157162133645098]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_denoise_udwt_actual_thresh(self): signal = makesig('Doppler', 32) noise = 
array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940]) with_noise = signal + noise / 10 h = daubcqf(6)[0] signal_denoised, subtracted_noise, actual_options = denoise(with_noise, h, 1, [0,3.0,0,1,0,0.5]) signal_denoised_corr = array([0.126244615385152,0.09523197124253,0.0671343607152503,0.0513902979722585,0.0430402732682634,0.0586932575131794,0.0861069751902698,0.0989949047763016,0.0908418658128637,-0.0141454670119059,-0.144791527437026,-0.0185533166035902,0.278351613782131,0.279033706376659,-0.0205012032054263,-0.212367658407976,-0.241484343697995,-0.248582298831059,-0.213374214781743,-0.101963712141109,0.0454248851310567,0.181104333949749,0.275294407293258,0.309076259882059,0.298600450385073,0.259080737796607,0.211123535801717,0.183021783525739,0.171966340866576,0.171616812586097,0.168720006300193,0.151066428184072]) self.assertTrue(allclose(signal_denoised, signal_denoised_corr, 0.01)) def test_daubcqf_min(self): (a, b) = daubcqf(4) ax = [0.482962913144534,0.836516303737808,0.224143868042013,-0.129409522551260] bx = [0.129409522551260,0.224143868042013,-0.836516303737808,0.482962913144534] self.assertTrue(allclose(a, ax, 0.000001)) self.assertTrue(allclose(b, bx, 0.000001)) def test_daubcqf_max(self): (a, b) = daubcqf(4, 'max') ax = [-0.129409522551260,0.224143868042013,0.836516303737808,0.482962913144534] bx = [-0.482962913144534,0.836516303737808,-0.224143868042013,-0.129409522551260] self.assertTrue(allclose(a, ax, 0.000001)) 
self.assertTrue(allclose(b, bx, 0.000001)) def test_daubcqf_mid_even_k(self): (a, b) = daubcqf(4, 'mid') ax = [0.482962913144534,0.836516303737808,0.224143868042013,-0.129409522551260] bx = [0.129409522551260,0.224143868042013,-0.836516303737808,0.482962913144534] self.assertTrue(allclose(a, ax, 0.000001)) self.assertTrue(allclose(b, bx, 0.000001)) def test_daubcqf_mid_odd_k(self): (a, b) = daubcqf(6, 'mid') ax = [0.332670552950083,0.806891509311093,0.459877502118491,-0.135011020010255,-0.085441273882027,0.035226291885710] bx = [-0.035226291885710,-0.085441273882027,0.135011020010255,0.459877502118491,-0.806891509311093,0.332670552950083] self.assertTrue(allclose(a, ax, 0.000001)) self.assertTrue(allclose(b, bx, 0.000001)) if __name__ == '__main__': unittest.main() ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/readme ================================================ test ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/Readme.html ================================================ ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/architecture/html/matlab_xunit_architecture.html ================================================ MATLAB xUnit Test Framework: Architectural Notes

MATLAB xUnit Test Framework: Architectural Notes

This document summarizes the key classes and design choices for MATLAB xUnit, a MATLAB unit testing framework based on xUnit patterns.

Note: Testing pattern and smell terminology in this document is drawn from xUnit Test Patterns: Refactoring Test Code, by Gerard Meszaros, Addison-Wesley, 2007.

Contents

TestComponent, TestCase, and TestSuite

The abstract TestComponent class defines an object that has a description (a name and a location) and that can be run.

A TestCase object is a test component that defines an individual test case that can be run with a pass or fail result.

A TestSuite object is a test component that contains a collection of other test components. Note the hierarchical nature of test suites; they can contain both individual test case objects as well as other test suites. Running a test suite means invoking the run method on each test component in its collection.

TestCase: The Four-Phase Test

The TestCase class provides the standard xUnit Four-Phase Test, using a Fresh Fixture, Implicit Setup, and Implicit Teardown. These elements can all be seen in the run method of TestCase:

       function did_pass = run(self, monitor)
           %run Execute the test case
           %    did_pass = test_case.run(monitor) calls the TestCase object's
           %    setUp() method, then the test method, then the tearDown()
           %    method.  The testComponentStarted(), testCaseFailure(),
           %    testCaseError(), and testComponentFinished() methods of
           %    monitor are called at the appropriate times.  monitor is a
           %    TestRunMonitor object.  Typically it is either a TestRunLogger
           %    subclass or a CommandWindowTestRunDisplay subclass.
           %
           %    test_case.run() automatically uses a
           %    CommandWindowTestRunDisplay object in order to print test
           %    suite execution information to the Command Window.
           %
           %    did_pass is true unless an exception was caught while running
           %    setUp(), the test method, or tearDown().
           if nargin < 2
               % No monitor supplied: fall back to the Command Window display.
               monitor = CommandWindowTestRunDisplay();
           end
           did_pass = true;
           monitor.testComponentStarted(self);
           try
               self.setUp();
               f = str2func(self.MethodName);
               % The inner try isolates the test method so that an exception
               % it throws is reported as a failure and tearDown() below
               % still runs.
               try
                   % Call the test method.
                   f(self);
               catch failureException
                   monitor.testCaseFailure(self, failureException);
                   did_pass = false;
               end
               self.tearDown();
           catch errorException
               % Anything that escapes the inner try (e.g. an exception from
               % setUp() or tearDown()) is reported as an error, not a failure.
               monitor.testCaseError(self, errorException);
               did_pass = false;
           end
           monitor.testComponentFinished(self, did_pass);
       end

Phase 1 sets up the test fixture via the Implicit Setup call, self.setUp(). The base class setUp() method does nothing.

Phases 2 and 3 (exercising the system under test and verifying the expected outcome) are handled by the test method, which is invoked by f(self).

Phase 4 tears down the test fixture via the Implicit Teardown call, self.tearDown(). The base class tearDown() method does nothing.

Test failure and test error exceptions are caught and handled by the run() method, so test methods do not need to use try-catch. This facilitates simple, straight-line test-method code.

Note: The monitor object will be discussed later.

Test Case Discovery

The static method TestSuite.fromName constructs a test suite based on the name of an M-file. If the M-file defines a TestCase subclass, then fromName inspects the methods of the class and constructs a TestCase object for each method whose name begins with "[tT]est". If the M-file does not define a TestCase subclass, then fromName attempts to construct either a simple procedural test case or a set of subfunction-based test cases. (See the next section).

The static method TestSuite.fromPwd constructs a test suite by discovering all the test cases in the present working directory. It discovers all TestCase subclasses in the directory. In addition, it constructs test suites from all the procedural M-files in the directory beginning with "[tT]est".

The File System Test Runner, runtests, provides convenient syntaxes for performing test case discovery automatically.

FunctionHandleTestCase: For the Procedural World

Most MATLAB users are much more comfortable with procedural programming. An important design goal for MATLAB xUnit is to make it as easy as possible for MATLAB users with little object-oriented programming experience to create and run their own tests. The FunctionHandleTestCase supplies the plumbing necessary to support procedural test functions:

Private properties SetupFcn, TestFcn, and TeardownFcn are procedural function handles (similar to function pointers or function references in other languages).

runTestCase() is the test method used for constructing a TestCase object.

Managing test fixtures requires special consideration, because procedural function handles don't have access to object instance data in order to access a test fixture.

The overridden setUp() method looks at the number of outputs of the function handle SetupFcn. If it has an output argument, then the argument is saved in the private TestData property, and TestData is then passed to both TestFcn and TeardownFcn for their use.

Writing Procedural Test Cases

Procedural test cases can be written in two ways:

  • A simple M-file function that is treated as a single test case
  • An M-file containing multiple subfunctions that are each treated as a test case.

In either case, the test case is considered to pass if it executes without error.

Writing one test case per file is not ideal; it would lead to either zillions of tiny little test files, or long test methods exhibiting various bad test smells (Multiple Test Conditions, Flexible Test, Conditional Test Logic, Eager Test, Obscure Test, etc.). So we need a way to write multiple test cases in a single procedural M-file. The natural MATLAB way would be to use subfunctions.

However, subfunction-based test cases require special consideration. Consider the following M-file structure:

  === File A.m ===
  function A
     ...
  function B
     ...
  function C
     ...
  function D
     ...

The first function in the file, A, has the same name as the file. When other code outside this function calls A, it is this first function that gets called. Functions B, C, and D are called subfunctions. Normally, these subfunctions are only visible to and can only be called by A. The only way that code elsewhere might be able to call B, C, or D is if function A forms handles to them and passes those handles out of its scope. Normally this would be done by returning the function handles as output arguments.

Note that no code executing outside the scope of a function in A.m can form function handles to B, C, or D, or can even determine that these functions exist.

This obviously poses a problem for test discovery!

The MATLAB xUnit solution is to establish the following convention for subfunction-based tests. The first function in a test M-file containing subfunction tests has to begin with these lines:

  === File A.m ===
  function test_suite = A
  initTestSuite;
  ...

initTestSuite is a script that runs in the scope of the function A. initTestSuite determines which subfunctions are test functions, as well as setup or teardown functions. It forms handles to these functions and constructs a set of FunctionHandleTestCase objects, which function A returns as the output argument test_suite.

TestRunMonitor

The abstract TestRunMonitor class defines the interface for an object that "observes" the in-progress execution of a test suite. MATLAB xUnit provides two subclasses of TestRunMonitor:

  • TestRunLogger silently logs test suite events and captures the details of any test failures or test errors.
  • CommandWindowTestRunDisplay prints the progress of an executing test suite to the Command Window.

A TestRunMonitor is passed to the run() method of a TestComponent object. The run() method calls the appropriate notification methods of the monitor.

Here is the output when using the CommandWindowTestRunDisplay object on MATLAB xUnit's own test suite:

  runtests
  Starting test run with 92 test cases.
  ....................
  ....................
  ....................
  ....................
  ............
  PASSED in 7.040 seconds.

File System Test Runner

MATLAB xUnit provides a command-line File System Test Runner called runtests. When called with no input arguments, runtests gathers all the test cases from the current directory and runs them, summarizing the results to the Command Window. runtests can also take a string argument specifying which test file, and optionally which specific test case, to run.

Test Selection

Test selection is supported in runtests by passing in a string of the form:

   'Location:Name'

or just:

   'Location'

Both of these forms are handled by runtests and by TestSuite.fromName.

'Location' is the name of the M-file containing test cases. 'Name' is the name of a specific test case. Normally, the name of the test case is the name of the corresponding TestCase method. For FunctionHandleTestCase objects, though, 'Name' is the subfunction name.

Assertion Methods

MATLAB xUnit provides the following assertion methods:

  • Stated Outcome Assertion (assertTrue, assertFalse)
  • Equality Assertion (assertEqual)
  • Fuzzy Equality Assertion (assertElementsAlmostEqual, assertVectorsAlmostEqual)
  • Expected Exception Assertion (assertExceptionRaised)

Assertion functions are provided via globally accessible names (e.g., assertEqual). The assertion functions could be moved to the xunit package, but MATLAB users are not accustomed yet to packages and package name-scoping syntax.

'message' is the last input to the assertion functions and is optional. (See below for discussion of Assertion Roulette.)

The Expected Exception Assertion, assertExceptionRaised, is used by forming an anonymous function handle from an expression that is expected to error, and then passing that function handle to assertExceptionRaised along with the expected exception identifier. For example:

  f = @() sin(1,2,3);
  assertExceptionRaised(f, 'MATLAB:maxrhs')

By using this mechanism, test writers can verify exceptions without using try-catch logic in their test code.

Stack Traces and "Assertion Roulette"

xUnit Test Patterns explains the smell Assertion Roulette this way: "It is hard to tell which of several assertions within the same test method caused a test failure."

MATLAB xUnit mitigates Assertion Roulette by capturing the entire stack trace, including line numbers, for every test failure and test error. (The MATLAB MException object, which you obtain via the catch clause, contains the stack trace.) The stack trace is displayed to the Command Window, with clickable links that load the corresponding M-file into the editor at the appropriate line number.

Stack traces can be pretty long, though. Also, test framework plumbing tends to occupy the trace in between the assertion and the user's test code, thus making the trace hard to interpret for less-experienced users. MATLAB xUnit, therefore, uses a stack filtering heuristic for displaying test fault traces: Starting at the deepest call level, once the trace leaves MATLAB xUnit framework functions, all further framework functions are filtered out of the stack trace.

Here's an example of stack trace display in the output of runtests:

>> runtests testSample
Starting test run with 1 test case.
F
FAILED in 0.081 seconds.

===== Test Case Failure =====
Location: c:\work\matlab_xunit\architecture\testSample.m
Name: testMyCode

c:\work\matlab_xunit\architecture\testSample.m at line 6

Input elements are not all equal within relative tolerance: 1.49012e-008

First input:
1

Second input:
1.1000

Clicking on the blue, underlined link above loads the corresponding file into the editor, positioned at the appropriate line.

Extending the Framework

The MATLAB xUnit framework can be extended primarily by subclassing TestCase, TestSuite, and TestRunMonitor.

TestCase can be subclassed to enable a new set of test cases that all share some particular behavior. The MATLAB xUnit Test Framework contains three examples of extending TestCase behavior in this way:

  • FunctionHandleTestCase provides the ability to define test cases based on procedural function handles.
  • TestCaseInDir defines a test case that must be run inside a particular directory. The setUp and tearDown functions are overridden to change the MATLAB working directory before running the test case, and then to restore the original working directory when the test case has finished. The class is used by the framework's own test suite.
  • TestCaseInPath defines a test case that must be run with a particular directory temporarily added to the MATLAB path. Its implementation is similar to TestCaseInDir, and it is also used by the framework's own test suite.

TestSuite could be similarly extended by subclassing. This might provide a way in the future to define a test suite containing collections of test components in separate directories, which is not currently supported.

Finally, TestRunMonitor could be subclassed to support a variety of test monitoring mechanisms, such as what might be required by a Graphical Test Runner.

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/architecture/matlab_xunit_architecture.m ================================================ %% MATLAB xUnit Test Framework: Architectural Notes % This document summarizes the key classes and design choices for MATLAB xUnit, % a MATLAB unit testing framework based on xUnit patterns. % % Note: Testing pattern and smell terminology in this document is drawn from % _xUnit Test Patterns: Refactoring Test Code_, by Gerard Meszaros, % Addison-Wesley, 2007. %% TestComponent, TestCase, and TestSuite % % <> % % The abstract |TestComponent| class defines an object that has a description (a % name and a location) and that can be run. % % A |TestCase| object is a test component that defines an individual test case % that can be run with a pass or fail result. % % A |TestSuite| object is a test component that contains a collection of other % test components. Note the hierarchical nature of test suites; they can % contain both individual test case objects as well as other test suites. % Running a test suite means invoking the |run| method on each test component in % its collection. %% TestCase: The Four-Phase Test % % The TestCase class provides the standard xUnit _Four-Phase Test_, using % a _Fresh Fixture_, _Implicit Setup_, and _Implicit Teardown_. These all % elements can all be seen in the |run| method of TestCase: % % function did_pass = run(self, monitor) % %run Execute the test case % % test_case.run(monitor) calls the TestCase object's setUp() % % method, then the test method, then the tearDown() method. % % observer is a TestRunObserver object. The testStarted(), % % testFailure(), testError(), and testFinished() methods of % % observer are called at the appropriate times. monitor is a % % TestRunMonitor object. Typically it is either a TestRunLogger % % subclass or a CommandWindowTestRunDisplay subclass. 
% % % % test_case.run() automatically uses a % % CommandWindowTestRunDisplay object in order to print test % % suite execution information to the Command Window. % % if nargin < 2 % monitor = CommandWindowTestRunDisplay(); % end % % did_pass = true; % monitor.testComponentStarted(self); % % try % self.setUp(); % f = str2func(self.MethodName); % % try % % Call the test method. % f(self); % catch failureException % monitor.testCaseFailure(self, failureException); % did_pass = false; % end % % self.tearDown(); % % catch errorException % monitor.testCaseError(self, errorException); % did_pass = false; % end % % monitor.testComponentFinished(self, did_pass); % end % % Phase 1 sets up the test fixture via the _Implicit Setup_ call, |self.setUp()|. % The base class |setUp()| method does nothing. % % Phases 2 and 3 (exercising the system under test and verifying the expected % outcome) are handled by the test method, which is invoked by |f(self)|. % % Phase 4 tears down the test fixture via the _Implicit Teardown_ call, % |self.tearDown()|. The base class |tearDown()| method does nothing. % % Test failure and test error exceptions are caught and handled by the |run()| % method, so test methods do not need to use try-catch. This facilitates % simple, straight-line test-method code. % % _Note: The |monitor| object will be discussed later._ %% Test Case Discovery % The static method |TestSuite.fromName| constructs a test suite based on the % name of an M-file. If the M-file defines a |TestCase| subclass, then |fromName| % inspects the methods of the class and constructs a |TestCase| object for each % method whose name begins with "[tT]est". If the M-file does not define a % |TestCase| subclass, then |fromName| attempts to construct either a simple % procedural test case or a set of subfunction-based test cases. (See the next % section). % % The static method |TestSuite.fromPwd| constructs a test suite by discovering % all the test cases in the present working directory. 
It discovers all % |TestCase| subclasses in the directory. In addition, it constructs test suites % from all the procedural M-files in the directory beginning with "[tT]est". % % The _File System Test Runner_, |runtests|, provides convenient syntaxes for % performing test case discovery automatically. %% FunctionHandleTestCase: For the Procedural World % Most MATLAB users are much more comfortable with procedural programming. An % important design goal for MATLAB xUnit is to make it as easy as possible for MATLAB % users with little object-oriented programming experience to create and run % their own tests. The FunctionHandleTestCase supplies the plumbing necessary % to support procedural test functions: % % <> % % Private properties |SetupFcn|, |TestFcn|, and |TeardownFcn| are procedural % _function handles_ (similar to function pointers or function references in % other languages). % % |runTestCase()| is the test method used for constructing a TestCase object. % % Managing test fixtures requires special consideration, because procedural % function handles don't have access to object instance data in order to access % a test fixture. % % The overridden |setUp()| method looks at the number of outputs of the function % handle |SetupFcn|. If it has an output argument, then the argument is saved % in the private |TestData| property, and |TestData| is then passed to both % |TestFcn| and |TeardownFcn| for their use. %% Writing Procedural Test Cases % Procedural test cases can be written in two ways: % % * A simple M-file function that is treated as a single test case % * An M-file containing multiple subfunctions that are each treated as a test case. % % In either case, the test % case is considered to pass if it executes without error. 
% % Writing one test case per file is not ideal; it would lead to either zillions % of tiny little test files, or long test methods exhibiting various bad test % smells (_Multiple Test Conditions_, _Flexible Test_, _Conditional Test Logic_, % _Eager Test_, _Obscure Test_, etc.) So we need a way to write multiple test % cases in a single procedural M-file. The natural MATLAB way would be to use % subfunctions. % % However, subfunction-based test cases require special consideration. Consider % the following M-file structure: % % === File A.m === % function A % ... % % function B % ... % % function C % ... % % function D % ... % % The first function in the file, |A|, has the same name as the file. When % other code outside this function calls |A|, it is this first function that % gets called. Functions |B|, |C|, and |D| are called _subfunctions_. % Normally, these subfunctions are only visible to and can only be called by % |A|. The only way that code elsewhere might be able to call |B|, |C|, or |D| % is if function |A| forms handles to them and passes those handles out of its % scope. Normally this would be done by returning the function handles as % output arguments. % % Note that no code executing outside the scope of a function in A.m can form % function handles to |B|, |C|, or |D|, or can even determine that these % functions exist. % % This obviously poses a problem for test discovery! % % The MATLAB xUnit solution is to establish the following convention for % subfunction-based tests. The first function in a test M-file containing % subfunction tests has to begin with these lines: % % === File A.m === % function test_suite = A % initTestSuite; % ... % % |initTestSuite| is a _script_ that runs in the scope of the function |A|. % |initTestSuite| determines which subfunctions are test functions, as well as setup % or teardown functions. 
It forms handles to these functions and constructs a % set of FunctionHandleTestCase objects, which function |A| returns as the % output argument |test_suite|. %% TestRunMonitor % The abstract |TestRunMonitor| class defines the interface for an object that % "observe" the in-progress execution of a test suite. MATLAB xUnit provides two % subclasses of |TestRunMonitor|: % % * |TestRunLogger| silently logs test suite events and captures the details of % any test failures or test errors. % * |CommandWindowTestRunDisplay| prints the progress of an executing test suite % to the Command Window. % % <> % % A TestRunMonitor is passed to the |run()| method of a TestComponent object. % The |run()| method calls the appropriate notification methods of the % monitor. % % Here is the output when using the CommandWindowTestRunDisplay object on the % MATLAB xUnit's own test suite: % % runtests % Starting test run with 92 test cases. % .................... % .................... % .................... % .................... % ............ % PASSED in 7.040 seconds. %% File System Test Runner % MATLAB xUnit provides a command-line _File System Test Runner_ called % |runtests|. When called with no input arguments, |runtests| gathers all the % test cases from the current directory and runs them, summarizing the results % to the Command Window. |runtests| can also take a string argument specifying % which test file, and optionally which specific test case, to run. %% Test Selection % Test selection is supported in |runtests| by passing in a string of the form: % % 'Location:Name' % % or just: % % 'Location' % % Both of these forms are handled by |runtests| and by |TestSuite.fromName|. % % 'Location' is the name of the M-file containing test cases. 'Name' is the % name of a specific test case. Normally, the name of the test case is the name % of the corresponding TestCase method. For FunctionHandleTestCase objects, % though, 'Name' is the subfunction name. 
%% Assertion Methods % MATLAB xUnit provides the following assertion methods: % % * _Stated Outcome Assertion_ (|assertTrue|, |assertFalse|) % * _Equality Assertion_ (|assertEqual|) % * _Fuzzy Equality Assertion_ (|assertElementsAlmostEqual|, |assertVectorsAlmostEqual|) % * _Expected Exception Assertion_ (|assertExceptionRaised|) % % Assertion functions are provided via globally accessible names (e.g., % |assertEqual|). The assertion functions could be moved to the |xunit| % package, but MATLAB users are not accustomed yet to packages and package % name-scoping syntax. % % 'message' is the last input to the assertion functions and is optional. (See % below for discussion of _Assertion Roulette_.) % % The _Expected Exception Assertion_, |assertExceptionRaised| is used by forming % an anonymous function handle from an expression that is expected to error, and % then passing that function handle to |assertExceptionRaised| along with the % expected exception identifier. For example: % % f = @() sin(1,2,3); % assertExceptionRaised(f, 'MATLAB:maxrhs') % % By using this mechanism, test writers can verify exceptions without using % try-catch logic in their test code. %% Stack Traces and "Assertion Roulette" % _xUnit Test Patterns_ explains the smell _Assertion Roulette_ this way: "It is % hard to tell which of several assertions within the same test method caused a % test failure. % % MATLAB xUnit mitigates against _Assertion Roulette_ by capturing the entire stack % trace, including line numbers, for every test failure and test error. (The % MATLAB MException object, which you obtain via the |catch| clause, contains % the stack trace.) The stack trace is displayed to the Command Window, with % clickable links that load the corresponding M-file into editor at the % appropriate line number. % % Stack traces can be pretty long, though. 
Also, test framework plumbing tends % to occupy the trace in between the assertion and the user's test code, thus % making the trace hard to interpret for less-experienced users. MATLAB xUnit, % therefore, uses a stack filtering heuristic for displaying test fault traces: % Starting at the deepest call level, once the trace leaves MATLAB xUnit framework % functions, all further framework functions are filtered out of the stack % trace. % % Here's an example of stack trace display in the output of |runtests|: % % % % >> runtests testSample
% Starting test run with 1 test case.
% F
% FAILED in 0.081 seconds.
%
% ===== Test Case Failure =====
% Location: c:\work\matlab_xunit\architecture\testSample.m
% Name: testMyCode
%
% c:\work\matlab_xunit\architecture\testSample.m at line 6
%
% Input elements are not all equal within relative tolerance: 1.49012e-008
%
% First input:
% 1
%
% Second input:
% 1.1000
%
% % % Clicking on the blue, underlined link above loads the corresponding file into % the editor, positioned at the appropriate line. %% Extending the Framework % The MATLAB xUnit framework can be extended primarily by subclassing |TestCase|, % |TestSuite|, and |TestRunMonitor|. % % |TestCase| can be subclassed to enable a new set of test cases that all share % some particular behavior. The MATLAB xUnit Test Framework contains three % examples of extending |TestCase| behavior in this way: % % * |FunctionHandleTestCase| provides the ability to define test cases based on % procedural function handles. % * |TestCaseInDir| defines a test case that must be run inside a particular % directory. The |setUp| and |tearDown| functions are overridden to change the % MATLAB working directory before running the test case, and then to restore the % original working directory when the test case has finished. The class is used by % the framework's own test suite. % * |TestCaseInPath| defines a test case that must be run with a particular % directory temporarily added to the MATLAB path. Its implementation is similar % to |TestCaseInDir|, and it is also used by the framework's own test suite. % % |TestSuite| could be similarly extended by subclassing. This might provide a % way in the future to define a test suite containing collections of test % components in separate directories, which is not currently supported. % % Finally, |TestRunMonitor| could be subclassed to support a variety of test % monitoring mechanisms, such as what might be required by a _Graphical Test % Runner_. 
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/architecture/testSample.m ================================================ function test_suite = testSample initTestSuite; function testMyCode assertEqual(1, 1); assertElementsAlmostEqual(1, 1.1); assertTrue(10 == 10); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/+abc/+tests/test_that.m ================================================ % Do-nothing test used in the examples for organizing tests inside packages. % % Steven L. Eddins % Copyright 2010 The MathWorks, Inc. function test_that ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/+abc/+tests/test_this.m ================================================ % Do-nothing test used in the examples for organizing tests inside packages. % % Steven L. Eddins % Copyright 2010 The MathWorks, Inc. function test_this ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/+abc_tests/test_that.m ================================================ % Do-nothing test used in the examples for organizing tests inside packages. % % Steven L. Eddins % Copyright 2010 The MathWorks, Inc. function test_that ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/+abc_tests/test_this.m ================================================ % Do-nothing test used in the examples for organizing tests inside packages. % % Steven L. Eddins % Copyright 2010 The MathWorks, Inc. 
function test_this ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exException.m ================================================ %% <../index.html MATLAB xUnit Test Framework>: How to Test an Error Message % It's surprising to most people (but not quality engineers) how % often programmers make errors in error-handling code. Because of % this unfortunate truth, it is useful to write unit tests that % verify that your MATLAB code throws the proper error, at the % proper time. % % The assertion function that makes this task easy is % |assertExceptionThrown|. This example shows how to write a unit % test that verifies the "Too many input arguments" error for the % |cos| function. % % Your first step is to determine the _error identifier_ associated % with the error message. You can find out the error identifier by % using the |lasterror| function. % % If you call |cos| with two input arguments, like this: % % cos(1, 2) % % you get this error message: % % Error using ==> cos % Too many input arguments. % % Then if you call |lasterror|, you get this output: % % ans = % % message: [1x45 char] % identifier: 'MATLAB:maxrhs' % stack: [0x1 struct] % % So the _identifier_ associated with this error message is % |'MATLAB:maxrhs'|. % % When you write your test function, you'll form an anonymous % function handle that calls |cos| with the erroneous additional % input argument. f = @() cos(1, 2) %% % You then pass this function to |assertExceptionThrown|, along with % the expected error identifier. assertExceptionThrown(f, 'MATLAB:maxrhs'); %% % |assertExceptionThrown| verifies that when |f()| is called, an % error results with the specified error identifier. % % Here's our error condition test for the |cos| function. cd examples_general type testCos %% % Run the test using |runtests|. 
runtests testCos %% % <../index.html Back to MATLAB xUnit Test Framework> %% % Copyright 2008-2010 The MathWorks, Inc. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exQuickStart.m ================================================ %% <../index.html MATLAB xUnit Test Framework>: How to Write and Run Tests % This example shows how to write and run a couple of test cases for the MATLAB % |fliplr| function. %% Make a folder for your tests % To get started, create a folder (directory) that will contain your tests, and % then make that your working folder. The test directory in this example is % example_quick_start. cd example_quick_start %% Write each test case as a simple M-file % Write each test case as an M-file function that returns no output arguments. % The function name should start or end with "test" or "Test". The test case % passes if the function runs with no error. % % Here's a test-case M-file that verifies the correct output for a vector input. type testFliplrVector %% % The function |testFliplrVector| calls the function being tested and checks the % output against the expected output. If the output is different than expected, % the function calls |error|. % % Here's another test-case M-file that verifies the correct |fliplr| output for % a matrix input. type testFliplrMatrix %% % This function is simpler than |testFliplrVector| because it uses the utility % testing function |assertEqual|. |assertEqual| checks to see whether its two % inputs are equal. If they are equal, |assertEqual| simply returns silently. % If they are not equal, |assertEqual| calls |error|. %% Run all the tests using |runtests| % To run all your test cases, simply call |runtests|. |runtests| automatically finds % all the test cases in the current directory, runs them, and reports the % results to the Command Window. 
runtests %% % <../index.html Back to MATLAB xUnit Test Framework> %% % Copyright 2008-2010 The MathWorks, Inc. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exRunSpecificTest.m ================================================ %% <../index.html MATLAB xUnit Test Framework>: How to Run a Specific Test % To run all the test cases in just one M-file, ignoring other test % cases that might be in other files in the same directory, give % the name of the file (without the ".m" extension) as an argument % to |runtests|. % % For example cd example_subfunction_tests runtests testFliplr %% % To run a single test case, add the name of the test case using a % colon (":"), like this: runtests testFliplr:testFliplrVector %% % <../index.html Back to MATLAB xUnit Test Framework> %% % Copyright 2008-2010 The MathWorks, Inc. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exRunTestsInADirectory.m ================================================ %% <../index.html MATLAB xUnit Test Framework>: How to Run Tests in Specific Directories % To run all the test cases in a specific directory, give the name of the % directory as an argument to |runtests|. % % For example runtests example_subfunction_tests %% % To run tests in multiple directories, give each directory name as a separate % argument to |runtests|. %% % <../index.html Back to MATLAB xUnit Test Framework> %% % Copyright 2008-2010 The MathWorks, Inc. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exRunTestsInPackage.m ================================================ %% <../index.html MATLAB xUnit Test Framework>: How to Run Tests in a Package % To run all the test cases in a package, give the name of the % package as an argument to |runtests|. 
*Note:* Running tests in a package % requires MATLAB R2009a or later. % % For example, suppose you are distributing a set of MATLAB files called the % "ABC Toolbox." Then you could put your tests inside a package called abc_tests % and run them like this: runtests abc_tests %% % (Note that the initial "+" character in the name of the package folder on disk % is not part of the package name.) % % Or you could put your tests inside a subpackage called abc.tests and run them % like this: runtests abc.tests %% % You should not use a generic top-level package name such "tests" because then % your package might be unintentionally combined with packages with the same % name created by other people. %% % <../index.html Back to MATLAB xUnit Test Framework> %% % Copyright 2010 The MathWorks, Inc. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exSilentRunning.m ================================================ %% <../index.html MATLAB xUnit Test Framework>: How to Run Tests Silently and Query the Results % When you run a test suite using |runtests|, the results are % summarized in the Command Window. This example shows you how to % run a test suite so that nothing prints to the Command Window, and % it shows you how to write a program to automatically determine the % results of running the test suite. % % There are four steps to follow. % % 1. Construct a |TestSuite| object. In this example we'll use the |fromPwd| % method of the |TestSuite| class to construct a test suite using all the test % cases found in the |examples_general| directory. cd examples_general suite = TestSuite.fromPwd(); %% % You can look up information about the individual test cases. suite.TestComponents{1} %% % You can see above that the first test component in the test suite is itself % another test suite, which contains the test cases defined by the M-file named % TestUsingTestCase. 
Here's what one of these individual test cases looks like: suite.TestComponents{1}.TestComponents{1} %% % 2. Construct a TestLogger object. This object can receive % notifications about what happens when a test suite is executed. logger = TestRunLogger; %% % 3. Call the |run| method of the |TestSuite| object, passing it the % logger. suite.run(logger); %% % The |TestLogger| object can now be queried to determine what % happened during the test. logger %% % There were eight test cases run (logger.NumTestCases), resulting in % one test failure and one test error. Detailed information about % what went wrong can be found in |logger.Faults|. logger.Faults(1) %% logger.Faults(2) %% % You can drill further to determine the names of the failing tests, % as well as the complete stack trace associated with each failure. logger.Faults(1).TestCase %% logger.Faults(1).Exception.stack(1) %% logger.Faults(1).Exception.stack(2) %% % <../index.html Back to MATLAB xUnit Test Framework> %% % Copyright 2008-2010 The MathWorks, Inc. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exSubfunctionTests.m ================================================ %% <../index.html MATLAB xUnit Test Framework>: How to Put Multiple Test Cases in One M-file % The Quick Start example showed how you can write a simple M-file % to be a single test case. This example shows you how to put multiple % test cases in one M-file. % % Name your M-file beginning or ending with "test", like % "testMyFunc". Start by putting the following two lines at the % beginning of the file. It's important that the output variable % name on line 1 be |test_suite|. % % function test_suite = testMyFunc % initTestSuite; % % Next, add subfunctions to the file. Each subfunction beginning % or ending with "test" becomes an individual test case. 
% % The directory example_subfunction_tests contains a test M-file % containing subfunction test cases for the |fliplr| function. cd example_subfunction_tests type testFliplr %% % As usual, run the test cases using |runtests|: runtests %% % <../index.html Back to MATLAB xUnit Test Framework> %% % Copyright 2008-2010 The MathWorks, Inc. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exTestCase.m ================================================ %% <../index.html MATLAB xUnit Test Framework>: How to Write xUnit-Style Tests by Subclassing TestCase % The MATLAB xUnit architecture is based closely on the xUnit style, in % which each test case is an instance of a subclass of the base % TestCase class. Programmers who are familiar with this style may % want to write their own TestCase subclasses instead of using % <./exSubfunctionTests.html subfunction-based tests>. % % This example shows a TestCase subclass containing test case % methods and test fixture methods. If you are not familiar with % defining your own classes in MATLAB, you might want to review the % MATLAB documentation on % , % or you can simply stick to using subfunction-based tests. % % The sample M-file begins with the |classdef| statement, which sets % the name of the class and indicates that it is a subclass of % |TestCase|. cd examples_general dbtype TestUsingTestCase 1 %% % The properties block contains a field that is initialized by the % setup method and is used by the two test methods. dbtype TestUsingTestCase 3:5 %% % The first method in the methods block is the constructor. It % takes the desired test method name as its input argument, and it % passes that input along to the base class constructor. dbtype TestUsingTestCase 7:10 %% % The |setUp| method creates a figure window and stores its handle in % the field |fh|. dbtype TestUsingTestCase 12:14 %% % Test methods are those beginning with "test". 
dbtype TestUsingTestCase 20:26 %% % The |tearDown| method cleans up by deleting the figure window. dbtype TestUsingTestCase 16:18 %% % Run the test cases in the class by calling |runtests| with the name % of the class. runtests TestUsingTestCase %% % <../index.html Back to MATLAB xUnit Test Framework> %% % Copyright 2008-2010 The MathWorks, Inc. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exTestCaseSearching.m ================================================ %% <../index.html MATLAB xUnit Test Framework>: How RUNTESTS Searches for Test Cases % When you call |runtests| with no input arguments: % % >> runtests % % it automatically searches for all the test cases in the current directory. It % looks for test cases in three types of M-files: % % 1. An M-file function whose name begins or ends with "test" or "Test" and that does % not return an output argument. Such a function is considered to be a single % test case. % % 2. An M-file function whose name begins or ends with "test" or "Test" and that returns % an output argument that is a test suite. Such a function is considered to contain % subfunction-style test cases. Each subfunction whose name begins or ends with "test" % or "Test" is a test case. % % 3. An M-file that defines a subclass of TestCase. Each method beginning or ending with % "test" or "Test" is a test case. % % |runtests| uses the |TestSuite| static methods |fromName| and |fromPwd| to % automatically construct the test suites. % % Here are a couple of examples. % % |TestSuite.fromName| takes an M-file name, determines what % kind of test file it is, and returns a cell array of test case objects. cd examples_general test_suite_1 = TestSuite.fromName('testSetupExample') %% % |TestSuite.fromPwd| returns a test suite based on all the test files in the % current directory. 
test_suite_2 = TestSuite.fromPwd() %% % <../index.html Back to MATLAB xUnit Test Framework> %% % Copyright 2008-2010 The MathWorks, Inc. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exTestFixtures.m ================================================ %% <../index.html MATLAB xUnit Test Framework>: How to Write Tests That Share Common Set-Up Code % Sometimes you want to write a set of test cases in which the same % set of initialization steps is performed before each test case, or % in which the same set of cleanup steps is performed after each % test case. This set of common _setup_ and _teardown_ code is % called a _test fixture_. % % In subfunction-based test files, you can add subfunctions whose % names begin with "setup" and "teardown". These functions will be % called before and after every test-case subfunction is called. If % the setup function returns an output argument, that value is saved % and passed to every test-case subfunction and also to the teardown % function. % % This example shows a setup function that creates a figure and % returns its handle. The figure handle is passed to each test-case % subfunction. The figure handle is also passed to the teardown % function, which cleans up after each test case by deleting the % figure. cd examples_general type testSetupExample %% % Run the tests using |runtests|. runtests testSetupExample %% % You might also want to see the % <./exTestCase.html example on writing test cases by % subclassing TestCase>. %% % <../index.html Back to MATLAB xUnit Test Framework> %% % Copyright 2008-2010 The MathWorks, Inc. 
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exTolerance.m ================================================ %% <../index.html MATLAB xUnit Test Framework>: How to Test Using a Floating-Point Tolerance % MATLAB performs arithmetic operations using the floating-point % hardware instructions on your processor. Because % almost all floating-point operations are subject to round-off % error, arithmetic operations can sometimes produce surprising % results. Here's an example. a = 1 + 0.1 + 0.1 + 0.1 %% a == 1.3 %% % So why doesn't |a| equal 1.3? Because 0.1, 1.3, and most other % decimal fractions do not have exact representations in the binary % floating-point number representation your computer uses. The % first line above is doing an approximate addition of 1 plus an % approximation of 0.1, plus an approximation of 0.1, plus an % approximation of 0.1. The second line compares the result of all % that with an approximation of 1.3. % % If you subtract 1.3 from |a|, you can see that the computed result % for |a| is _extremely close_ to the floating-point approximation % of 1.3, but it is not exactly the same. a - 1.3 %% % As a general rule, when comparing the results of floating-point % calculations for equality, it is necessary to use a tolerance % value. Two types of tolerance comparisons are commonly used: absolute % tolerance and relative tolerance. An absolute tolerance comparison of _a_ and _b_ % looks like: % % $$|a-b| \leq T$$ % % A relative tolerance comparison looks like: % % $$|a-b| \leq T\max(|a|,|b|) + T_f$$ % % where _Tf_ is called the _floor tolerance_. It acts as an absolute tolerance % when _a_ and _b_ are very close to 0. % % For example, suppose that _a_ is 100, _b_ is 101, and T is 0.1. Then _a_ and % _b_ would not be considered equal using an absolute tolerance, because 1 > % 0.1. 
However, _a_ and _b_ would be considered equal using a relative % tolerance, because they differ by only 1 part in 100. % % MATLAB xUnit provides the utility assertion functions called % |assertElementsAlmostEqual| and |assertVectorsAlmostEqual|. These functions % make it easy to write tests involving floating-point tolerances. % % |assertElementsAlmostEqual(A,B)| applies the tolerance test independently to % every element of |A| and |B|. The function uses a relative tolerance test by % default, but you can make it use an absolute tolerance test, or change the % tolerance values used, by passing additional arguments to it. % % |assertVectorsAlmostEqual(A,B)| applies the tolerance test to the vectors |A| % and |B| in the L2-norm sense. For example, suppose |A| is |[1 1e10]|, |B| % is |[2 1e10]|, and the tolerance is 1e-8. Then |A| and |B| would fail an % elementwise relative tolerance comparison, because the relative difference % between the first elements is 0.5. However, they would pass a vector relative % tolerance comparison, because the relative vector difference between |A| and % |B| is only about 1 part in 1e10. % % The |examples_general| directory contains a portion of a unit test for the % |sin| function. The output of |sin| can sometimes be a bit surprising because % of floating-point issues. For example: sin(pi) %% % That's very close but not exactly equal to 0. Here's how the % |sin| unit test uses |assertElementsAlmostEqual| to write the |sin(pi)| % test with a minimum of fuss. cd examples_general type testSin %% % Run the test using |runtests|. runtests testSin %% % <../index.html Back to MATLAB xUnit Test Framework> %% % Copyright 2008-2010 The MathWorks, Inc. 
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/example_quick_start/testFliplrMatrix.m ================================================ function testFliplrMatrix %testFliplrMatrix Unit test for fliplr with matrix input in = magic(3); assertEqual(fliplr(in), in(:, [3 2 1])); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/example_quick_start/testFliplrVector.m ================================================ function testFliplrVector %testFliplrVector Unit test for fliplr with vector input in = [1 4 10]; out = fliplr(in); expected_out = [10 4 1]; if ~isequal(out, expected_out) error('testFliplrVector:notEqual', 'Incorrect output for vector.'); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/example_subfunction_tests/testFliplr.m ================================================ function test_suite = testFliplr initTestSuite; function testFliplrMatrix in = magic(3); assertEqual(fliplr(in), in(:, [3 2 1])); function testFliplrVector assertEqual(fliplr([1 4 10]), [10 4 1]); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/examples_general/TestUsingTestCase.m ================================================ classdef TestUsingTestCase < TestCase properties fh end methods function self = TestUsingTestCase(name) self = self@TestCase(name); end function setUp(self) self.fh = figure; end function tearDown(self) delete(self.fh); end function testColormapColumns(self) assertEqual(size(get(self.fh, 'Colormap'), 2), 3); end function testPointer(self) assertEqual(get(self.fh, 'Pointer'), 'arrow'); end end end ================================================ FILE: 
02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/examples_general/testBadSinTest.m ================================================ function test_suite = testBadSinTest initTestSuite; function testSinPi % Example of a failing test case. The test writer should have used % assertAlmostEqual here. assertEqual(sin(pi), 0); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/examples_general/testCos.m ================================================ function test_suite = testCos initTestSuite; function testTooManyInputs assertExceptionThrown(@() cos(1, 2), 'MATLAB:maxrhs'); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/examples_general/testSetupExample.m ================================================ function test_suite = testSetupExample initTestSuite; function fh = setup fh = figure; function teardown(fh) delete(fh); function testColormapColumns(fh) assertEqual(size(get(fh, 'Colormap'), 2), 3); function testPointer(fh) assertEqual(get(fh, 'Pointer'), 'arrow'); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/examples_general/testSin.m ================================================ function testSin assertElementsAlmostEqual(sin(pi), 0); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/examples_general/testWithSetupError.m ================================================ function test_suite = testWithSetupError %Example of a test with an error. The setup function calls cos with %too many input arguments. 
initTestSuite; function testData = setup testData = cos(1, 2); function testMyFeature(testData) assertEqual(1, 1); function teardown(testData) ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/file_exchange_description.txt ================================================ MATLAB xUnit Test Framework is a unit test framework for MATLAB code. MATLAB xUnit is designed to be easy to use for MATLAB users with a wide range of experience. Users can write tests using ordinary M-files that are very simple in structure. MATLAB xUnit comes with extensive documentation that ranges in scope from a "Getting Started" section to advanced techniques and architectural notes. You can view this documentation online without downloading the package. For example, scroll down to the "Published M Files" section on this page and click on "MATLAB xUnit Quick Start - How to write and run tests." To see all the MATLAB xUnit documentation online, scroll down to the "HTML Files" section on this page and click on "Readme.html." Only the "xunit" directory is needed to use the framework. The "tests" directory contains the framework's own test suite. The "architecture" directory contains architectural notes on the framework's design and how it might be extended. MATLAB xUnit can be used with MATLAB releases R2008a and later. MATLAB xUnit relies heavily on object-oriented language features introduced in R2008a and will not work with earlier releases. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exException.html ================================================ MATLAB xUnit Test Framework: How to Test an Error Message

MATLAB xUnit Test Framework: How to Test an Error Message

It's surprising to most people (but not quality engineers) how often programmers make errors in error-handling code. Because of this unfortunate truth, it is useful to write unit tests that verify that your MATLAB code throws the proper error, at the proper time.

The assertion function that makes this task easy is assertExceptionThrown. This example shows how to write a unit test that verifies the "Too many input arguments" error for the cos function.

Your first step is to determine the error identifier associated with the error message. You can find out the error identifier by using the lasterror function.

If you call cos with two input arguments, like this:

 cos(1, 2)

you get this error message:

 Error using ==> cos
 Too many input arguments.

Then if you call lasterror, you get this output:

 ans =
        message: [1x45 char]
     identifier: 'MATLAB:maxrhs'
          stack: [0x1 struct]

So the identifier associated with this error message is 'MATLAB:maxrhs'.

When you write your test function, you'll form an anonymous function handle that calls cos with the erroneous additional input argument.

f = @() cos(1, 2)
f = 

    @()cos(1,2)

You then pass this function to assertExceptionThrown, along with the expected error identifier.

assertExceptionThrown(f, 'MATLAB:maxrhs');

assertExceptionThrown verifies that when f() is called, an error results with the specified error identifier.

Here's our error condition test for the cos function.

cd examples_general
type testCos
function test_suite = testCos
initTestSuite;

function testTooManyInputs
assertExceptionThrown(@() cos(1, 2), 'MATLAB:maxrhs');

Run the test using runtests.

runtests testCos
Starting test run with 1 test case.
.
PASSED in 0.018 seconds.

Back to MATLAB xUnit Test Framework

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exQuickStart.html ================================================ MATLAB xUnit Test Framework: How to Write and Run Tests

MATLAB xUnit Test Framework: How to Write and Run Tests

This example shows how to write and run a couple of test cases for the MATLAB fliplr function.

Contents

Make a folder for your tests

To get started, create a folder (directory) that will contain your tests, and then make that your working folder. The test directory in this example is example_quick_start.

cd example_quick_start

Write each test case as a simple M-file

Write each test case as an M-file function that returns no output arguments. The function name should start or end with "test" or "Test". The test case passes if the function runs with no error.

Here's a test-case M-file that verifies the correct output for a vector input.

type testFliplrVector
function testFliplrVector
%testFliplrVector Unit test for fliplr with vector input

in = [1 4 10];
out = fliplr(in);
expected_out = [10 4 1];

if ~isequal(out, expected_out)
    error('testFliplrVector:notEqual', 'Incorrect output for vector.');
end

The function testFliplrVector calls the function being tested and checks the output against the expected output. If the output is different than expected, the function calls error.

Here's another test-case M-file that verifies the correct fliplr output for a matrix input.

type testFliplrMatrix
function testFliplrMatrix
%testFliplrMatrix Unit test for fliplr with matrix input

in = magic(3);
assertEqual(fliplr(in), in(:, [3 2 1]));

This function is simpler than testFliplrVector because it uses the utility testing function assertEqual. assertEqual checks to see whether its two inputs are equal. If they are equal, assertEqual simply returns silently. If they are not equal, assertEqual calls error.

Run all the tests using runtests

To run all your test cases, simply call runtests. runtests automatically finds all the test cases in the current directory, runs them, and reports the results to the Command Window.

runtests
Starting test run with 2 test cases.
..
PASSED in 0.002 seconds.

Back to MATLAB xUnit Test Framework

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exRunSpecificTest.html ================================================ MATLAB xUnit Test Framework: How to Run a Specific Test

MATLAB xUnit Test Framework: How to Run a Specific Test

To run all the test cases in just one M-file, ignoring other test cases that might be in other files in the same directory, give the name of the file (without the ".m" extension) as an argument to runtests.

For example

cd example_subfunction_tests

runtests testFliplr
Starting test run with 2 test cases.
..
PASSED in 0.023 seconds.

To run a single test case, add the name of the test case using a colon (":"), like this:

runtests testFliplr:testFliplrVector
Starting test run with 1 test case.
.
PASSED in 0.001 seconds.

Back to MATLAB xUnit Test Framework

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exRunTestsInADirectory.html ================================================ MATLAB xUnit Test Framework: How to Run Tests in Specific Directories

MATLAB xUnit Test Framework: How to Run Tests in Specific Directories

To run all the test cases in a specific directory, give the name of the directory as an argument to runtests.

For example

runtests example_subfunction_tests
Starting test run with 2 test cases.
..
PASSED in 0.062 seconds.

To run tests in multiple directories, give each directory name as a separate argument to runtests.

Back to MATLAB xUnit Test Framework

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exRunTestsInPackage.html ================================================ MATLAB xUnit Test Framework: How to Run Tests in a Package

MATLAB xUnit Test Framework: How to Run Tests in a Package

To run all the test cases in a package, give the name of the package as an argument to runtests. Note: Running tests in a package requires MATLAB R2009a or later.

For example, suppose you are distributing a set of MATLAB files called the "ABC Toolbox." Then you could put your tests inside a package called abc_tests and run them like this:

runtests abc_tests
Test suite: abc_tests
Test suite location: Package
19-Nov-2010 14:14:36

Starting test run with 2 test cases.
..
PASSED in 0.028 seconds.

(Note that the initial "+" character in the name of the package folder on disk is not part of the package name.)

Or you could put your tests inside a subpackage called abc.tests and run them like this:

runtests abc.tests
Test suite: abc.tests
Test suite location: Package
19-Nov-2010 14:14:36

Starting test run with 2 test cases.
..
PASSED in 0.001 seconds.

You should not use a generic top-level package name such as "tests" because then your package might be unintentionally combined with packages with the same name created by other people.

Back to MATLAB xUnit Test Framework

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exSilentRunning.html ================================================ MATLAB xUnit Test Framework: How to Run Tests Silently and Query the Results

MATLAB xUnit Test Framework: How to Run Tests Silently and Query the Results

When you run a test suite using runtests, the results are summarized in the Command Window. This example shows you how to run a test suite so that nothing prints to the Command Window, and it shows you how to write a program to automatically determine the results of running the test suite.

There are four steps to follow.

1. Construct a TestSuite object. In this example we'll use the fromPwd method of the TestSuite class to construct a test suite using all the test cases found in the examples_general directory.

cd examples_general
suite = TestSuite.fromPwd();

You can look up information about the individual test cases.

suite.TestComponents{1}
ans = 

  TestSuite handle

  Properties:
    TestComponents: {[1x1 TestUsingTestCase]  [1x1 TestUsingTestCase]}
              Name: 'TestUsingTestCase'
          Location: [1x80 char]


You can see above that the first test component in the test suite is itself another test suite, which contains the test cases defined by the M-file named TestUsingTestCase. Here's what one of these individual test cases looks like:

suite.TestComponents{1}.TestComponents{1}
ans = 

  TestUsingTestCase handle

  Properties:
            fh: []
    MethodName: 'testPointer'
          Name: 'testPointer'
      Location: 'C:\Users\eddins\local-work\matlab_xunit\doc\examples_general\TestUsingTestCase.m'


2. Construct a TestRunLogger object. This object can receive notifications about what happens when a test suite is executed.

logger = TestRunLogger;

3. Call the run method of the TestSuite object, passing it the logger.

suite.run(logger);

The TestRunLogger object can now be queried to determine what happened during the test.

logger
logger = 

  TestRunLogger handle

  Properties:
             Log: {1x34 cell}
     NumFailures: 1
       NumErrors: 1
    NumTestCases: 8
          Faults: [1x2 struct]


There were eight test cases run (logger.NumTestCases), resulting in one test failure and one test error. Detailed information about what went wrong can be found in logger.Faults.

logger.Faults(1)
ans = 

         Type: 'failure'
     TestCase: [1x1 FunctionHandleTestCase]
    Exception: [1x1 MException]

logger.Faults(2)
ans = 

         Type: 'error'
     TestCase: [1x1 FunctionHandleTestCase]
    Exception: [1x1 MException]

You can drill further to determine the names of the failing tests, as well as the complete stack trace associated with each failure.

logger.Faults(1).TestCase
ans = 

  FunctionHandleTestCase handle

  Properties:
    MethodName: 'runTestCase'
          Name: 'testSinPi'
      Location: 'C:\Users\eddins\local-work\matlab_xunit\doc\examples_general\testBadSinTest.m'


logger.Faults(1).Exception.stack(1)
ans = 

    file: 'C:\Users\eddins\local-work\matlab_xunit\doc\examples_general\testBadSinTest.m'
    name: 'testSinPi'
    line: 7

logger.Faults(1).Exception.stack(2)
ans = 

    file: 'C:\Users\eddins\local-work\matlab_xunit\xunit\FunctionHandleTestCase.m'
    name: 'FunctionHandleTestCase.runTestCase'
    line: 112

Back to MATLAB xUnit Test Framework

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exSubfunctionTests.html ================================================ MATLAB xUnit Test Framework: How to Put Multiple Test Cases in One M-file

MATLAB xUnit Test Framework: How to Put Multiple Test Cases in One M-file

The Quick Start example showed how you can write a simple M-file to be a single test case. This example shows you how to put multiple test cases in one M-file.

Name your M-file beginning or ending with "test", like "testMyFunc". Start by putting the following two lines at the beginning of the file. It's important that the output variable name on line 1 be test_suite.

  function test_suite = testMyFunc
  initTestSuite;

Next, add subfunctions to the file. Each subfunction beginning or ending with "test" becomes an individual test case.

The directory example_subfunction_tests contains a test M-file containing subfunction test cases for the fliplr function.

cd example_subfunction_tests

type testFliplr
function test_suite = testFliplr
initTestSuite;

function testFliplrMatrix
in = magic(3);
assertEqual(fliplr(in), in(:, [3 2 1]));

function testFliplrVector
assertEqual(fliplr([1 4 10]), [10 4 1]);


As usual, run the test cases using runtests:

runtests
Starting test run with 2 test cases.
..
PASSED in 0.027 seconds.

Back to MATLAB xUnit Test Framework

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exTestCase.html ================================================ MATLAB xUnit Test Framework: How to Write xUnit-Style Tests by Subclassing TestCase

MATLAB xUnit Test Framework: How to Write xUnit-Style Tests by Subclassing TestCase

The MATLAB xUnit architecture is based closely on the xUnit style, in which each test case is an instance of a subclass of the base TestCase class. Programmers who are familiar with this style may want to write their own TestCase subclasses instead of using subfunction-based tests.

This example shows a TestCase subclass containing test case methods and test fixture methods. If you are not familiar with defining your own classes in MATLAB, you might want to review the MATLAB documentation on classes and object-oriented programming, or you can simply stick to using subfunction-based tests.

The sample M-file begins with the classdef statement, which sets the name of the class and indicates that it is a subclass of TestCase.

cd examples_general
dbtype TestUsingTestCase 1
1     classdef TestUsingTestCase < TestCase

The properties block contains a field that is initialized by the setUp method and is used by the two test methods.

dbtype TestUsingTestCase 3:5
3         properties
4             fh
5         end

The first method in the methods block is the constructor. It takes the desired test method name as its input argument, and it passes that input along to the base class constructor.

dbtype TestUsingTestCase 7:10
7         methods
8             function self = TestUsingTestCase(name)
9                 self = self@TestCase(name);
10            end

The setUp method creates a figure window and stores its handle in the field fh.

dbtype TestUsingTestCase 12:14
12            function setUp(self)
13                self.fh = figure;
14            end

Test methods are those whose names begin or end with "test" or "Test".

dbtype TestUsingTestCase 20:26
20            function testColormapColumns(self)
21                assertEqual(size(get(self.fh, 'Colormap'), 2), 3);
22            end
23    
24            function testPointer(self)
25                assertEqual(get(self.fh, 'Pointer'), 'arrow');
26            end

The tearDown method cleans up by deleting the figure window.

dbtype TestUsingTestCase 16:18
16            function tearDown(self)
17                delete(self.fh);
18            end

Run the test cases in the class by calling runtests with the name of the class.

runtests TestUsingTestCase
Starting test run with 2 test cases.
..
PASSED in 0.095 seconds.

Back to MATLAB xUnit Test Framework

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exTestCaseSearching.html ================================================ MATLAB xUnit Test Framework: How RUNTESTS Searches for Test Cases

MATLAB xUnit Test Framework: How RUNTESTS Searches for Test Cases

When you call runtests with no input arguments:

 >> runtests

it automatically searches for all the test cases in the current directory. It looks for test cases in three types of M-files:

1. An M-file function whose name begins or ends with "test" or "Test" and that does not return an output argument. Such a function is considered to be a single test case.

2. An M-file function whose name begins or ends with "test" or "Test" and that returns an output argument that is a test suite. Such a function is considered to contain subfunction-style test cases. Each subfunction whose name begins or ends with "test" or "Test" is a test case.

3. An M-file that defines a subclass of TestCase. Each method beginning or ending with "test" or "Test" is a test case.

runtests uses the TestSuite static methods fromName and fromPwd to automatically construct the test suites.

Here are a couple of examples.

TestSuite.fromName takes an M-file name, determines what kind of test file it is, and returns a TestSuite object whose TestComponents property holds the test case objects.

cd examples_general
test_suite_1 = TestSuite.fromName('testSetupExample')
test_suite_1 = 

  TestSuite handle

  Properties:
    TestComponents: {[1x1 FunctionHandleTestCase]  [1x1 FunctionHandleTestCase]}
              Name: 'testSetupExample'
          Location: [1x79 char]


TestSuite.fromPwd returns a test suite based on all the test files in the current directory.

test_suite_2 = TestSuite.fromPwd()
test_suite_2 = 

  TestSuite handle

  Properties:
    TestComponents: {1x6 cell}
              Name: 'C:\Users\eddins\local-work\matlab_xunit\doc\examples_general'
          Location: 'C:\Users\eddins\local-work\matlab_xunit\doc\examples_general'


Back to MATLAB xUnit Test Framework

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exTestFixtures.html ================================================ MATLAB xUnit Test Framework: How to Write Tests That Share Common Set-Up Code

MATLAB xUnit Test Framework: How to Write Tests That Share Common Set-Up Code

Sometimes you want to write a set of test cases in which the same set of initialization steps is performed before each test case, or in which the same set of cleanup steps is performed after each test case. This set of common setup and teardown code is called a test fixture.

In subfunction-based test files, you can add subfunctions whose names begin with "setup" and "teardown". These functions will be called before and after every test-case subfunction is called. If the setup function returns an output argument, that value is saved and passed to every test-case subfunction and also to the teardown function.

This example shows a setup function that creates a figure and returns its handle. The figure handle is passed to each test-case subfunction. The figure handle is also passed to the teardown function, which cleans up after each test case by deleting the figure.

cd examples_general
type testSetupExample
function test_suite = testSetupExample
initTestSuite;

function fh = setup
fh = figure;

function teardown(fh)
delete(fh);

function testColormapColumns(fh)
assertEqual(size(get(fh, 'Colormap'), 2), 3);

function testPointer(fh)
assertEqual(get(fh, 'Pointer'), 'arrow');

Run the tests using runtests.

runtests testSetupExample
Starting test run with 2 test cases.
..
PASSED in 0.095 seconds.

You might also want to see the example on writing test cases by subclassing TestCase.

Back to MATLAB xUnit Test Framework

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exTolerance.html ================================================ MATLAB xUnit Test Framework: How to Test Using a Floating-Point Tolerance

MATLAB xUnit Test Framework: How to Test Using a Floating-Point Tolerance

MATLAB performs arithmetic operations using the floating-point hardware instructions on your processor. Because almost all floating-point operations are subject to round-off error, arithmetic operations can sometimes produce surprising results. Here's an example.

a = 1 + 0.1 + 0.1 + 0.1
a =

    1.3000

a == 1.3
ans =

     0

So why doesn't a equal 1.3? Because 0.1, 1.3, and most other decimal fractions do not have exact representations in the binary floating-point number representation your computer uses. The first line above is doing an approximate addition of 1 plus an approximation of 0.1, plus an approximation of 0.1, plus an approximation of 0.1. The second line compares the result of all that with an approximation of 1.3.

If you subtract 1.3 from a, you can see that the computed result for a is extremely close to the floating-point approximation of 1.3, but it is not exactly the same.

a - 1.3
ans =

  2.2204e-016

As a general rule, when comparing the results of floating-point calculations for equality, it is necessary to use a tolerance value. Two types of tolerance comparisons are commonly used: absolute tolerance and relative tolerance. An absolute tolerance comparison of a and b looks like:

$$|a-b| \leq T$$

A relative tolerance comparison looks like:

$$|a-b| \leq T\max(|a|,|b|) + T_f$$

where Tf is called the floor tolerance. It acts as an absolute tolerance when a and b are very close to 0.

For example, suppose that a is 100, b is 101, and T is 0.1. Then a and b would not be considered equal using an absolute tolerance, because 1 > 0.1. However, a and b would be considered equal using a relative tolerance, because they differ by only 1 part in 100.

MATLAB xUnit provides the utility assertion functions called assertElementsAlmostEqual and assertVectorsAlmostEqual. These functions make it easy to write tests involving floating-point tolerances.

assertElementsAlmostEqual(A,B) applies the tolerance test independently to every element of A and B. The function uses a relative tolerance test by default, but you can make it use an absolute tolerance test, or change the tolerance values used, by passing additional arguments to it.

assertVectorsAlmostEqual(A,B) applies the tolerance test to the vectors A and B in the L2-norm sense. For example, suppose A is [1 1e10], B is [2 1e10], and the tolerance is 1e-8. Then A and B would fail an elementwise relative tolerance comparison, because the relative difference between the first elements is 0.5. However, they would pass a vector relative tolerance comparison, because the relative vector difference between A and B is only about 1 part in 1e10.

The examples_general directory contains a portion of a unit test for the sin function. The output of sin can sometimes be a bit surprising because of floating-point issues. For example:

sin(pi)
ans =

  1.2246e-016

That's very close but not exactly equal to 0. Here's how the sin unit test uses assertElementsAlmostEqual to write the sin(pi) test with a minimum of fuss.

cd examples_general
type testSin
function testSin

assertElementsAlmostEqual(sin(pi), 0);

Run the test using runtests.

runtests testSin
Starting test run with 1 test case.
.
PASSED in 0.023 seconds.

Back to MATLAB xUnit Test Framework

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/index.html ================================================ ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/release-history.html ================================================ MATLAB xUnit Release History

MATLAB xUnit Release History

3.1 19-Nov-2010

  • Add -logfile option to runtests.
  • Allow test names to be passed to runtests as a cell array of strings.
  • Add test suite name and execution date to output of runtests.
  • Added warning message if function-handle-based test file has a setup function that returns more than one output argument.
  • Fix bug related to handling subfunction test files in packages.
  • Fix TestSuite.fromPackageName to find tests inside subpackages.
  • Correct text in exTestCaseSearching.m to show that test files and functions can begin or end with "test" or "Test".

3.0.2 30-Jul-2010

  • Fixed bug that caused TestCase subclasses in a test package to be executed twice in some versions of MATLAB.
  • Documented the out = runsuite(...) syntax.
  • Added home doc links to the various doc pages.

3.0.1 16-Jun-2010

Fixed handling of TestCase subclasses in a test package.

3.0 12-Jun-2010

  • Added capability to run tests stored inside packages.
  • runtests errors if no test cases are found instead of silently passing.
  • Accept function names that end in "test" or "Test" as test functions.
  • In assertElementsAlmostEqual and assertVectorsAlmostEqual, change the default floor_tol value to sqrt(eps) instead of eps. This makes the assertion a bit more forgiving when comparing numbers very close to 0.
  • Added -verbose option to runtests.
  • Fixed handling of message strings containing sprintf-style control characters in the assert*.m functions.

2.0.1 04-Aug-2009

Corrected errors in assertElementsAlmostEqual and assertVectorsAlmostEqual related to NaN and Inf inputs. assertElementsAlmostEqual now properly asserts when one input is finite and the other is infinite. assertVectorsAlmostEqual now asserts whenever any input element is NaN or infinite. The behavior of xunit.utils.compareFloats has been changed to match.

2.0 05-June-2009

  • The name of the package has been changed to "MATLAB xUnit Test Framework." The command-line test runner is now called runtests. The utilities package is now called xunit.utils. If you want to continue using the old command-line test runner (mtest) and utilities package (mtest.utils), then put the obsolete directory on the MATLAB path.
  • The assertion functions assertEqual, assertElementsAlmostEqual, and assertVectorsAlmostEqual now print more information about the input values in the case of an assertion failure.
  • A new assertion function, assertFilesEqual, has been added.
  • The command-line test runner, runtests, now supports multiple directory names.
  • The assertion function assertAlmostEqual has been deprecated. Use assertElementsAlmostEqual and assertVectorsAlmostEqual instead. If you want to continue using assertAlmostEqual, then put the obsolete directory on the MATLAB path.

1.1.3 20-May-2009

Remove the LICENSE.txt file because the open source BSD license is now supplied automatically by the MATLAB Central File Exchange. There are no functional changes in this version.

1.1.2 02-Apr-2009

This release fixes a bug with assertVectorsAlmostEqual when the caller provided a custom message. When the function errored out because the tolerance was exceeded, the function would issue a different error message than expected.

1.1.1 16-Mar-2009

This release fixes a problem when calling mtest with no input arguments. Previously, it was not limiting its test-case discovery to TestCase subclasses and ordinary M-files beginning with "test" or "Test" as documented.

This release also integrates the MTEST documentation with the MATLAB Help Browser.

1.1 11-Mar-2009

This release adds new tools for performing floating-point comparisons. Using the new assertion functions assertElementsAlmostEqual and assertVectorsAlmostEqual, you can perform both absolute and relative tolerance comparisons, either elementwise or in a vector L2-norm fashion.

The previous floating-point comparison function, assertAlmostEqual, is still available, but its use is discouraged.

1.0 30-Jan-2009

This release, the first to be posted on the MATLAB Central File Exchange, includes a refactoring of TestCase and TestSuite to use the composite design pattern. Both classes now subclass the abstract class TestComponent, and the individual items contained in a TestSuite object are TestComponent objects. That means a TestSuite object can contain both TestSuite objects and TestCase objects.

TestSuites are now built up hierarchically. All the test cases in a subfunction-based test M-file become a test suite, which in turn can be part of a test suite for an entire test directory.

The mtest driver function can now take the name of a directory, in which case it will automatically discover and run all the test cases in that directory.

The old TestRunObserver class has become the abstract TestRunMonitor class, with subclasses TestRunLogger and CommandWindowTestRunDisplay.

TestCaseInDir has been modified to do a directory change before executing the test case. The new class TestCaseWithAddPath makes a temporary path addition before executing the test case.

Subfunction-based test M-files written for one of the alpha versions of MTEST need to be revised so that the output variable name is "test_suite", and so that the first line of code calls the script "initTestSuite".

0.9 12-Sep-2008

This release is an extensive update that provides simpler ways of writing and running test cases.

  • The new function mtest automatically finds and runs all test cases in the current directory.
  • Test cases can be written as simple M-file functions.
  • Multiple test cases can be defined in a single M-file by using subfunctions.
  • Many new documentation examples have been provided, including a "Quick Start" example intended to enable users to write and run their first tests in just a few minutes.

0.8.1 17-Mar-2008

  • Some of the sample TestCase classes were missing classdef lines. FIXED
  • Now using dot method invocation syntax in examples and doc.
  • Minor edits to HTML doc (munit_doc.html).
  • Edited munit_doc.html by hand to clean up command-window links.

0.8 15-Mar-2008

  • Limited initial distribution for review and comment.
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/xunit_product_page.html ================================================ MATLAB® xUnit Test Framework

MATLAB® xUnit Test Framework

MATLAB xUnit is a unit test framework for MATLAB code. MATLAB xUnit is designed to be easy to use for MATLAB users with a wide range of experience. Users can write tests using ordinary M-files that are very simple in structure.

Important Note: MATLAB xUnit relies heavily on object-oriented language features introduced in MATLAB 7.6 (R2008a), which was released in March 2008. MATLAB xUnit functions and classes will not work in earlier MATLAB releases. In addition, writing and running tests inside packages requires MATLAB 7.7 (R2008b) or later.

Installation

To use MATLAB xUnit in MATLAB, add the "xunit" folder (directory) to the MATLAB path. See the MATLAB documentation for setting the search path. (The "tests" directory contains the framework's own self-tests, and the "architecture" directory contains information about the framework architecture; these directories are not needed for using MATLAB xUnit.)

Note for users of earlier versions of MATLAB xUnit: If you have already written unit tests based on MTEST, an earlier version of MATLAB xUnit, you may also want to add the "obsolete" folder to the MATLAB path. This folder contains the old command-line test runner, mtest, as well as the deprecated function assertAlmostEqual.

Getting Started

Quick Start: How to Write and Run Tests

How to Put Multiple Test Cases in One M-file

How to Run a Specific Test

How to Run Tests in Specific Directories

How to Run Tests in a Package

Advanced Usage

How to Test Using a Floating-Point Tolerance

How to Test an Error Message

How to Run Tests Silently and Query the Results

How to Write Tests That Share Common Set-Up Code

How to Write xUnit-Style Tests by Subclassing TestCase

How MATLAB xUnit Searches for Test Cases

Key Functions and Classes

Main test driver function:

Assertion functions you can use in your tests:

The key xUnit-style classes that make everything work:

Release History

Copyright 2008-2010 The MathWorks, Inc.

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/license.txt ================================================ Copyright (c) 2010, The MathWorks, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution * Neither the name of the The MathWorks, Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/Contents.m ================================================
% UTILS Utility package for MTEST unit testing framework
%
% Array Comparison
%   compareFloats              - Compare floating-point arrays using tolerance
%
% Test Case Discovery Functions
%   isTestCaseSubclass         - True for name of TestCase subclass
%
% String Functions
%   containsRegexp             - True if string contains regular expression
%   isSetUpString              - True for string that looks like a setup function
%   isTearDownString           - True for string that looks like teardown function
%   isTestString               - True for string that looks like a test function
%
% Miscellaneous Functions
%   generateDoc                - Publish test scripts in mtest/doc
%   parseFloatAssertInputs     - Common input-parsing logic for several functions

% Undocumented Functions
%   isAlmostEqual              - Floating-point equality test using relative tolerance

% Steven L. Eddins
% Copyright 2008-2009 The MathWorks, Inc.
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/compareFloats.m ================================================
function result = compareFloats(varargin)
%compareFloats Compare floating-point arrays using tolerance.
%   result = compareFloats(A, B, compare_type, tol_type, tol, floor_tol)
%   compares the floating-point arrays A and B using a tolerance.  compare_type
%   is either 'elementwise' or 'vector'.  tol_type is either 'relative' or
%   'absolute'.  tol and floor_tol are the scalar tolerance values.
%
%   There are four different tolerance tests used, depending on the comparison
%   type and the tolerance type:
%
%   1. Comparison type: 'elementwise'     Tolerance type: 'relative'
%
%       all( abs(A(:) - B(:)) <= tol * max(abs(A(:)), abs(B(:))) + floor_tol )
%
%   2. Comparison type: 'elementwise'     Tolerance type: 'absolute'
%
%       all( abs(A(:) - B(:)) <= tol )
%
%   3. Comparison type: 'vector'          Tolerance type: 'relative'
%
%       norm(A(:) - B(:)) <= tol * max(norm(A(:)), norm(B(:))) + floor_tol
%
%   4. Comparison type: 'vector'          Tolerance type: 'absolute'
%
%       norm(A(:) - B(:)) <= tol
%
%   Note that floor_tol is not used when the tolerance type is 'absolute'.
%
%   compare_type, tol_type, tol, and floor_tol are all optional inputs.  The
%   default value for compare_type is 'elementwise'.  The default value for
%   tol_type is 'relative'.  If both A and B are double, then the default value
%   for tol is sqrt(eps), and the default value for floor_tol is eps.  If either
%   A or B is single, then the default value for tol is sqrt(eps('single')), and
%   the default value for floor_tol is eps('single').
%
%   If A or B is complex, then the tolerance test is applied independently to
%   the real and imaginary parts.
%
%   Pairs of elements that are both NaN, both Inf, or both -Inf are treated as
%   equal: they are replaced by zeros in both arrays before the test is applied.
%
%   Errors:
%       MTEST:compareFloats:unrecognizedCompareType   compare_type is not
%           'elementwise' or 'vector'.
%       MTEST:compareFloats:unrecognizedToleranceType tol_type is not
%           'relative' or 'absolute'.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

if nargin >= 3
    % compare_type specified.  Grab it and then use parseFloatAssertInputs to
    % process the remaining input arguments.
    compare_type = varargin{3};
    varargin(3) = [];

    % strcmp against a cell array returns one logical per candidate, so its
    % result is never empty.  Membership therefore has to be tested with ANY;
    % the ISCHAR guard rejects numeric and cell inputs with the same error.
    if ~ischar(compare_type) || ...
            ~any(strcmp(compare_type, {'elementwise', 'vector'}))
        error('MTEST:compareFloats:unrecognizedCompareType', ...
            'COMPARE_TYPE must be ''elementwise'' or ''vector''.');
    end
else
    compare_type = 'elementwise';
end

params = mtest.utils.parseFloatAssertInputs(varargin{:});

A = params.A(:);
B = params.B(:);

[A, B] = preprocessNanInf(A, B);

% compare_type was validated above, so only the two legal values reach here.
if strcmp(compare_type, 'vector')
    magFcn = @norm;
else
    magFcn = @abs;
end

switch params.ToleranceType
    case 'relative'
        compareFcn = @(A, B) magFcn(A - B) <= ...
            params.Tolerance * max(magFcn(A), magFcn(B)) + ...
            params.FloorTolerance;

    case 'absolute'
        compareFcn = @(A, B) magFcn(A - B) <= params.Tolerance;

    otherwise
        error('MTEST:compareFloats:unrecognizedToleranceType', ...
            'TOL_TYPE must be ''relative'' or ''absolute''.');
end

if isreal(A) && isreal(B)
    result = compareFcn(A, B);
else
    % Complex data: real and imaginary parts must each pass the test.
    result = compareFcn(real(A), real(B)) & compareFcn(imag(A), imag(B));
end

% Collapse the elementwise result (or the scalar vector result) to one logical.
result = all(result);

%===============================================================================
function [A, B] = preprocessNanInf(A, B)
%preprocessNanInf Zero out element pairs that are both NaN, both Inf, or both -Inf.
%   Such pairs would otherwise produce NaN differences and fail the comparison.

make_zero = isnan(A) & isnan(B);
make_zero = make_zero | ((A == Inf) & (B == Inf));
make_zero = make_zero | ((A == -Inf) & (B == -Inf));

A(make_zero) = 0;
B(make_zero) = 0;
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/containsRegexp.m ================================================
function tf = containsRegexp(str, exp)
%containsRegexp True if string contains regular expression
%   TF = containsRegexp(str, exp) returns true if the string str contains the
%   regular expression exp.  If str is a cell array of strings, then
%   containsRegexp tests each string in the cell array, returning the results in
%   a logical array with the same size as str.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

% Convert to canonical input form: A cell array of strings.
if ~iscell(str)
    str = {str};
end

matches = regexp(str, exp);
tf = ~cellfun('isempty', matches);
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/generateDoc.m ================================================
function generateDoc %generateDoc Publish the example scripts in the doc directory % Steven L. Eddins % Copyright 2008-2009 The MathWorks, Inc.
doc_dir = fullfile(fileparts(which('mtest')), '..', 'doc'); addpath(doc_dir); cd(doc_dir) mfiles = dir('*.m'); for k = 1:numel(mfiles) publish(mfiles(k).name); cd(doc_dir) end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/isAlmostEqual.m ================================================
function same = isAlmostEqual(A, B, reltol)
%isAlmostEqual Equality test using relative tolerance
%   same = isAlmostEqual(A, B, reltol), for two floating-point arrays A and B,
%   tests A and B for equality using the specified relative tolerance.
%   isAlmostEqual returns true if the following relationship is satisfied for
%   all values in A and B:
%
%       abs(A - B) ./ max(max(abs(A), abs(B)), 1) <= reltol
%
%   The denominator is floored at 1, so for values whose magnitude is below 1
%   the test behaves like an absolute comparison against reltol.  Element pairs
%   that are both 0, both NaN, both Inf, or both -Inf are treated as equal.
%   Arrays of different sizes are never equal.
%
%   same = isAlmostEqual(A, B) uses the following value for the relative
%   tolerance:
%
%       100 * max(eps(class(A)), eps(class(B)))
%
%   If either A or B is not a floating-point array, then isAlmostEqual returns
%   the result of isequal(A, B).

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

both_float = isfloat(A) && isfloat(B);
if ~both_float
    % A tolerance has no meaning for non-float data; require exact equality.
    same = isequal(A, B);
    return
end

if nargin < 3
    reltol = 100 * max(eps(class(A)), eps(class(B)));
end

if ~isequal(size(A), size(B))
    same = false;
    return
end

lhs = A(:);
rhs = B(:);

% Relative error, with the scale floored at 1.
scale = max(max(abs(lhs), abs(rhs)), 1);
rel_err = abs(lhs - rhs) ./ scale;

% Some floating-point values require special handling: matching zeros, NaNs
% and same-signed infinities count as equal regardless of the computed error.
exact_match = ((lhs == 0) & (rhs == 0)) | ...
    (isnan(lhs) & isnan(rhs)) | ...
    ((lhs == Inf) & (rhs == Inf)) | ...
    ((lhs == -Inf) & (rhs == -Inf));
rel_err(exact_match) = 0;

same = all(rel_err <= reltol);
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/isSetUpString.m ================================================
function tf = isSetUpString(str) %isSetUpString True if string looks like the name of a setup function % tf = isSetUpString(str) returns true if the string str looks like the name % of a setup function.
If str is a cell array of strings, then isSetUpString % tests each string in the cell array, returning the results in a logical % array with the same size as str. % Steven L. Eddins % Copyright 2008-2009 The MathWorks, Inc. setup_exp = '^[sS]et[uU]p'; tf = mtest.utils.containsRegexp(str, setup_exp); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/isTearDownString.m ================================================ function tf = isTearDownString(str) %isTearDownString True if string looks like the name of a teardown function % tf = isTearDownString(str) returns true if the string str looks like the % name of a teardown function. If str is a cell array of strings, then % isTearDownString tests each string in the cell array, returning the results % in a logical array with the same size as str. % Steven L. Eddins % Copyright 2008-2009 The MathWorks, Inc. setup_exp = '^[tT]ear[dD]own'; tf = mtest.utils.containsRegexp(str, setup_exp); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/isTestCaseSubclass.m ================================================ function tf = isTestCaseSubclass(name) %isTestCaseSubclass True for name of a TestCase subclass % tf = isTestCaseSubclass(name) returns true if the string name is the name of % a TestCase subclass on the MATLAB path. % Steven L. Eddins % Copyright 2008-2009 The MathWorks, Inc. tf = false; class_meta = meta.class.fromName(name); if isempty(class_meta) % Not the name of a class return; end if strcmp(class_meta.Name, 'TestCase') tf = true; else tf = isMetaTestCaseSubclass(class_meta); end function tf = isMetaTestCaseSubclass(class_meta) tf = false; if strcmp(class_meta.Name, 'TestCase') tf = true; else % Invoke function recursively on parent classes. 
super_classes = class_meta.SuperClasses; for k = 1:numel(super_classes) if isMetaTestCaseSubclass(super_classes{k}) tf = true; break; end end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/isTestString.m ================================================ function tf = isTestString(str) %isTestString True if string looks like the name of a test % tf = isTestString(str) returns true if the string str looks like the name of % a test. If str is a cell array of strings, then isTestString tests each % string in the cell array, returning the results in a logical array with the % same size as str. % Steven L. Eddins % Copyright 2008-2009 The MathWorks, Inc. test_exp = '^[tT]est'; tf = mtest.utils.containsRegexp(str, test_exp); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/parseFloatAssertInputs.m ================================================ function params = parseFloatAssertInputs(varargin) %parseFloatAssertInputs Parse inputs for floating-point assertion functions. % params = parseFloatAssertInputs(varargin) parses the input arguments for % assertElementsAlmostEqual, assertVectorsAlmostEqual, and compareFcn. It % returns a parameter struct containing the fields: % % A B ToleranceType Tolerance FloorTolerance % Steven L. Eddins % Copyright 2008-2009 The MathWorks, Inc. error(nargchk(2, 6, nargin, 'struct')); params = struct('A', {[]}, 'B', {[]}, 'ToleranceType', {[]}, ... 'Tolerance', {[]}, 'FloorTolerance', {[]}, 'Message', {''}); % The first two input arguments are always A and B. params.A = varargin{1}; params.B = varargin{2}; varargin(1:2) = []; % If the last argument is a message string, process it and remove it from the list. if (numel(varargin) >= 1) && ischar(varargin{end}) && ... 
~any(strcmp(varargin{end}, {'relative', 'absolute'})) params.Message = varargin{end}; varargin(end) = []; end checkAB(params.A, params.B); epsilon = max(eps(class(params.A)), eps(class(params.B))); if numel(varargin) < 3 % floor_tol not specified; set default. params.FloorTolerance = epsilon; else params.FloorTolerance = varargin{3}; end if numel(varargin) < 2 % tol not specified; set default. params.Tolerance = sqrt(epsilon); else params.Tolerance = varargin{2}; end if numel(varargin) < 1 % tol_type not specified; set default. params.ToleranceType = 'relative'; else params.ToleranceType = varargin{1}; end %=============================================================================== function checkAB(A, B) if ~isfloat(A) || ~isfloat(B) error('MTEST:parseFloatAssertInputs:inputsNotFloat', ... 'A and B must be floating-point arrays.'); end if ~isequal(size(A), size(B)) error('MTEST:parseFloatAssertInputs:sizeMismatch', ... 'A and B must have the same size.'); end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/assertAlmostEqual.m ================================================
function assertAlmostEqual(A, B, reltol, message)
%assertAlmostEqual Assert that inputs are equal within relative tolerance
%   assertAlmostEqual(A, B, RELTOL) throws an exception if any of the values in
%   A and B are not equal within the specified tolerance.  NaN values are
%   considered to be equal.  A and B have to have the same class and sparsity to
%   be considered equal.
%
%   assertAlmostEqual(A, B) uses the following relative tolerance value:
%
%       100 * eps(class(A))
%
%   assertAlmostEqual(A, B, RELTOL, MESSAGE) uses the specified message string
%   when throwing the exception.  With this syntax, use RELTOL = [] to specify
%   the default relative tolerance.
%
%   Note that if A and B are not floating-point arrays, then the relative
%   tolerance value is not used: mtest.utils.isAlmostEqual compares such inputs
%   with ISEQUAL.
%
%   Exception identifiers:
%       assertAlmostEqual:sparsityNotEqual  one input is sparse, the other is not
%       assertAlmostEqual:classNotEqual     inputs have different classes
%       assertAlmostEqual:tolExceeded       values differ by more than RELTOL
%
%   Examples
%   --------
%   % This call returns silently.
%   assertAlmostEqual(1.0, 1.0 + eps);
%
%   % This call throws an error.
%   assertAlmostEqual(1.0, 1.1);
%
%   See also assertEqual, mtest.utils.isAlmostEqual

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

% MESSAGE is optional.  Every failure path below must supply its own default
% when the caller did not pass one; otherwise reading MESSAGE raises an
% undefined-variable error that hides the real assertion failure.
have_message = (nargin >= 4);

if issparse(A) ~= issparse(B)
    if ~have_message
        message = 'Inputs do not have the same sparsity.';
    end
    % '%s' keeps sprintf-style control characters in MESSAGE literal.
    throw(MException('assertAlmostEqual:sparsityNotEqual', '%s', message));
end

if ~strcmp(class(A), class(B))
    if ~have_message
        message = sprintf('Inputs are not the same class: %s and %s.', ...
            class(A), class(B));
    end
    throw(MException('assertAlmostEqual:classNotEqual', '%s', message));
end

if nargin < 3 || isempty(reltol)
    if isfloat(A)
        reltol = 100 * eps(class(A));
    else
        % eps(classname) is only defined for 'single' and 'double'.  The
        % tolerance is ignored for non-float inputs, so any value will do.
        reltol = 0;
    end
end

if ~have_message
    message = sprintf('Inputs are not equal within relative tolerance: %g', ...
        reltol);
end

if ~mtest.utils.isAlmostEqual(A, B, reltol)
    throw(MException('assertAlmostEqual:tolExceeded', '%s', message));
end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/mtest.m ================================================
function out = mtest(name) %mtest Run unit tests % mtest runs all the test cases that can be found in the current directory and % summarizes the results in the Command Window. % % Test cases can be found in the following places in the current directory: % % * An M-file function whose name starts with "test" or "Test" that % returns no output arguments. % % * An M-file function whose name starts with "test" or "Test" that % contains subfunction tests and uses the initTestSuite script to % return a TestSuite object. % % * An M-file defining a subclass of TestCase. % % mtest(mfilename) runs test cases found in the specified function or class % name. The function or class needs to be in the current directory or on the % MATLAB path. % % mtest('mfilename:testname') runs the specific test case named 'testname' % found in the function or class 'name'. % % mtest(dirname) runs all the test cases that can be found in the specified % directory. % % Examples % -------- % Find and run all the test cases in the current directory.
% % mtest % % Find and run all the test cases contained in the M-file myfunc. % % mtest myfunc % % Find and run all the test cases contained in the TestCase subclass % MyTestCase. % % mtest MyTestCase % % Run the test case named 'testFeature' contained in the M-file myfunc. % % mtest myfunc:testFeature % % Run all the tests in a specific directory. % % mtest c:\Work\MyProject\tests % Steven L. Eddins % Copyright 2008-2009 The MathWorks, Inc. if nargin < 1 suite = TestSuite.fromPwd(); else suite = TestSuite.fromName(name); user_gave_a_directory_name = isempty(suite.TestComponents) && ... (exist(name, 'file') == 7); if user_gave_a_directory_name % Before changing directories, arrange to restore the current directory % safely. currentDir = pwd; c = onCleanup(@() cd(currentDir)); cd(name); suite = TestSuite.fromPwd(); end end did_pass = suite.run(CommandWindowTestRunDisplay()); if nargout > 0 out = did_pass; end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/tests/MtestTest.m ================================================ %TestSuiteTest Unit tests for mtest command-line test runner. classdef MtestTest < TestCaseInDir methods function self = MtestTest(name) self = self@TestCaseInDir(name, ... fullfile(fileparts(which(mfilename)), 'cwd_test')); end function test_noInputArgs(self) [T, did_pass] = evalc('mtest'); % The cwd_test directory contains some test cases that fail, % so output of mtest should be false. assertFalse(did_pass); end function test_oneInputArg(self) [T, did_pass] = evalc('mtest(''testFoobar'')'); % cwd_test/testFoobar.m is supposed to pass. 
assertTrue(did_pass); end function test_oneInputArgWithFilter_passing(self) [T, did_pass] = evalc('mtest(''TestCaseSubclass:testA'')'); assertTrue(did_pass); end function test_oneInputArgWithFilter_failing(self) [T, did_pass] = evalc('mtest(''TestCaseSubclass:testB'')'); assertFalse(did_pass); end end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/tests/cwd_test/TestCaseSubclass.m ================================================ %TestCaseSubclass TestCase subclass containing two passing tests % Steven L. Eddins % Copyright 2008 The MathWorks, Inc. classdef TestCaseSubclass < TestCase methods function self = TestCaseSubclass(name) self = self@TestCase(name); end function testA(self) end function testB(self) % Intentionally fail this test case. assertFalse(true); end end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/tests/cwd_test/testFoobar.m ================================================ function testFoobar %testFoobar Passing M-file test % Steven L. Eddins % Copyright 2008 The MathWorks, Inc. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/tests/cwd_test/testSubfunctions.m ================================================ function test_cases = testSubfunctions %testSubfunctions Contains two passing subfunction tests % Steven L. Eddins % Copyright 2008 The MathWorks, Inc. findSubfunctionTests; function testSub1 function testSub2 ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/tests/testAssertAlmostEqual.m ================================================ function test_suite = testAssertAlmostEqual %testAssertAlmostEqual Unit tests for assertAlmostEqual % Steven L. 
Eddins % Copyright 2008 The MathWorks, Inc. initTestSuite; function testEqual assertAlmostEqual(1, 1); function testEqualWithThreeInputs assertAlmostEqual(1, 1.1, 0.2); function testEqualWithFourInputs assertExceptionThrown(@() assertAlmostEqual(1, 2, 0.1, 'checkmate'), ... 'assertAlmostEqual:tolExceeded'); function testEmptyRelTol assertAlmostEqual(1, 1+10*eps, [], 'checkmate'); function testNotEqual assertExceptionThrown(@() assertAlmostEqual(1, 1+1000*eps), ... 'assertAlmostEqual:tolExceeded'); function testSingleEqual assertAlmostEqual(single(1), single(1 + 10*eps('single'))); function testSingleNotEqual assertExceptionThrown(@() assertAlmostEqual(single(1), ... single(1 + 1000*eps('single'))), 'assertAlmostEqual:tolExceeded'); function testZeros assertAlmostEqual(0, 0); function testSingleZeros assertAlmostEqual(single(0), single(0)); function testSparse assertAlmostEqual(sparse(1), sparse(1 + 10*eps)); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/tests/testIsAlmostEqual.m ================================================ function test_suite = testIsAlmostEqual %testIsAlmostEqual Unit tests for isAlmostEqual % Steven L. Eddins % Copyright 2008 The MathWorks, Inc. 
initTestSuite; function testExactlyEqual A = [1 2; 3 4]; B = [1 2; 3 4]; assertTrue(mtest.utils.isAlmostEqual(A, B)); function testDefaultTolerance assertTrue(mtest.utils.isAlmostEqual(1, 1+10*eps)); assertFalse(mtest.utils.isAlmostEqual(1, 1+1000*eps)); function testDefaultToleranceSingle assertTrue(mtest.utils.isAlmostEqual(single(1), 1 + 10*eps('single'))); assertFalse(mtest.utils.isAlmostEqual(single(1), 1 + 1000*eps('single'))); function testSpecifiedTolerance assertTrue(mtest.utils.isAlmostEqual(1, 1.09, 0.1)); assertFalse(mtest.utils.isAlmostEqual(1, 1.2, 0.1)); function testSpecialValues A = [Inf, -Inf, NaN, 2.0]; B = [Inf, -Inf, NaN, 2.0+10*eps]; assertTrue(mtest.utils.isAlmostEqual(A, B)); C = [Inf, -Inf, NaN, 2.0]; D = [Inf, -Inf, 0, 2.0+10*eps]; assertFalse(mtest.utils.isAlmostEqual(C, D)); function testUint8 assertTrue(mtest.utils.isAlmostEqual(uint8(1), uint8(1))); assertFalse(mtest.utils.isAlmostEqual(uint8(1), uint8(2))); function testChar assertTrue(mtest.utils.isAlmostEqual('foobar', 'foobar')); assertFalse(mtest.utils.isAlmostEqual('foo', 'bar')); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/readme ================================================ test ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/+subpkg/test_a_bit.m ================================================ function test_suite = test_a_bit initTestSuite function test_now function test_later ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/A.m ================================================ % Class A is a TestCase subclass containing two test cases (test_a and test_b). 
classdef A < TestCase methods function self = A(name) self = self@TestCase(name); end function test_a(self) end function test_b(self) end end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/B.m ================================================ % Class B is not a TestCase subclass. classdef B end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/FooTest.m ================================================ classdef FooTest < TestCase methods function object = FooTest(name) object = object@TestCase(name); end function test_sanity(object) assertEqual(0, 0) end end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/helper_that.m ================================================ % helper_that is not a test file. function y = helper_that(x) y = x; ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/test_that.m ================================================ % test_that.m is a subfunction test file. function test_suite = test_this initTestSuite function test_the_other a = magic(3); function test_nifty b = magic(5); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/test_this.m ================================================ % test_this.m is a function-file test case. function test_this ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/Readme.m ================================================ % This directory contains the test suite for the mUnit test framework. 
% Before running the test suite, do the following:
%
% 1. Make sure the mUnit test framework directory is on your path
% 2. Make sure the helper_classes subdirectory of the test directory is on
%    your path.
% 3. Make the test directory your current directory.
%
% To run the test suite:
%
%    run(TestSuite())

% Steven L. Eddins
% Copyright 2008 The MathWorks

help Readme

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/RuntestsTest.m
================================================
%RuntestsTest Unit tests for runtests command-line test runner.

classdef RuntestsTest < TestCaseInDir

    methods

        function obj = RuntestsTest(name)
            % Run every test method of this class from the cwd_test
            % directory located next to this file.
            obj = obj@TestCaseInDir(name, ...
                fullfile(fileparts(which(mfilename)), 'cwd_test'));
        end

        function test_noInputArgs(self)
            % evalc captures the runner's console text so that it does not
            % clutter the output of this suite.
            [console_text, passed] = evalc('runtests');

            % The cwd_test directory contains some test cases that fail,
            % so output of runtests should be false.
            assertFalse(passed);
        end

        function test_Verbose(self)
            % Same run as test_noInputArgs, with the -verbose option.
            [console_text, passed] = evalc('runtests(''-verbose'')');
            assertFalse(passed);
        end

        function test_oneInputArg(self)
            [T, did_pass] = evalc('runtests(''testFoobar'')');
            % cwd_test/testFoobar.m is supposed to pass.
assertTrue(did_pass);
        end

        function test_verboseThenTestName(self)
            % Option first, then the test location.
            [T, did_pass] = evalc('runtests(''-verbose'', ''.'')');
            assertFalse(did_pass);
        end

        function test_testNameThenVerbose(self)
            % Test location first, then the option.
            [T, did_pass] = evalc('runtests(''.'', ''-verbose'')');
            assertFalse(did_pass);
        end

        function test_oneInputArgWithFilter_passing(self)
            % 'Name:filter' syntax selecting the test expected to pass.
            [T, did_pass] = evalc('runtests(''TestCaseSubclass:testA'')');
            assertTrue(did_pass);
        end

        function test_oneInputArgWithFilter_failing(self)
            % 'Name:filter' syntax selecting the test expected to fail.
            [T, did_pass] = evalc('runtests(''TestCaseSubclass:testB'')');
            assertFalse(did_pass);
        end

        function test_oneDirname(self)
            % dir1 is expected to pass, dir2 is expected to fail.
            [T, did_pass] = evalc('runtests(''../dir1'')');
            assertTrue(did_pass);

            [T, did_pass] = evalc('runtests(''../dir2'')');
            assertFalse(did_pass);
        end

        function test_twoDirnames(self)
            % Running both directories together must report failure.
            [T, did_pass] = evalc('runtests(''../dir1'', ''../dir2'')');
            assertFalse(did_pass);
        end

        function test_packageName(self)
            [T, did_pass] = evalc('runtests(''xunit.mocktests'')');
            assertTrue(did_pass);
        end

        function test_noTestCasesFound(self)
            assertExceptionThrown(@() runtests('no_such_test'), ...
                'xunit:runtests:noTestCasesFound');
        end

        function test_optionStringsIgnored(self)
            % Option string at beginning.
            [T, did_pass] = evalc('runtests(''-bogus'', ''../dir1'')');
            assertTrue(did_pass);

            % Option string at end.
            [T, did_pass] = evalc('runtests(''../dir2'', ''-bogus'')');
            assertFalse(did_pass);
        end

        function test_logfile(self)
            % A passing run with -logfile must leave a file at the given path.
            name = tempname;
            command = sprintf('runtests(''../dir1'', ''-logfile'', ''%s'')', name);
            [T, did_pass] = evalc(command);
            assertTrue(did_pass);
            assertTrue(exist(name, 'file') ~= 0);
            delete(name);
        end

        function test_logfileWithNoFile(self)
            % -logfile without a following file name.
            assertExceptionThrown(@() runtests('../dir1', '-logfile'), ...
                'xunit:runtests:MissingLogfile');
        end

        function test_logfileWithNoWritePermission(self)
            % The log file is placed inside a directory that does not exist
            % (tempname only generates a name), so it cannot be opened on any
            % platform. The previous hard-coded 'C:\...' path is an ordinary,
            % creatable file name on non-Windows systems.
            assertExceptionThrown(@() runtests('../dir1', '-logfile', ...
                fullfile(tempname, 'foobar.txt')), ...
'xunit:runtests:FileOpenFailed');
        end

        function test_namesInCellArray(self)
            % Test names may be supplied as a cell array of strings.
            [console_text, passed] = evalc('runtests({''TestCaseSubclass:testA''})');
            assertTrue(passed);

            [console_text, passed] = evalc('runtests({''TestCaseSubclass:testA'', ''TestCaseSubclass:testB''})');
            assertFalse(passed);
        end

    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/TestCaseTest.m
================================================
%TestCaseTest Unit tests for the TestCase class

% Steven L. Eddins
% Copyright The MathWorks 2008

classdef TestCaseTest < TestCaseInDir

    methods

        function obj = TestCaseTest(name)
            % Run every test method of this class from the helper_classes
            % directory located next to this file.
            obj = obj@TestCaseInDir(name, ...
                fullfile(fileparts(which(mfilename)), 'helper_classes'));
        end

        function testConstructor(self)
            % Exercise the constructor. Verify that the Name and Location
            % properties are set correctly.
            test_case = TwoPassingTests('testMethod1');
            assertEqual(test_case.Name, 'testMethod1');
            assertEqual(test_case.Location, which('TwoPassingTests'));
        end

        function testPassingTests(self)
            % Verify that the expected observer notifications are received in
            % the proper order.
            logger = TestRunLogger();
            TestSuite('TwoPassingTests').run(logger);

            expected_log = ...
                {'TestRunStarted', 'TestComponentStarted', ...
                'TestComponentStarted', 'TestComponentFinished', ...
                'TestComponentStarted', 'TestComponentFinished', ...
                'TestComponentFinished', 'TestRunFinished'};
            assertTrue(isequal(logger.Log, expected_log));
        end

        function testFixtureCalls(self)
            % Verify that fixture calls are made in the proper order.
            test_case = LoggingTestCase('testMethod');
            test_case.run(TestRunLogger());

            expected_calls = {'setUp', 'testMethod', 'tearDown'};
            assertTrue(isequal(test_case.log, expected_calls));
        end

        function testTestFailure(self)
            % Verify that a test failure is recorded.
            logger = TestRunLogger();
            TestSuite('FailingTestCase').run(logger);
            assertTrue(isequal(logger.NumFailures, 1));
        end

        function testTestError(self)
            % Verify that a test error is recorded.
logger = TestRunLogger();
            TestSuite('BadFixture').run(logger);
            assertTrue(isequal(logger.NumErrors, 1));
        end

    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/TestCaseWithAddPathTest.m
================================================
%TestCaseWithAddPathTest Unit tests for the TestCaseWithAddPath class

% Steven L. Eddins
% Copyright The MathWorks 2008

classdef TestCaseWithAddPathTest < TestCaseWithAddPath

    methods

        function obj = TestCaseWithAddPathTest(name)
            % Hand the helper_classes directory located next to this file to
            % the TestCaseWithAddPath constructor.
            obj = obj@TestCaseWithAddPath(name, ...
                fullfile(fileparts(which(mfilename)), 'helper_classes'));
        end

        function testPath(self)
            % Verify that a function in helper_classes is seen on the path.
            % (exist returns 2 for a file.)
            found = exist('testFunctionHandlesA', 'file');
            assertEqual(found, 2);
        end

        function testRunTestOnPath(self)
            % Verify that we can make a test suite and run it using a file
            % in the new path directory.
            logger = TestRunLogger();
            suite = TestSuite('testFunctionHandlesA');
            passed = suite.run(logger);
            assertTrue(passed);
        end

    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/TestFuncHandleTests.m
================================================
%TestFuncHandleTests TestCase class used to test function-handle-based tests

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

classdef TestFuncHandleTests < TestCaseInDir

    methods

        function obj = TestFuncHandleTests(name)
            % Run every test method of this class from the helper_classes
            % directory located next to this file.
            obj = obj@TestCaseInDir(name, ...
                fullfile(fileparts(which(mfilename)), 'helper_classes'));
        end

        function testSuiteNameAndLocation(self)
            % Name and Location of the returned suite must identify the
            % test file that produced it.
            suite = testFunctionHandlesA();
            assertEqual(suite.Name, 'testFunctionHandlesA');
            assertEqual(suite.Location, which('testFunctionHandlesA'));
        end

        function testOutputs(self)
            % Exercise the function-handle test M-file. Output should be a
            % TestSuite object containing two test cases.
test_suite = testFunctionHandlesA();
            assertTrue(isa(test_suite, 'TestSuite'));
            assertEqual(test_suite.numTestCases(), 2);
        end

        function testCaseNames(self)
            % Verify that Name property of test cases is set properly.
            suite = testFunctionHandlesA();
            expected_names = {'testA', 'testB'};
            for k = 1:numel(expected_names)
                assertEqual(suite.TestComponents{k}.Name, expected_names{k});
            end
        end

        function testCaseLocation(self)
            % Verify that the Location field of test cases is set properly.
            suite = testFunctionHandlesA();
            expected_location = which('testFunctionHandlesA');
            for k = 1:2
                assertEqual(suite.TestComponents{k}.Location, expected_location);
            end
        end

        function testPassingTests(self)
            % Verify that the expected observer notifications are received in
            % the proper order.
            logger = TestRunLogger();
            suite = testFunctionHandlesA;
            suite.run(logger);

            expected_log = ...
                {'TestRunStarted', 'TestComponentStarted', ...
                'TestComponentStarted', 'TestComponentFinished', ...
                'TestComponentStarted', 'TestComponentFinished', ...
                'TestComponentFinished', 'TestRunFinished'};
            assertEqual(logger.Log, expected_log);
        end

        function testTestFixture(self)
            % Verify that test fixture functions that use testData run without
            % error. (See test assertions in testFunctionHandlesB.)
            logger = TestRunLogger();
            suite = testFunctionHandlesB;
            suite.run(logger);

            assertEqual(logger.NumFailures, 0);
            assertEqual(logger.NumErrors, 0);
        end

        function testTestFixtureError(self)
            % Verify that an exception thrown in a test fixture is recorded as a
            % test error.
            logger = TestRunLogger();
            suite = testFunctionHandlesC();
            suite.run(logger);

            assertEqual(logger.NumErrors, 2);
        end

        function testFixtureNoTestData(self)
            % Verify that when setupFcn returns no output argument, the test
            % functions and the teardown function are called with no inputs.
            % (See test assertions in testFunctionHandlesD.)
logger = TestRunLogger();
            suite = testFunctionHandlesD();
            suite.run(logger);
            assertEqual(logger.NumFailures, 0);
            assertEqual(logger.NumErrors, 0);
        end

        function testFailingTest(self)
            % Verify that the expected observer notifications are received in
            % the proper order for a failing test.
            logger = TestRunLogger();
            suite = testFunctionHandlesE();
            suite.run(logger);

            expected_log = ...
                {'TestRunStarted', 'TestComponentStarted', ...
                'TestComponentStarted', 'TestCaseFailure', 'TestComponentFinished', ...
                'TestComponentFinished', 'TestRunFinished'};
            assertEqual(logger.Log, expected_log);
        end

        function testTeardownFcnButNoSetupFcn(self)
            % Verify that a test file works if it has a teardown function but no
            % setup function.
            logger = TestRunLogger();
            suite = testFunctionHandlesTeardownNoSetup();
            suite.run(logger);

            assertEqual(logger.NumTestCases, 1);
            assertEqual(logger.NumFailures, 0);
            assertEqual(logger.NumErrors, 0);
        end

    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/TestRunLoggerTest.m
================================================
%TestRunLoggerTest Unit tests for TestRunLogger class

classdef TestRunLoggerTest < TestCaseInDir

    methods

        function obj = TestRunLoggerTest(name)
            % Run every test method of this class from the helper_classes
            % directory located next to this file.
            obj = obj@TestCaseInDir(name, ...
                fullfile(fileparts(which(mfilename)), 'helper_classes'));
        end

        function testTwoPassingTests(self)
            % Log contents and counters after a suite of two passing tests.
            run_logger = TestRunLogger;
            passing_suite = TestSuite('TwoPassingTests');
            passing_suite.run(run_logger);

            expected_log = ...
                {'TestRunStarted', ...
                'TestComponentStarted', ...
                'TestComponentStarted', 'TestComponentFinished', ...
                'TestComponentStarted', 'TestComponentFinished', ...
                'TestComponentFinished', ...
                'TestRunFinished'};
            assertEqual(run_logger.Log, expected_log);

            assertEqual(run_logger.NumTestCases, 2);
            assertEqual(run_logger.NumFailures, 0);
            assertEqual(run_logger.NumErrors, 0);
            assertTrue(isempty(run_logger.Faults));
        end

        function testFailingTestCase(self)
            logger = TestRunLogger;
            suite = TestSuite('FailingTestCase');
            suite.run(logger);
            assertEqual(logger.Log, ...
{'TestRunStarted', ...
                'TestComponentStarted', ...
                'TestComponentStarted', 'TestCaseFailure', 'TestComponentFinished', ...
                'TestComponentFinished', ...
                'TestRunFinished'});

            assertEqual(logger.NumTestCases, 1);
            assertEqual(logger.NumFailures, 1);
            assertEqual(logger.NumErrors, 0);
            assertEqual(numel(logger.Faults), 1);
            assertEqual(logger.Faults(1).Type, 'failure');
        end

    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/TestSuiteTest.m
================================================
%TestSuiteTest Unit tests for TestSuite class

classdef TestSuiteTest < TestCaseInDir

    methods

        function obj = TestSuiteTest(name)
            % Run every test method of this class from the helper_classes
            % directory located next to this file.
            obj = obj@TestCaseInDir(name, ...
                fullfile(fileparts(which(mfilename)), 'helper_classes'));
        end

        function testClassNameIn(self)
            % Syntax check: TestSuite('classname')
            suite = TestSuite('TwoPassingTests');
            num_components = numel(suite.TestComponents);
            assertTrue(num_components == 2, ...
                'TestSuite finds two test methods given class name');
        end

        function testCurrentDirectory(self)
            % See that the no-input syntax executes without error.
            % Not sure how to test this more effectively.
            suite = TestSuite();
        end

        function testNoTestMethods(self)
            % TestCase class containing no test methods
            suite = TestSuite('NoTestMethods');
            num_components = numel(suite.TestComponents);
            assertTrue(num_components == 0, ...
                'No test cases when class contains no test methods');
        end

        function test_fromTestCaseClassName(self)
            % Both discovered components must carry one of the two method
            % names of TwoPassingTests; their order is not checked.
            suite = TestSuite.fromTestCaseClassName('TwoPassingTests');
            assertTrue(numel(suite.TestComponents) == 2);

            allowed_names = {'testMethod1', 'testMethod2'};
            for k = 1:2
                assertTrue(ismember(suite.TestComponents{k}.Name, ...
                    allowed_names));
            end
        end

        function test_fromTestCaseClassName_badclass(self)
            assertExceptionThrown(@() TestSuite.fromTestCaseClassName('atan2'), ...
'xunit:fromTestCaseClassName');
        end

        function test_fromName_TestCaseSubclass(self)
            % Name of a TestCase subclass.
            suite = TestSuite.fromName('TwoPassingTests');
            assertTrue(numel(suite.TestComponents) == 2);
            assertEqual(suite.Name, 'TwoPassingTests');
        end

        function test_fromName_notTestCaseSubclass(self)
            % Name of a class that is not a TestCase subclass: empty suite
            % that still carries the requested name.
            suite = TestSuite.fromName('TestRunMonitor');
            assertTrue(isempty(suite.TestComponents));
            assertEqual(suite.Name, 'TestRunMonitor');
        end

        function test_fromName_simpleTest(self)
            % Name of a simple M-file test.
            target = 'testSimple';
            suite = TestSuite.fromName(target);
            assertEqual(numel(suite.TestComponents), 1);
            assertEqual(suite.Name, target);
            assertEqual(suite.Location, which(target));
        end

        function test_fromName_subfunctions(self)
            % Name of a subfunction test file.
            target = 'testFunctionHandlesA';
            suite = TestSuite.fromName(target);
            assertEqual(numel(suite.TestComponents), 2);
            assertEqual(suite.Name, target);
            assertEqual(suite.Location, which(target));
        end

        function test_fromName_bogus_name(self)
            % Name of a function that is not a test at all.
            suite = TestSuite.fromName('atan2');
            assertTrue(isempty(suite.TestComponents));
            assertEqual(suite.Name, 'atan2');
        end

        function test_fromName_with_filter_string(self)
            % 'file:test' syntax keeps only the named test case.
            suite = TestSuite.fromName('testFunctionHandlesA:testA');
            assertEqual(numel(suite.TestComponents), 1);
            assertEqual(suite.TestComponents{1}.Name, 'testA');
            assertEqual(suite.Name, 'testFunctionHandlesA');
        end

        function test_fromName_with_nonmatching_filter_string(self)
            % A filter that matches no test case yields an empty suite.
            suite = TestSuite.fromName('testFunctionHandlesA:foobar');
            assertTrue(isempty(suite.TestComponents));
        end

        function test_fromName_with_dirname(self)
            % Directory name: the suite is named after the directory and
            % located at its full path.
            xunit_test_dir = fileparts(which('TestSuiteTest'));
            cwd_test_dir = fullfile(xunit_test_dir, 'cwd_test');

            suite = TestSuite.fromName(cwd_test_dir);

            assertEqual(suite.Name, 'cwd_test');
            assertEqual(suite.Location, cwd_test_dir);
            assertEqual(numel(suite.TestComponents), 3);
        end

        function test_fromPwd(self)
            % Verify that the fromPwd method returns a nonempty TestSuite object
            % from the helper_classes directory, with the correct number of
            % test components.
suite = TestSuite.fromPwd();
            assertTrue(isa(suite, 'TestSuite'));
            assertTrue(numel(suite.TestComponents) == 16);
        end

    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/ThrowsExceptionTest.m
================================================
classdef ThrowsExceptionTest < TestCaseInDir

    methods

        function self = ThrowsExceptionTest(methodName)
            % Run every test method of this class from the helper_classes
            % directory located next to this file.
            self = self@TestCaseInDir(methodName, ...
                fullfile(fileparts(which(mfilename)), 'helper_classes'));
        end

        function testPassingTest(self)
            % A test whose expected exception is thrown must be counted as
            % one test case with no failures and no errors.
            logger = TestRunLogger();
            TestSuite('PassingExceptionTest').run(logger);
            assertTrue((logger.NumTestCases == 1) && ...
                (logger.NumFailures == 0) && ...
                (logger.NumErrors == 0), ...
                'Passing exception test should have no failures or errors');
        end

        function testNoExceptionTest(self)
            % The recorded fault must carry the noException identifier.
            % The failure message names the identifier that is actually
            % compared (it previously said throwsException:noException).
            logger = TestRunLogger();
            TestSuite('ExceptionNotThrownTest').run(logger);
            assertTrue(strcmp(logger.Faults(1).Exception.identifier, ...
                'assertExceptionThrown:noException'), ...
                'Fault exception should be assertExceptionThrown:noException');
        end

        function testWrongExceptionTest(self)
            % The recorded fault must carry the wrongException identifier.
            % The failure message names the identifier that is actually
            % compared (it previously said throwsException:wrongException).
            logger = TestRunLogger();
            TestSuite('WrongExceptionThrownTest').run(logger);
            assertTrue(strcmp(logger.Faults(1).Exception.identifier, ...
                'assertExceptionThrown:wrongException'), ...
                'Fault exception should be assertExceptionThrown:wrongException');
        end

    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/cwd_test/TestCaseSubclass.m
================================================
%TestCaseSubclass TestCase subclass containing one passing and one failing test

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

classdef TestCaseSubclass < TestCase

    methods
        function self = TestCaseSubclass(name)
            self = self@TestCase(name);
        end

        function testA(self)
        end

        function testB(self)
            % Intentionally fail this test case.
assertFalse(true);
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/cwd_test/testFoobar.m
================================================
function testFoobar
%testFoobar Passing M-file test

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/cwd_test/testSubfunctions.m
================================================
function test_suite = testSubfunctions
%testSubfunctions Contains two passing subfunction tests

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testSub1

function testSub2

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/dir1/test_thatPasses.m
================================================
function test_suite = test_thatPasses
% Subfunction test file whose only test case passes.
initTestSuite;

function test_case
assertTrue(true);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/dir2/test_thatFails.m
================================================
function test_suite = test_thatFails
% Subfunction test file whose only test case fails.
initTestSuite;

function test_case
assertTrue(false);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/empty_file
================================================

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/BadFixture.m
================================================
classdef BadFixture < TestCase

    methods
        function self = BadFixture(name)
            self = self@TestCase(name);
        end

        function setUp(self)
            throw(MException('setUpError:BadFixture', ...
'BadFixture setUp method always throws exception'));
        end

        function testMethod(self)
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/Contents.m
================================================
% Helper Classes for mUnit Test Suite
%
% TestCase Subclasses
%    BadFixture          - Contains setUp method that throws exception
%    FailingTestCase     - Contains one test method that throws exception
%    LoggingTestCase     - Logs calls to setUp, tearDown, and test method
%    NoTestMethods       - TestCase subclass that contains no test methods
%    TestsToBeDiscovered - Used in TestSuiteTest
%    TwoPassingTests     - Contains two passing test methods

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/ExceptionNotThrownTest.m
================================================
classdef ExceptionNotThrownTest < TestCase

    methods
        function obj = ExceptionNotThrownTest(methodName)
            obj = obj@TestCase(methodName);
        end

        function testThrowsException(self)
            % The handle returns an empty array and throws nothing, while an
            % exception with identifier 'a:b:c' is demanded.
            silent_fcn = @() [];
            assertExceptionThrown(silent_fcn, 'a:b:c');
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/FailingTestCase.m
================================================
% FailingTestCase
% Utility class used by unit tests.

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

classdef FailingTestCase < TestCase

    methods
        function self = FailingTestCase(name)
            self = self@TestCase(name);
        end

        function testFail(self)
            throw(MException('testFail:FailingTestCase', ...
'testFail always fails'));
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/LoggingTestCase.m
================================================
% LoggingTestCase
% Utility class used by unit tests.

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

classdef LoggingTestCase < TestCase

    properties
        % Cell array of the names of the methods called so far, in call order.
        log = {};
    end

    methods
        function obj = LoggingTestCase(name)
            obj = obj@TestCase(name);
        end

        function setUp(self)
            self.log{end + 1} = 'setUp';
        end

        function tearDown(self)
            self.log{end + 1} = 'tearDown';
        end

        function testMethod(self)
            self.log{end + 1} = 'testMethod';
        end

        function testBrokenMethod(self)
            % Always throws; nothing is appended to the log.
            broken = MException('brokenMethod:WasRun', ...
                'Call to testBrokenMethod always throws exception');
            throw(broken);
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/NoTestMethods.m
================================================
classdef NoTestMethods < TestCase

    methods
        function obj = NoTestMethods(name)
            % Constructor only; the class defines no test methods.
            obj = obj@TestCase(name);
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/PassingExceptionTest.m
================================================
classdef PassingExceptionTest < TestCase

    methods
        function obj = PassingExceptionTest(methodName)
            obj = obj@TestCase(methodName);
        end

        function testThrowsException(self)
            % The handle raises exactly the identifier that is demanded.
            throwing_fcn = @() error('a:b:c', 'error message');
            assertExceptionThrown(throwing_fcn, 'a:b:c');
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/TestsToBeDiscovered.m
================================================
classdef TestsToBeDiscovered < TestCase

    methods
        function self = TestsToBeDiscovered(name)
            self = self@TestCase(name);
        end

        function
testMethodA
        end

        function testMethodB
        end

        function notATestMethod
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/TwoPassingTests.m
================================================
classdef TwoPassingTests < TestCase

    methods
        function obj = TwoPassingTests(name)
            obj = obj@TestCase(name);
        end

        function testMethod1(self)
        end

        function testMethod2(self)
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/WrongExceptionThrownTest.m
================================================
classdef WrongExceptionThrownTest < TestCase

    methods
        function obj = WrongExceptionThrownTest(methodName)
            obj = obj@TestCase(methodName);
        end

        function testThrowsException(self)
            % The handle raises 'd:e:f' while 'a:b:c' is demanded.
            throwing_fcn = @() error('d:e:f', 'message');
            assertExceptionThrown(throwing_fcn, 'a:b:c');
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/notTestString.m
================================================
function suite = notTestString
% This function exists to help test that the TestSuite.fromPwd() method does not
% pick up function-handle test files that do not match the naming convention.

initTestSuite;

function testA

function testB

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/testFunctionHandlesA.m
================================================
function test_suite = testFunctionHandlesA
%testFunctionHandlesA Test file used by TestFuncHandleTests
%   Contains two passing tests.

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.
initTestSuite;

function testA

function testB

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/testFunctionHandlesB.m
================================================
function test_suite = testFunctionHandlesB
%testFunctionHandlesB Test file used by TestFuncHandleTests
%   Contains two passing tests that use a test fixture.

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

initTestSuite;

function fixture = setUpFcn
% Fixture value that the tests and the teardown function check for.
fixture = 5;

function testA(fixture)
assertEqual(fixture, 5);

function testB(fixture)
assertEqual(fixture, 5);

function tearDownFcn(fixture)
assertEqual(fixture, 5);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/testFunctionHandlesC.m
================================================
function test_suite = testFunctionHandlesC
%testFunctionHandlesC Test file used by TestFuncHandleTests
%   Contains two passing tests that use a test fixture containing an intentional
%   error.

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.
initTestSuite; function setUpFcn function testA(varargin) assertTrue(isempty(varargin)); function testB(varargin) assertTrue(isempty(varargin)); function tearDownFcn(varargin) assertTrue(isempty(varargin)); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/testFunctionHandlesE.m ================================================ function test_suite = testFunctionHandlesA %testFunctionHandlesE Test file used by TestFunctionHandlesTest % Contains one failing test. % Steven L. Eddins % Copyright 2008 The MathWorks, Inc. initTestSuite; function testA error('testFunctionHandlesA:expectedFailure', 'Bogus message'); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/testFunctionHandlesTeardownNoSetup.m ================================================ function suite = testFunctionHandlesTeardownNoSetup % Verify that test file works if it has a teardown function but no setup % function. initTestSuite; function teardown close all function test_normalCase assertEqual(1, 1); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/testSimple.m ================================================ function testSimple %testSimple Simple M-file test that passes % Steven L. Eddins % Copyright 2008 The MathWorks, Inc. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testAssertEqual.m ================================================ function test_suite = testAssertEqual %testAssertEqual Unit tests for assertEqual % Steven L. Eddins % Copyright 2008 The MathWorks, Inc. 
initTestSuite;

function testAssertEqualHappyCase
assertEqual(5, 5);

function testAssertEqualWithThreeInputs
% Optional third argument: custom message.
assertEqual(5, 5, 'Scandinavian Defense');

function testAssertEqualHappyCaseString
assertEqual('foobar', 'foobar');

function testAssertEqualHappyCaseMatrix
assertEqual(magic(3), magic(3))

function testInfAndInf
assertEqual(Inf, Inf);

function testMinusInfAndMinusInf
assertEqual(-Inf, -Inf);

function testOppositeSignInfs
comparison = @() assertEqual(-Inf, Inf);
assertExceptionThrown(comparison, 'assertEqual:nonEqual');

function testFiniteAndInf
comparison = @() assertEqual(1, Inf);
assertExceptionThrown(comparison, 'assertEqual:nonEqual');

function testFiniteAndNaN
comparison = @() assertEqual(1, NaN);
assertExceptionThrown(comparison, 'assertEqual:nonEqual');

function testInfiniteAndNaN
comparison = @() assertEqual(Inf, NaN);
assertExceptionThrown(comparison, 'assertEqual:nonEqual');

function testAssertEqualNotEqual
comparison = @() assertEqual(5, 4);
assertExceptionThrown(comparison, 'assertEqual:nonEqual');

function testAssertEqualSparsity
% Full versus sparse input is reported with its own identifier.
comparison = @() assertEqual(5, sparse(5));
assertExceptionThrown(comparison, 'assertEqual:sparsityNotEqual');

function testAssertEqualNans
% NaNs in matching positions are expected to compare as equal.
assertEqual([1 NaN 2], [1 NaN 2]);

function testAssertEqualClass
% Different classes are reported with their own identifier.
comparison = @() assertEqual(5, uint8(5));
assertExceptionThrown(comparison, 'assertEqual:classNotEqual');

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testAssertExceptionThrown.m
================================================
function test_suite = testAssertExceptionThrown
%testAssertExceptionThrown Unit tests for assertExceptionThrown

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

initTestSuite;

function test_happyCase
% The thrown identifier matches the expected one.
throwing_fcn = @() error('MyProd:MyFun:MyId', 'my message');
assertExceptionThrown(throwing_fcn, 'MyProd:MyFun:MyId');

function test_wrongException
assertExceptionThrown(@() assertExceptionThrown(...
    @() error('MyProd:MyFun:MyId', 'my message'), ...
'MyProd:MyFun:DifferentId'), 'assertExceptionThrown:wrongException');

function test_noException
% The inner call wraps a function that throws nothing, so the inner
% assertion itself is expected to throw the noException identifier.
inner_assertion = @() assertExceptionThrown(@() sin(pi), 'foobar');
assertExceptionThrown(inner_assertion, ...
    'assertExceptionThrown:noException');

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testAssertFalse.m
================================================
function test_suite = testAssertFalse
%testAssertFalse Unit tests for assertFalse

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testAssertFalseHappyCase
assertFalse(false);

function testAssertFalseHappyCaseWithTwoArgs
% Optional second argument: custom message.
assertFalse(false, '1.e4 e5 2.Nf3 Nc6');

function testAssertFalseFailed
% Verify exception when true is passed to assertFalse.
failing_call = @() assertFalse(true);
assertExceptionThrown(failing_call, 'assertFalse:trueCondition');

function testAssertFalseNonscalar
% Verify that assertFalse doesn't like nonscalar input.
failing_call = @() assertFalse(logical([0 0]));
assertExceptionThrown(failing_call, 'assertFalse:invalidCondition');

function testAssertFalseNonlogical
% Verify that assertFalse doesn't like nonlogical input.
failing_call = @() assertFalse(0);
assertExceptionThrown(failing_call, 'assertFalse:invalidCondition');

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testAssertTrue.m
================================================
function test_suite = testAssertTrue
%testAssertTrue Unit tests for assertTrue

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testAssertTrueHappyCase
assertTrue(true);

function testAssertTrueHappyCaseWithTwoArgs
% Optional second argument: custom message.
assertTrue(true, '1.e4 e5 2.Nf3 Nc6');

function testAssertTrueFailed
% Verify exception when false is passed to assertTrue.
failing_call = @() assertTrue(false);
assertExceptionThrown(failing_call, 'assertTrue:falseCondition');

function testAssertTrueNonscalar
% Verify that assertTrue doesn't like nonscalar input.
assertExceptionThrown(@() assertTrue(logical([1 1])), 'assertTrue:invalidCondition');

function testAssertTrueNonlogical
% Verify that assertTrue doesn't like nonlogical input.
failing_call = @() assertTrue(5);
assertExceptionThrown(failing_call, 'assertTrue:invalidCondition');

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testContainsRegexp.m
================================================
function test_suite = testContainsRegexp
%testContainsRegexp Unit tests for containsRegexp

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testOneStringContains
% Single string holding at least one upper-case letter.
assertTrue(xunit.utils.containsRegexp('MATLAB is great', '[A-Z]'));

function testOneStringDoesntContain
% Single string holding no upper-case letter.
assertTrue(~ xunit.utils.containsRegexp('no upper-case letters', '[A-Z]'));

function testCellArray
% Cell array input: the result is expected to keep the orientation
% (row or column) of the input.
pattern = '[A-Z]';
strs = {'MATLAB is great', 'no upper-case letters'};
assertEqual(xunit.utils.containsRegexp(strs, pattern), [true false]);
assertEqual(xunit.utils.containsRegexp(strs', pattern), [true; false]);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testIsSetUpString.m
================================================
function test_suite = testIsSetUpString
%testIsSetUpString Unit tests for isSetUpString

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.
function test_suite = testIsTearDownString
%testIsTearDownString Unit tests for xunit.utils.isTearDownString.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testOneStringIs
% Names beginning with "teardown" / "TearDown" are recognized.
teardown_names = {'teardownfoobar', 'TearDown_foobar'};
for k = 1:numel(teardown_names)
    assertTrue(xunit.utils.isTearDownString(teardown_names{k}));
end

function testOneStringIsNot
% The capitalization 'tEardown' is not recognized.
is_teardown = xunit.utils.isTearDownString('tEardown');
assertFalse(is_teardown);

function testCellArray
% For cell input the result has the same orientation as the input.
strs = {'teardown', 'tearup'};

row_result = xunit.utils.isTearDownString(strs);
assertEqual(row_result, [true false]);

column_result = xunit.utils.isTearDownString(strs');
assertEqual(column_result, [true; false]);
function test_suite = testRuntestsWithDirectoryName
%testRuntestsWithDirectoryName Unit test for calling runtests with a directory.

initTestSuite;

function testDirName
% Run the tests found in the cwd_test directory next to this file. The run is
% expected to report failure, and the current directory must be unchanged
% afterwards.
starting_dir = pwd;

this_dir = fileparts(which(mfilename));
% The variable name target_dir is referenced inside the evalc string below.
target_dir = fullfile(this_dir, 'cwd_test');

% evalc captures the command-window output so it does not clutter the report.
[captured_output, did_pass] = evalc('runtests(target_dir)'); %#ok<ASGLU>

assertFalse(did_pass);
assertEqual(starting_dir, pwd);
function test_suite = test_arrayToString
%test_arrayToString Unit tests for xunit.utils.arrayToString.

%   Steven L. Eddins
%   Copyright 2009 The MathWorks, Inc.

initTestSuite;

function test_smallInput
% A small array is rendered with its element values.
A = [1 2 3];
rendered = xunit.utils.arrayToString(A);
assertEqual(strtrim(rendered), '1 2 3');

function test_largeInput
% A large array is summarized by size and class instead of its values.
A = zeros(1000, 1000);
rendered = xunit.utils.arrayToString(A);
assertEqual(rendered, '[1000x1000 double]');

function test_emptyInput
% An empty array is also summarized by size and class.
rendered = xunit.utils.arrayToString(zeros(1,0,2));
assertEqual(rendered, '[1x0x2 double]');
function test_suite = test_assertFilesEqual
%test_assertFilesEqual Unit tests for the assertFilesEqual assertion.

%   Steven L. Eddins
%   Copyright 2009 The MathWorks, Inc.

initTestSuite;

function test_equal
% Comparing a file with itself passes.
assertFilesEqual('black.tif', 'black.tif');

function test_differentSize
% black.tif vs black.png is expected to be reported as a size mismatch.
compare = @() assertFilesEqual('black.tif', 'black.png');
assertExceptionThrown(compare, 'assertFilesEqual:sizeMismatch');

function test_sameSizeButDifferent
% black.tif vs almost_black.tif is expected to be reported as differing values.
compare = @() assertFilesEqual('black.tif', 'almost_black.tif');
assertExceptionThrown(compare, 'assertFilesEqual:valuesDiffer');

function test_oneFileEmpty
% empty_file vs black.png is expected to be reported as a size mismatch.
compare = @() assertFilesEqual('empty_file', 'black.png');
assertExceptionThrown(compare, 'assertFilesEqual:sizeMismatch');

function test_bothFilesEmpty
% Comparing empty_file with itself passes.
assertFilesEqual('empty_file', 'empty_file');

function test_cannotReadFirstFile
% A first file named 'bogus' is expected to produce a read failure.
compare = @() assertFilesEqual('bogus', 'black.png');
assertExceptionThrown(compare, 'assertFilesEqual:readFailure');

function test_cannotReadSecondFile
% A second file named 'bogus' is expected to produce a read failure.
compare = @() assertFilesEqual('black.png', 'bogus');
assertExceptionThrown(compare, 'assertFilesEqual:readFailure');
function test_failedAssertion
% [1 1e6] versus [2 1e6] is expected to exceed the default tolerance of
% assertVectorsAlmostEqual.
first = [1 1e6];
second = [2 1e6];
failing_call = @() assertVectorsAlmostEqual(first, second);
assertExceptionThrown(failing_call, 'assertVectorsAlmostEqual:tolExceeded');
function suite = test_compareFloats
%test_compareFloats Unit tests for xunit.utils.compareFloats.

initTestSuite;

%===============================================================================
function test_elementwiseRelativeTolerance

tol = 0.1;
floor_tol = 0.01;
compare = @(a, b) xunit.utils.compareFloats(a, b, 'elementwise', ...
    'relative', tol, floor_tol);

assertTrue(compare([10 20], [11 20]));
assertFalse(compare([10 20], [11.2 20]));

% Verify floor tolerance
assertTrue(compare([0.001 1], [0.010 1]));

%===============================================================================
function test_elementwiseAbsoluteTolerance

compare = @(b) xunit.utils.compareFloats([10 20], b, 'elementwise', ...
    'absolute', 0.1);

assertTrue(compare([10.1 20]));
assertFalse(compare([10.1001 20]));

%===============================================================================
function test_vectorRelativeTolerance

% The pairs below would fail an elementwise test.
A = [1 10];
tol = 0.05;
compare = @(b) xunit.utils.compareFloats(A, b, 'vector', 'relative', tol);

assertTrue(compare([1.5 10]));
assertFalse(compare([1.6 10]));

%===============================================================================
function test_vectorAbsoluteTolerance

A = [1 10];
B = [1.4 10];
compare = @(tol) xunit.utils.compareFloats(A, B, 'vector', 'absolute', tol);

assertTrue(compare(0.5));
assertFalse(compare(0.3));

%===============================================================================
function test_NaNs

% NaNs in the same spots are OK.
A = [1 1 1 NaN 1 1 1 NaN 1];
same_spots = [1 1 1 NaN 1 1 1 NaN 1];
assertTrue(xunit.utils.compareFloats(A, same_spots));

% NaNs in different spots are not OK.
different_spots = [1 1 NaN NaN 1 1 1 NaN 1];
assertFalse(xunit.utils.compareFloats(A, different_spots));

%===============================================================================
function test_Infs

% Infinities in the same locations are OK if they have the same sign.
plus_inf = [1 2 3 Inf 4 5];
minus_inf = [1 2 3 -Inf 4 5];

assertTrue(xunit.utils.compareFloats(plus_inf, plus_inf));
assertTrue(xunit.utils.compareFloats(minus_inf, minus_inf));
assertFalse(xunit.utils.compareFloats(plus_inf, minus_inf, ...
    'elementwise', 'absolute'));

%===============================================================================
function test_complexInput

% Real and imaginary parts are compared separately.
compare = @(b) xunit.utils.compareFloats(1, b, 'elementwise', 'absolute', 0.1);

assertTrue(compare(1+0.09i));
assertFalse(compare(1+0.11i));

%===============================================================================
function test_comparisonTypeSpecified

% Verify handling of third input argument, the comparison type. The rest of the
% input syntax is handled by parseFloatAssertInputs and tested by the unit test
% for that function.

% The A-B pair below fails using elementwise comparison but passes using vector
% comparison.
A = [1.5 10];
B = [1 10];
tol = 0.1;
compare = @(compare_type) xunit.utils.compareFloats(A, B, compare_type, ...
    'relative', tol);

assertFalse(compare('elementwise'));
assertTrue(compare('vector'));
function suite = test_parseFloatAssertInputs
%test_parseFloatAssertInputs Unit tests for xunit.utils.parseFloatAssertInputs.

initTestSuite;

%===============================================================================
function test_tooFewInputs()
% Calling with no inputs is rejected.
no_inputs = @() xunit.utils.parseFloatAssertInputs();
assertExceptionThrown(no_inputs, 'MATLAB:nargchk:notEnoughInputs');

%===============================================================================
function test_tooManyInputs()
% Calling with seven inputs is rejected.
seven_inputs = @() xunit.utils.parseFloatAssertInputs(1,2,3,4,5,6,7);
assertExceptionThrown(seven_inputs, 'MATLAB:nargchk:tooManyInputs');

%===============================================================================
function test_twoInputs()
% With only A and B given, every optional field takes its default value.
default_tol = sqrt(eps);
params = xunit.utils.parseFloatAssertInputs(1, 2);

assertEqual(params.A, 1);
assertEqual(params.B, 2);
assertEqual(params.ToleranceType, 'relative');
assertEqual(params.Tolerance, default_tol);
assertEqual(params.FloorTolerance, default_tol);
assertEqual(params.Message, '');

%===============================================================================
function test_threeInputs()
% The third input may be a tolerance type or a message.
expected = struct('A', 1, 'B', 2, 'ToleranceType', 'relative', ...
    'Tolerance', sqrt(eps), 'FloorTolerance', sqrt(eps), 'Message', '');

params = xunit.utils.parseFloatAssertInputs(1, 2, 'relative');
assertEqual(params, expected);

expected.ToleranceType = 'absolute';
params = xunit.utils.parseFloatAssertInputs(1, 2, 'absolute');
assertEqual(params, expected);

expected.ToleranceType = 'relative';
expected.Message = 'message';
params = xunit.utils.parseFloatAssertInputs(1, 2, 'message');
assertEqual(params, expected);

%===============================================================================
function test_fourInputs()
% The fourth input may be a tolerance value or a message.
expected = struct('A', 1, 'B', 2, 'ToleranceType', 'absolute', ...
    'Tolerance', sqrt(eps), 'FloorTolerance', sqrt(eps), 'Message', '');

expected.Tolerance = 0.1;
params = xunit.utils.parseFloatAssertInputs(1, 2, 'absolute', 0.1);
assertEqual(params, expected);

expected.Tolerance = sqrt(eps);
expected.Message = 'message';
params = xunit.utils.parseFloatAssertInputs(1, 2, 'absolute', 'message');
assertEqual(params, expected);

%===============================================================================
function test_fiveInputs()
% The fifth input may be a floor tolerance or a message.
expected = struct('A', 1, 'B', 2, 'ToleranceType', 'absolute', ...
    'Tolerance', 0.1, 'FloorTolerance', 0.05, 'Message', '');

params = xunit.utils.parseFloatAssertInputs(1, 2, 'absolute', 0.1, 0.05);
assertEqual(params, expected);

expected.FloorTolerance = sqrt(eps);
expected.Message = 'message';
params = xunit.utils.parseFloatAssertInputs(1, 2, 'absolute', 0.1, 'message');
assertEqual(params, expected);

%===============================================================================
function test_sixInputs()
% All optional inputs supplied explicitly.
expected = struct('A', 1, 'B', 2, 'ToleranceType', 'absolute', ...
    'Tolerance', 0.1, 'FloorTolerance', 0.05, 'Message', 'message');

params = xunit.utils.parseFloatAssertInputs(1, 2, 'absolute', 0.1, 0.05, 'message');
assertEqual(params, expected);

%===============================================================================
function test_twoSingleInputs()
% Two single inputs: default tolerances are based on eps('single').
single_tol = sqrt(eps('single'));
expected = struct('A', 1, 'B', 2, 'ToleranceType', 'relative', ...
    'Tolerance', single_tol, 'FloorTolerance', single_tol, 'Message', '');

params = xunit.utils.parseFloatAssertInputs(single(1), single(2));
assertEqual(params, expected);

%===============================================================================
function test_twoSingleAndDoubleInputs()
% One single and one double input: defaults are still based on eps('single').
single_tol = sqrt(eps('single'));
expected = struct('A', 1, 'B', 2, 'ToleranceType', 'relative', ...
    'Tolerance', single_tol, 'FloorTolerance', single_tol, 'Message', '');

params = xunit.utils.parseFloatAssertInputs(single(1), double(2));
assertEqual(params, expected);
function s = arrayToString(A)
%arrayToString Convert array to string for display.
%   S = arrayToString(A) returns a string describing the array A, intended
%   for use in assertion messages. Arrays small enough to show in full are
%   converted with disp(A); larger arrays are rendered the way disp shows a
%   structure field value.

%   Steven L. Eddins
%   Copyright 2009 The MathWorks, Inc.

if ~isTooBigToDisp(A)
    s = dispAsArray(A);
    return
end
s = dispAsStructField(A);

%===============================================================================
function tf = isTooBigToDisp(A)
% Heuristic deciding whether the array is too big to convert to a string with
% disp. It is based on the size of the array in bytes as reported by whos.
% The variable name A is referenced by name in the whos call.

byte_threshold = 1000;
info = whos('A');
tf = info.bytes > byte_threshold;

%===============================================================================
function s = dispAsArray(A)
% Convert A to a string using disp, then strip leading and trailing blank
% lines. The variable name A is referenced inside the evalc string.

s = evalc('disp(A)');
if ~isempty(s)
    s = postprocessDisp(s);
else
    % disp displays nothing for some kinds of empty arrays.
    s = dispAsStructField(A);
end

%===============================================================================
function s = dispAsStructField(A)
% Convert A to a string using structure field display: wrap A in a struct,
% capture disp of the struct, and keep the value portion of the output. The
% variable name b is referenced inside the evalc string.

b.A = A;
displayed = evalc('disp(b)');
s = postprocessStructDisp(displayed);
function result = compareFloats(varargin)
%compareFloats Compare floating-point arrays using tolerance.
%   result = compareFloats(A, B, compare_type, tol_type, tol, floor_tol)
%   compares the floating-point arrays A and B using a tolerance and returns
%   a logical scalar. compare_type is either 'elementwise' or 'vector'.
%   tol_type is either 'relative' or 'absolute'. tol and floor_tol are the
%   scalar tolerance values.
%
%   There are four different tolerance tests used, depending on the
%   comparison type and the tolerance type:
%
%   1. Comparison type: 'elementwise'     Tolerance type: 'relative'
%
%       all( abs(A(:) - B(:)) <= tol * max(abs(A(:)), abs(B(:))) + floor_tol )
%
%   2. Comparison type: 'elementwise'     Tolerance type: 'absolute'
%
%       all( abs(A(:) - B(:)) <= tol )
%
%   3. Comparison type: 'vector'          Tolerance type: 'relative'
%
%       norm(A(:) - B(:)) <= tol * max(norm(A(:)), norm(B(:))) + floor_tol
%
%   4. Comparison type: 'vector'          Tolerance type: 'absolute'
%
%       norm(A(:) - B(:)) <= tol
%
%   Note that floor_tol is not used when the tolerance type is 'absolute'.
%
%   compare_type, tol_type, tol, and floor_tol are all optional inputs. The
%   default value for compare_type is 'elementwise'. The remaining optional
%   inputs are parsed by xunit.utils.parseFloatAssertInputs, which supplies
%   their defaults.
%   NOTE(review): the unit tests for parseFloatAssertInputs expect tol_type
%   to default to 'relative' and both tol and floor_tol to default to
%   sqrt(eps), or sqrt(eps('single')) if either A or B is single. Confirm
%   against that function.
%
%   If A or B is complex, then the tolerance test is applied independently to
%   the real and imaginary parts.
%
%   For elementwise comparisons, compareFloats returns true for two elements
%   that are both NaN, or for two infinite elements that have the same sign.
%   For vector comparisons, compareFloats returns false if any input elements
%   are infinite or NaN.
%
%   Errors:
%       compareFloats:unrecognizedCompareType   - invalid compare_type
%       compareFloats:unrecognizedToleranceType - invalid tol_type

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

if nargin >= 3
    % compare_type specified.  Grab it and then use parseFloatAssertInputs to
    % process the remaining input arguments.
    compare_type = varargin{3};
    varargin(3) = [];
    % strcmp against a cell array returns a logical array that is never
    % empty, so validity must be tested with any(), not isempty().
    if ~ischar(compare_type) || ...
            ~any(strcmp(compare_type, {'elementwise', 'vector'}))
        error('compareFloats:unrecognizedCompareType', ...
            'COMPARE_TYPE must be ''elementwise'' or ''vector''.');
    end
else
    compare_type = 'elementwise';
end

params = xunit.utils.parseFloatAssertInputs(varargin{:});

% Work on column vectors so abs/norm see a consistent shape.
A = params.A(:);
B = params.B(:);

% Magnitude function: per-element for 'elementwise', 2-norm for 'vector'.
switch compare_type
    case 'elementwise'
        magFcn = @abs;

    case 'vector'
        magFcn = @norm;

    otherwise
        % Defensive only; compare_type has already been validated above.
        error('compareFloats:unrecognizedCompareType', ...
            'COMPARE_TYPE must be ''elementwise'' or ''vector''.');
end

switch params.ToleranceType
    case 'relative'
        coreCompareFcn = @(A, B) magFcn(A - B) <= ...
            params.Tolerance * max(magFcn(A), magFcn(B)) + ...
            params.FloorTolerance;

    case 'absolute'
        coreCompareFcn = @(A, B) magFcn(A - B) <= params.Tolerance;

    otherwise
        error('compareFloats:unrecognizedToleranceType', ...
            'TOL_TYPE must be ''relative'' or ''absolute''.');
end

if strcmp(compare_type, 'elementwise')
    % Matching NaNs and same-sign infinities count as equal; any other
    % pairing that involves an infinity counts as unequal.
    compareFcn = @(A, B) ( coreCompareFcn(A, B) | bothNaN(A, B) | sameSignInfs(A, B) ) & ...
        ~oppositeSignInfs(A, B) & ...
        ~finiteAndInfinite(A, B);
else
    % A non-finite norm on either side makes the vector comparison fail.
    compareFcn = @(A, B) coreCompareFcn(A, B) & ...
        isfinite(magFcn(A)) & ...
        isfinite(magFcn(B));
end

if isreal(A) && isreal(B)
    result = compareFcn(A, B);
else
    result = compareFcn(real(A), real(B)) & compareFcn(imag(A), imag(B));
end

result = all(result);

%===============================================================================
function out = bothNaN(A, B)
% True where the corresponding elements of A and B are both NaN.

out = isnan(A) & isnan(B);

%===============================================================================
function out = oppositeSignInfs(A, B)
% True where both elements are infinite but have different signs.

out = isinf(A) & isinf(B) & (sign(A) ~= sign(B));

%===============================================================================
function out = sameSignInfs(A, B)
% True where both elements are infinite and have the same sign.

out = isinf(A) & isinf(B) & (sign(A) == sign(B));

%===============================================================================
function out = finiteAndInfinite(A, B)
% True where exactly one of the two elements is infinite.

out = xor(isinf(A), isinf(B));
function same = isAlmostEqual(A, B, reltol)
%isAlmostEqual Equality test using relative tolerance
%   same = isAlmostEqual(A, B, reltol), for two floating-point arrays A and B,
%   tests A and B for equality using the specified relative tolerance.
%   isAlmostEqual returns true if the following relationship is satisfied for
%   all values in A and B:
%
%       abs(A - B) ./ max(max(abs(A), abs(B)), 1) <= reltol
%
%   The denominator is never smaller than 1, so for values of magnitude below
%   1 the test is effectively an absolute-tolerance test, and no division by
%   zero occurs when both values are zero.
%
%   Pairs of elements that are both NaN, both +Inf, or both -Inf are treated
%   as equal. Arrays of different sizes are never equal.
%
%   same = isAlmostEqual(A, B) uses the following value for the relative
%   tolerance:
%
%       100 * max(eps(class(A)), eps(class(B)))
%
%   If either A or B is not a floating-point array, then isAlmostEqual returns
%   the result of isequal(A, B).

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

if ~isfloat(A) || ~isfloat(B)
    same = isequal(A, B);
    return
end

if nargin < 3
    reltol = 100 * max(eps(class(A)), eps(class(B)));
end

if ~isequal(size(A), size(B))
    same = false;
    return
end

A = A(:);
B = B(:);

% Relative difference, with the scale floored at 1.
delta = abs(A - B) ./ max(max(abs(A), abs(B)), 1);

% These pairings would otherwise yield NaN (which fails the <= test below),
% so mark them explicitly as equal.
delta(isnan(A) & isnan(B)) = 0;
delta((A == Inf) & (B == Inf)) = 0;
delta((A == -Inf) & (B == -Inf)) = 0;

same = all(delta <= reltol);
function tf = isTearDownString(str)
%isTearDownString True if string looks like the name of a teardown function
%   tf = isTearDownString(str) returns true if str begins with "teardown",
%   where the "t" and the "d" may each be upper or lower case.  If str is a
%   cell array of strings, each string is tested and the results are
%   returned in a logical array with the same size as str.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

teardown_exp = '^[tT]ear[dD]own';
tf = xunit.utils.containsRegexp(str, teardown_exp);
function params = parseFloatAssertInputs(varargin)
%parseFloatAssertInputs Parse inputs for floating-point assertion functions.
%   params = parseFloatAssertInputs(A, B, tol_type, tol, floor_tol, message)
%   parses the input arguments for assertElementsAlmostEqual,
%   assertVectorsAlmostEqual, and compareFcn.  Only A and B are required.
%   It returns a parameter struct containing the fields:
%
%       A    B    ToleranceType    Tolerance    FloorTolerance    Message
%
%   A trailing char argument other than 'relative' or 'absolute' is taken to
%   be the message.  Defaults: ToleranceType is 'relative'; Tolerance and
%   FloorTolerance are sqrt(max(eps(class(A)), eps(class(B)))), or sqrt(eps)
%   if that cannot be evaluated for the classes of A and B.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

error(nargchk(2, 6, nargin, 'struct'));

% Everything after A and B is optional.
extras = varargin(3:end);

% Peel off a trailing message, if there is one.
message = '';
if ~isempty(extras)
    last_arg = extras{end};
    if ischar(last_arg) && ~any(strcmp(last_arg, {'relative', 'absolute'}))
        message = last_arg;
        extras(end) = [];
    end
end

% Default tolerances depend on the precision of the inputs.
try
    epsilon = max(eps(class(varargin{1})), eps(class(varargin{2})));
catch
    epsilon = eps;
end

num_extras = numel(extras);

if num_extras >= 1
    tol_type = extras{1};
else
    tol_type = 'relative';
end

if num_extras >= 2
    tol = extras{2};
else
    tol = sqrt(epsilon);
end

if num_extras >= 3
    floor_tol = extras{3};
else
    floor_tol = sqrt(epsilon);
end

% Values are wrapped in cells so that struct() never expands a cell-valued
% input into a struct array.
params = struct('A', varargin(1), 'B', varargin(2), ...
    'ToleranceType', {tol_type}, 'Tolerance', {tol}, ...
    'FloorTolerance', {floor_tol}, 'Message', {message});
% % CommandWindowTestRunDisplay methods: % testComponentStarted - Update Command Window display % testComponentFinished - Update Command Window display % testCaseFailure - Log test failure information % testCaseError - Log test error information % % CommandWindowTestRunDisplay properties: % TestCaseCount - Number of test cases executed % Faults - Struct array of test fault info % % See also TestRunLogger, TestRunMonitor, TestSuite % Steven L. Eddins % Copyright 2008-2010 The MathWorks, Inc. methods function self = CommandWindowTestRunDisplay self = self@TestRunDisplay(1); end end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/Contents.m ================================================ % MATLAB xUnit Test Framework % Version 3.1 (R2010b) 19-Nov-2010 % % Running Unit Tests % runtests - Run unit tests % % Writing Unit Tests % assertElementsAlmostEqual - Assert floating-point array elements almost equal % assertEqual - Assert that inputs are equal % assertFilesEqual - Assert that two files have the same content % assertExceptionThrown - Assert that specified exception is thrown % assertFalse - Assert that input condition is false % assertTrue - Assert that input condition is true % assertVectorsAlmostEqual - Assert floating-point vectors almost equal in norm sense % initTestSuite - Utility script used for subfunction-based tests % % Framework Classes % CommandWindowTestRunDisplay - Print test suite results to command window % FunctionHandleTestCase - Test case based on a function handle % TestCase - Class defining interface for test cases % TestCaseInDir - Test case requiring temporary directory change % TestCaseWithAddPath - Test case requiring temporary path modification % TestComponent - Abstract base class for TestCase and TestSuite % TestComponentInDir - Test component requiring temporary directory change % TestLogger - Collect data (silently) from running test suite % 
classdef FunctionHandleTestCase < TestCase
%FunctionHandleTestCase Test case based on a function handle
%   FunctionHandleTestCase is a TestCase subclass whose test is a function
%   handle rather than a method of the TestCase subclass.
%
%   FunctionHandleTestCase methods:
%       FunctionHandleTestCase - Constructor
%       runTestCase            - Run function handle test
%       setUp                  - Run test-fixture setup function
%       tearDown               - Run test-fixture teardown function
%
%   FunctionHandleTestCase properties:
%       TestFcn     - Function handle of test function
%       SetupFcn    - Function handle of setup function
%       TeardownFcn - Function handle of teardown function
%       TestData    - Data needed by test function or teardown function
%
%   See also TestCase, TestSuite

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

    properties (SetAccess = protected, GetAccess = protected, Hidden = true)
        %TestFcn - Function handle of test function
        %   Called as TestFcn(data) when SetupFcn is nonempty and has one
        %   or more output arguments, where data is the value returned by
        %   SetupFcn.  Otherwise called with no input and no output
        %   arguments.
        TestFcn;

        %SetupFcn - Function handle of setup function
        %   When SetupFcn has one or more output arguments it is called as
        %
        %       data = SetupFcn()
        %
        %   and data is stored in the TestData property.  Otherwise it is
        %   called with no input and no output arguments.
        SetupFcn;

        %TeardownFcn - Function handle of teardown function
        %   Called as TeardownFcn(data) when SetupFcn is nonempty and has
        %   one or more output arguments, where data is the value returned
        %   by SetupFcn.  Otherwise called with no input and no output
        %   arguments.
        TeardownFcn;

        %TestData - Data needed by test function or teardown function.
        TestData;
    end

    methods
        function self = FunctionHandleTestCase(testFcn, setupFcn, teardownFcn)
            %FunctionHandleTestCase Constructor
            %   FunctionHandleTestCase(testFcn, setupFcn, teardownFcn)
            %   creates a TestCase object that executes by running the
            %   function handle testFcn.  setupFcn is executed before
            %   testFcn and teardownFcn after it.  Either setupFcn or
            %   teardownFcn can be empty.
            %
            %   If setupFcn is a function handle that has at least one
            %   output argument, the three functions are called as:
            %
            %       testData = setupFcn();
            %       testFcn(testData);
            %       teardownFcn(testData);
            %
            %   Otherwise all three are called with no input arguments:
            %
            %       setupFcn();
            %       testFcn();
            %       teardownFcn();

            % The base class is told to run the runTestCase method, which
            % in turn invokes TestFcn.
            self = self@TestCase('runTestCase');

            self.TestFcn = testFcn;
            self.SetupFcn = setupFcn;
            self.TeardownFcn = teardownFcn;

            % Name and M-file location come from the function handle itself.
            info = functions(testFcn);
            self.Name = info.function;

            is_anonymous = strcmp(info.type, 'anonymous');
            if is_anonymous
                % Anonymous function handles don't have an M-file location.
                self.Location = '';
            else
                self.Location = info.file;
            end
        end

        function runTestCase(self)
            %runTestCase Run function handle test
            %   Calls the test function handle.  self.TestData is passed
            %   to it when a nonempty SetupFcn with at least one output
            %   argument was provided; otherwise the test function is
            %   called with no input arguments.

            pass_data = ~isempty(self.SetupFcn) && nargout(self.SetupFcn) > 0;
            if pass_data
                self.TestFcn(self.TestData);
            else
                self.TestFcn();
            end
        end

        function setUp(self)
            %setUp Run test-fixture setup function
            %   Runs SetupFcn if it is nonempty.  If SetupFcn has at least
            %   one output argument, its first output is stored in
            %   TestData.  A warning is issued if it declares more than
            %   one output.

            if isempty(self.SetupFcn)
                return;
            end

            num_outputs = nargout(self.SetupFcn);

            if num_outputs <= 0
                self.SetupFcn();
                return;
            end

            if num_outputs > 1
                message = sprintf(['A test fixture setup function returns more than one output argument. ', ...
                    'The test harness only calls the setup function with one output argument. ', ...
                    'Return a struct or a cell array from your setup function if you need to bundle several parts together.', ...
                    '\nTest name: %s\nTest location: %s'], ...
                    self.Name, self.Location);
                warning('xunit:FunctionHandleTestCase:TooManySetupOutputs', ...
                    '%s', message);
            end

            self.TestData = self.SetupFcn();
        end

        function tearDown(self)
            %tearDown Run test-fixture teardown function
            %   Runs TeardownFcn if it is nonempty.  TestData is passed to
            %   it when a nonempty SetupFcn with at least one output
            %   argument was provided; otherwise TeardownFcn is called
            %   with no input arguments.

            if isempty(self.TeardownFcn)
                return;
            end

            if ~isempty(self.SetupFcn) && (nargout(self.SetupFcn) > 0)
                self.TeardownFcn(self.TestData);
            else
                self.TeardownFcn();
            end
        end
    end
end
function did_pass = run(self, monitor)
    %run Execute the test case
    %   did_pass = test_case.run(monitor) calls the TestCase object's
    %   setUp() method, then the test method named by the MethodName
    %   property, then the tearDown() method.  monitor receives
    %   testComponentStarted() before anything runs and
    %   testComponentFinished() at the very end.
    %
    %   An exception thrown by the test method itself is reported through
    %   monitor.testCaseFailure(); tearDown() is still called afterwards.
    %   An exception thrown by setUp(), by the method lookup, or by
    %   tearDown() is reported through monitor.testCaseError().  If
    %   setUp() throws, neither the test method nor tearDown() is called.
    %
    %   did_pass is false if either kind of fault was reported and true
    %   otherwise.
    %
    %   test_case.run() automatically uses a
    %   CommandWindowTestRunDisplay object in order to print test
    %   suite execution information to the Command Window.

    if nargin < 2
        monitor = CommandWindowTestRunDisplay();
    end

    did_pass = true;
    monitor.testComponentStarted(self);

    % Outer try: faults in the fixture are reported as errors.
    try
        self.setUp();
        f = str2func(self.MethodName);

        % Inner try: faults in the test method are reported as failures
        % and do not prevent tearDown() from running.
        try
            % Call the test method.
            f(self);
        catch failureException
            monitor.testCaseFailure(self, failureException);
            did_pass = false;
        end

        self.tearDown();

    catch errorException
        monitor.testCaseError(self, errorException);
        did_pass = false;
    end

    monitor.testComponentFinished(self, did_pass);
end
%TestCaseWithAddPath Test case requiring temporary path modification
%   The TestCaseWithAddPath class defines a test case that has to be run by
%   first adding a specific directory to the path.
%
%   The setUp method adds the directory to the path, and the tearDown method
%   restores the original path.
%
%   TestCaseWithAddPath is used by MATLAB xUnit's own test suite in order to
%   test itself.
%
%   TestCaseWithAddPath methods:
%       TestCaseWithAddPath - Constructor
%       setUp               - Add test directory to MATLAB path
%       tearDown            - Restore original MATLAB path
%
%   See also TestCase, TestCaseInDir

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

classdef TestCaseWithAddPath < TestCase

    properties (SetAccess = private, GetAccess = private)
        %TestDirectory - Directory to be added to the path
        TestDirectory

        %OriginalPath - Path captured by setUp before the directory is added
        OriginalPath
    end

    methods
        function self = TestCaseWithAddPath(methodName, testDirectory)
            %TestCaseWithAddPath Constructor
            %   TestCaseWithAddPath(methodName, testDirectory) constructs
            %   a test case for the specified test method.  testDirectory
            %   is the directory that setUp adds to the MATLAB path.

            self = self@TestCase(methodName);
            self.TestDirectory = testDirectory;
        end

        function setUp(self)
            %setUp Add test directory to MATLAB path.
            %   test_case.setUp() records the current path in the
            %   OriginalPath property and then adds TestDirectory to the
            %   MATLAB path.

            saved_path = path();
            self.OriginalPath = saved_path;
            addpath(self.TestDirectory);
        end

        function tearDown(self)
            %tearDown Restore original MATLAB path
            %   test_case.tearDown() reinstates the path that setUp saved.

            path(self.OriginalPath);
        end
    end
end
%TestComponentInDir Test component requiring temporary directory change
%   The TestComponentInDir class defines a test component that has to be run
%   by first changing to a specified directory.
%
%   The setUp method adds the starting directory to the path and then uses
%   cd to change into the specified directory.  The tearDown method restores
%   the original directory and path.
%
%   TestComponentInDir methods:
%       TestComponentInDir - Constructor
%       setUp              - Add starting directory to path, cd to test directory
%       tearDown           - Restore original directory and MATLAB path
%
%   See also TestComponent

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

classdef TestComponentInDir < TestComponent

    properties (SetAccess = private, GetAccess = protected)
        %TestDirectory - Directory to change to in the test fixture
        TestDirectory

        %OriginalPath - Path prior to adding the starting directory
        OriginalPath

        %OriginalDirectory - Starting directory
        OriginalDirectory
    end

    methods
        function self = TestComponentInDir(testDirectory)
            %TestComponentInDir Constructor
            %   TestComponentInDir(testDirectory) constructs a test
            %   component whose fixture changes into testDirectory.

            self.TestDirectory = testDirectory;
        end

        function setUp(self)
            %setUp Add starting directory to path and cd to test directory
            %   test_case.setUp() saves the current directory in the
            %   OriginalDirectory property and the current path in the
            %   OriginalPath property, adds the current directory to the
            %   path, and then uses cd to change into the test directory.

            starting_dir = pwd;
            self.OriginalDirectory = starting_dir;
            self.OriginalPath = path;

            addpath(starting_dir);
            cd(self.TestDirectory);
        end

        function tearDown(self)
            %tearDown Restore original MATLAB path and directory
            %   test_case.tearDown() changes back to the original
            %   directory and then restores the original path.

            cd(self.OriginalDirectory);
            path(self.OriginalPath);
        end
    end
end
function self = TestRunDisplay(output)
    %TestRunDisplay Constructor
    %   TestRunDisplay() leaves FileHandle at its default value.
    %
    %   TestRunDisplay(output), where output is a character array, opens
    %   the file named output for writing and stores the resulting file
    %   identifier in FileHandle.  An error with identifier
    %   'xunit:TestRunDisplay:FileOpenError' is thrown if the file cannot
    %   be opened.
    %
    %   TestRunDisplay(output), where output is anything else, stores
    %   output directly in FileHandle.

    if nargin > 0
        if ischar(output)
            self.FileHandle = fopen(output, 'w');
            if self.FileHandle < 0
                % Report the name that was passed in.  The original code
                % referenced an undefined variable here, which masked the
                % intended error with an "undefined variable" error.
                error('xunit:TestRunDisplay:FileOpenError', ...
                    'Could not open file "%s" for writing.', ...
                    output);
            end
        else
            self.FileHandle = output;
        end
    end
end
function testRunFinished(self, did_pass)
    %testRunFinished Update Command Window display
    %   obj.testRunFinished(did_pass) prints the overall result of the
    %   test run and the elapsed time since InitialTic to FileHandle, and
    %   then calls obj.displayFaults().

    result = 'FAILED';
    if did_pass
        result = 'PASSED';
    end

    elapsed_seconds = toc(self.InitialTic);
    fprintf(self.FileHandle, '\n%s in %.3f seconds.\n', result, elapsed_seconds);

    self.displayFaults();
end
function displayStack(stack, file_handle)
%displayStack Display stack trace from MException instance
%   displayStack(stack, file_handle) prints one line per entry of the
%   exception stack to file_handle.  stack is a struct array with at least
%   the fields "file" and "line".  Each line shows the file name followed by
%   the line number, which is wrapped in a "matlab:" hyperlink that calls
%   opentoline for that file and line.

for k = 1:numel(stack)
    filename = stack(k).file;
    linenumber = stack(k).line;
    href = sprintf('matlab: opentoline(''%s'',%d)', filename, linenumber);
    % The format needs one conversion per argument.  With only two
    % conversions for these three arguments, href was fed to %d and the
    % format string was recycled for the leftover argument.
    fprintf(file_handle, '%s at <a href="%s">line %d</a>\n', ...
        filename, href, linenumber);
end
end
%TestRunLogger Collect data (silently) from running test suite
%   TestRunLogger is a subclass of TestRunMonitor used to collect
%   information from an executing test component (either a test case or a
%   test suite).  It maintains a record of event notifications received, as
%   well as any test failures or test errors.
%
%   TestRunLogger methods:
%       testComponentStarted  - Log test component started
%       testComponentFinished - Log test component finished
%       testCaseFailure       - Log test case failure
%       testCaseError         - Log test case error
%
%   TestRunLogger properties:
%       Log          - Cell array of test notification strings
%       NumFailures  - Number of test failures during execution
%       NumErrors    - Number of test errors during execution
%       NumTestCases - Total number of test cases executed
%       Faults       - Struct array of test fault information
%
%   See also CommandWindowTestRunDisplay, TestRunMonitor, TestSuite

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

classdef TestRunLogger < TestRunMonitor

    properties (SetAccess = protected)
        %Log Cell array of test notification strings
        %   Test notification strings include 'TestRunStarted',
        %   'TestRunFinished', 'TestComponentStarted',
        %   'TestComponentFinished', 'TestCaseFailure', and 'TestCaseError'.
        Log

        %NumFailures Number of test failures during execution
        NumFailures = 0

        %NumErrors Number of test errors during execution
        NumErrors = 0

        %NumTestCases Total number of test cases executed
        NumTestCases = 0

        %Faults Struct array of test fault information
        %   Faults is a struct array with the fields Type, TestCase, and
        %   Exception.  Type is either 'failure' or 'error'.  TestCase is
        %   the test case object that triggered the fault.  Exception is
        %   the MException object thrown during the fault.
        Faults = struct('Type', {}, 'TestCase', {}, 'Exception', {});
    end

    properties (SetAccess = private, GetAccess = private)
        %InitialTestComponent First component seen; marks the end of the run
        InitialTestComponent = []
    end

    methods

        function testComponentStarted(self, component)
            %testComponentStarted Log test component started
            %   The first component seen is remembered and also logs
            %   'TestRunStarted'.  TestCase components are counted.
            is_first_component = isempty(self.InitialTestComponent);
            if is_first_component
                self.InitialTestComponent = component;
                self.appendToLog('TestRunStarted');
            end

            self.appendToLog('TestComponentStarted');

            if isa(component, 'TestCase')
                self.NumTestCases = self.NumTestCases + 1;
            end
        end

        function testComponentFinished(self, component, did_pass)
            %testComponentFinished Log test component finished
            %   Logs 'TestRunFinished' as well when component is the first
            %   component that was started.
            self.appendToLog('TestComponentFinished');

            run_is_over = isequal(component, self.InitialTestComponent);
            if run_is_over
                self.appendToLog('TestRunFinished');
            end
        end

        function testCaseFailure(self, test_case, failure_exception)
            %testCaseFailure Log test case failure
            self.appendToLog('TestCaseFailure');
            self.NumFailures = self.NumFailures + 1;
            self.logFault('failure', test_case, failure_exception);
        end

        function testCaseError(self, test_case, error_exception)
            %testCaseError Log test case error
            self.appendToLog('TestCaseError');
            self.NumErrors = self.NumErrors + 1;
            self.logFault('error', test_case, error_exception);
        end
    end

    methods (Access = private)
        function appendToLog(self, entry)
            %appendToLog Add one notification string to the end of Log
            self.Log{end+1} = entry;
        end

        function logFault(self, type, test_case, exception)
            %logFault Append one record to the Faults struct array
            slot = numel(self.Faults) + 1;
            self.Faults(slot).Type = type;
            self.Faults(slot).TestCase = test_case;
            self.Faults(slot).Exception = exception;
        end
    end
end
%TestRunMonitor Abstract base class for monitoring a running test suite
%   The abstract TestRunMonitor class defines an object that can observe and
%   record the results of running a test suite.  The run() method of a
%   TestComponent object takes a TestRunMonitor object as an input argument.
%
%   Different test suite logging or reporting functionality can be achieved by
%   subclassing TestRunMonitor.  For example, see the TestRunLogger and the
%   CommandWindowTestRunDisplay classes.
%
%   TestRunMonitor methods:
%       TestRunMonitor        - Constructor
%       testComponentStarted  - Called at beginning of test component run
%       testComponentFinished - Called when test component run finished
%       testCaseFailure       - Called when a test case fails
%       testCaseError         - Called when a test case causes an error
%
%   See also CommandWindowTestRunDisplay, TestRunLogger, TestCase, TestSuite

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

classdef TestRunMonitor < handle

    % Every concrete monitor must implement all four notifications below.
    methods (Abstract)

        % Called at the beginning of a test component run.
        %   component - the test component (test case or suite) being started
        testComponentStarted(self, component)

        % Called when a test component run has finished.
        %   component - the test component that finished
        %   did_pass  - pass/fail result of that component
        testComponentFinished(self, component, did_pass)

        % Called when a test case fails.
        %   test_case         - the failing test case
        %   failure_exception - the exception describing the failure
        testCaseFailure(self, test_case, failure_exception)

        % Called when a test case causes an error.
        %   test_case       - the test case that errored
        %   error_exception - the exception describing the error
        testCaseError(self, test_case, error_exception)

    end
end
%TestSuite Collection of TestComponent objects
%   The TestSuite class defines a collection of TestComponent objects.
%
%   TestSuite methods:
%       TestSuite             - Constructor
%       add                   - Add test component to test suite
%       print                 - Display test suite summary to Command Window
%       run                   - Run the test suite
%       keepMatchingTestCase  - Keep only the named test component
%       fromName              - Construct test suite from directory or MATLAB function file name
%       fromTestCaseClassName - Construct test suite from TestCase class name
%       fromPackageName       - Construct test suite from package name
%       fromPwd               - Construct test suite from present directory
%
%   TestSuite properties:
%       TestComponents - Cell array of TestComponent objects
%
%   Examples
%   --------
%   Run all the test cases in the SampleTests1 class.  Display test suite
%   progress and a summary of results in the Command Window.
%
%       TestSuite('SampleTests1').run()
%
%   Construct a test suite from all test components found in the current
%   directory.
%
%       suite = TestSuite.fromPwd();
%
%   Construct a test suite from all test components found in the package
%   'mytool.tests'. (Note that the "+" character at the beginning of the package
%   folder name on disk is not part of the package name.)
%
%       suite = TestSuite.fromPackageName('mytool.tests');
%
%   Run all the test cases in the SampleTests1 class.  Display no output to the
%   Command Window.  Upon completion, query the number of test failures and test
%   errors.
%
%       logger = TestRunLogger();
%       TestSuite('SampleTests1').run(logger);
%       numFailures = logger.NumFailures
%       numErrors = logger.NumErrors
%
%   See also CommandWindowTestRunDisplay, TestCase, TestComponent, TestRunLogger

%   Steven L. Eddins
%   Copyright 2008-2010 The MathWorks, Inc.

classdef TestSuite < TestComponent

    properties (SetAccess = protected)
        %TestComponents Cell array of the TestComponent objects in this suite
        TestComponents = {};
    end

    methods

        function self = TestSuite(name)
            %TestSuite Constructor
            %   suite = TestSuite constructs an empty test suite.
            %
            %   suite = TestSuite(name) constructs a test suite by searching
            %   for test cases defined in an M-file with the specified name.
            %   This form delegates to TestSuite.fromName.

            if nargin >= 1
                self = TestSuite.fromName(name);
            end
        end

        function did_pass_out = run(self, monitor)
            %run Execute test cases in test suite
            %   did_pass = suite.run() executes all test cases in the test
            %   suite, returning a logical value indicating whether or not all
            %   test cases passed.
            %
            %   suite.run(monitor) reports progress to the given
            %   TestRunMonitor instead of a new CommandWindowTestRunDisplay.

            if nargin < 2
                monitor = CommandWindowTestRunDisplay();
            end

            monitor.testComponentStarted(self);
            did_pass = true;

            self.setUp();

            % Every component is run even after an earlier one has failed.
            for k = 1:numel(self.TestComponents)
                this_component_passed = self.TestComponents{k}.run(monitor);
                did_pass = did_pass && this_component_passed;
            end

            self.tearDown();

            monitor.testComponentFinished(self, did_pass);

            % Only assign the output when requested so that calling run()
            % as a statement does not echo a result.
            if nargout > 0
                did_pass_out = did_pass;
            end
        end

        function num = numTestCases(self)
            %numTestCases Number of test cases in test suite
            %   Sums numTestCases() over all contained components.

            num = 0;
            for k = 1:numel(self.TestComponents)
                component_k = self.TestComponents{k};
                num = num + component_k.numTestCases();
            end
        end

        function print(self, numLeadingBlanks)
            %print Display test suite summary to Command Window
            %   test_suite.print() displays a summary of the test suite to the
            %   Command Window.
            %
            %   test_suite.print(numLeadingBlanks) indents the summary by the
            %   given number of blanks; nested components are indented by an
            %   additional self.PrintIndentationSize.

            if nargin < 2
                numLeadingBlanks = 0;
            end
            fprintf('%s%s\n', blanks(numLeadingBlanks), self.Name);
            for k = 1:numel(self.TestComponents)
                self.TestComponents{k}.print(numLeadingBlanks + ...
                    self.PrintIndentationSize);
            end
        end

        function add(self, component)
            %add Add test component to test suite
            %   test_suite.add(component) adds the TestComponent object to the
            %   test suite.  component may also be a cell array of
            %   TestComponent objects, in which case all of them are appended.

            if iscell(component)
                self.TestComponents((1:numel(component)) + end) = component;
            else
                self.TestComponents{end + 1} = component;
            end
        end

        function keepMatchingTestCase(self, name)
            %keepMatchingTestCase Keep only the named test component
            %   test_suite.keepMatchingTestCase(name) keeps only the first
            %   test component with a matching name and discards the rest.
            %   If no component matches, the suite becomes empty.

            idx = [];
            for k = 1:numel(self.TestComponents)
                if strcmp(self.TestComponents{k}.Name, name)
                    idx = k;
                    break;
                end
            end
            if isempty(idx)
                self.TestComponents = {};
            else
                self.TestComponents = self.TestComponents(idx);
            end
        end

    end

    methods (Static)

        function suite = fromTestCaseClassName(class_name)
            %fromTestCaseClassName Construct test suite from TestCase class name
            %   suite = TestSuite.fromTestCaseClassName(name) constructs a
            %   TestSuite object from the name of a TestCase subclass.  One
            %   test case is added for every non-constructor method whose
            %   name satisfies xunit.utils.isTestString.
            %
            %   Throws xunit:fromTestCaseClassName if the name is not that of
            %   a TestCase subclass.

            if ~xunit.utils.isTestCaseSubclass(class_name)
                error('xunit:fromTestCaseClassName', ...
                    'Input string "%s" is not the name of a TestCase class.', ...
                    class_name);
            end

            suite = TestSuite;
            suite.Name = class_name;
            suite.Location = which(class_name);

            % Named class_methods rather than "methods" so that the MATLAB
            % function of that name is not shadowed.
            class_methods = getClassMethods(class_name);
            for k = 1:numel(class_methods)
                if methodIsConstructor(class_methods{k})
                    continue
                end

                method_name = class_methods{k}.Name;
                if xunit.utils.isTestString(method_name)
                    % Instantiate the TestCase subclass for this one method.
                    suite.add(feval(class_name, method_name));
                end
            end
        end

        function suite = fromName(name)
            %fromName Construct test suite from M-file name
            %   test_suite = TestSuite.fromName(name) constructs a TestSuite
            %   object from an M-file with the given name.  The name can be of a
            %   directory, a TestCase subclass, a package, or an M-file
            %   containing a simple test or containing subfunction-based tests.
            %
            %   Optionally, name can contain a colon (':') followed by filter
            %   string.  The filter string is used to select a particular named
            %   test case.  For example, TestSuite.fromName('MyTests:testA')
            %   constructs a TestSuite object containing only the test case
            %   named 'testA' found in the TestCase subclass MyTests.

            % A directory is tested first, before any ':' filter is split off
            % (an absolute Windows path contains a colon).
            if isdir(name)
                suite = TestSuiteInDir(name);
                suite.gatherTestCases();
                return;
            end

            [name, filter_string] = strtok(name, ':');
            if ~isempty(filter_string)
                % strtok leaves the delimiter at the front of the remainder.
                filter_string = filter_string(2:end);
            end

            if xunit.utils.isTestCaseSubclass(name)
                suite = TestSuite.fromTestCaseClassName(name);

            elseif ~isempty(meta.class.fromName(name))
                % Input is the name of a class that is not a TestCase subclass.
                % Return an empty test suite.
                suite = TestSuite();
                suite.Name = name;

            elseif isPackage(name)
                suite = TestSuite.fromPackageName(name);

            else
                try
                    if nargout(name) == 0
                        % A function with no outputs is a single simple test.
                        suite = TestSuite();
                        suite.Name = name;
                        suite.add(FunctionHandleTestCase(str2func(name), [], []));
                        suite.Location = which(name);

                    else
                        % A function with outputs is expected to return its
                        % own TestSuite (subfunction-based tests).
                        suite = feval(name);
                        if ~isa(suite, 'TestSuite')
                            error('Function did not return a TestSuite object.');
                        end
                    end

                catch
                    % Ordinary function does not appear to contain tests.
                    % Return an empty test suite.
                    suite = TestSuite();
                    suite.Name = name;
                end
            end

            if ~isempty(filter_string)
                suite.keepMatchingTestCase(filter_string);
            end
        end

        function test_suite = fromPwd()
            %fromPwd Construct test suite from present directory
            %   test_suite = TestSuite.fromPwd() constructs a TestSuite object
            %   from all the test components in the present working directory.
            %   All TestCase subclasses will be found, as well as simple and
            %   subfunction-based M-file tests whose name satisfies
            %   xunit.utils.isTestString.

            test_suite = TestSuite();
            test_suite.Name = pwd;
            test_suite.Location = pwd;

            mfiles = dir(fullfile('.', '*.m'));
            for k = 1:numel(mfiles)
                % Only the base name is needed.  The first output is named
                % so that the MATLAB "path" function is not shadowed.
                [unused_folder, name] = fileparts(mfiles(k).name); %#ok<ASGLU>
                if xunit.utils.isTestCaseSubclass(name)
                    test_suite.add(TestSuite.fromTestCaseClassName(name));

                elseif xunit.utils.isTestString(name)
                    suite_k = TestSuite.fromName(name);
                    % Skip files that turned out to contain no tests.
                    if ~isempty(suite_k.TestComponents)
                        test_suite.add(suite_k);
                    end
                end
            end
        end

        function test_suite = fromPackageName(name)
            %fromPackageName Construct test suite from package name
            %   test_suite = TestSuite.fromPackageName(name) constructs a
            %   TestSuite object from all the test components found in the
            %   specified package, including its sub-packages.
            %
            %   Throws xunit:fromPackageName:invalidName if name is not the
            %   name of a package.

            package_info = meta.package.fromName(name);
            if isempty(package_info)
                error('xunit:fromPackageName:invalidName', ...
                    'Input string "%s" is not the name of a package.', ...
                    name);
            end
            test_suite = TestSuite();
            test_suite.Name = name;
            test_suite.Location = 'Package';

            for k = 1:numel(package_info.Packages)
                pkg_name = package_info.Packages{k}.Name;
                pkg_suite = TestSuite.fromPackageName(pkg_name);
                if ~isempty(pkg_suite.TestComponents)
                    % Reuse the suite that was just built.  Building it a
                    % second time repeated the whole recursive scan of the
                    % sub-package for every nesting level.
                    test_suite.add(pkg_suite);
                end
            end

            class_names = cell(1, numel(package_info.Classes));
            for k = 1:numel(package_info.Classes)
                class_name = package_info.Classes{k}.Name;
                class_names{k} = class_name;
                if xunit.utils.isTestCaseSubclass(class_name)
                    test_suite.add(TestSuite.fromTestCaseClassName(class_name));
                end
            end

            for k = 1:numel(package_info.Functions)
                function_name = package_info.Functions{k}.Name;
                if xunit.utils.isTestString(function_name)
                    full_function_name = [package_info.Name '.' function_name];
                    % Skip names already handled above as classes.
                    if ~ismember(full_function_name, class_names)
                        suite_k = TestSuite.fromName(full_function_name);
                        if ~isempty(suite_k.TestComponents)
                            test_suite.add(suite_k);
                        end
                    end
                end
            end
        end
    end
end

function tf = isPackage(name)
% True if name is the name of a package known to meta.package.
tf = ~isempty(meta.package.fromName(name));
end

function class_methods = getClassMethods(class_name)
% Return the Methods list of the meta.class object for class_name.
class_meta = meta.class.fromName(class_name);
class_methods = class_meta.Methods;
end

function result = methodIsConstructor(method)
% True if the method's (package-qualified) name equals the name of its
% defining class, i.e. the method is that class's constructor.
method_name = method.Name;
if ~isempty(method.DefiningClass.ContainingPackage)
    method_name = [method.DefiningClass.ContainingPackage.Name, '.', ...
        method_name];
end
result = strcmp(method_name, method.DefiningClass.Name);
end
%TestSuiteInDir Test suite requiring temporary directory change
%   The TestSuiteInDir class defines a test suite that has to be run by first
%   changing to a specified directory.
%
%   The setUp method adds the starting directory to the path and then uses cd to
%   change into the specified directory.  The tearDown method restores the
%   original path and directory.
%
%   TestSuiteInDir methods:
%       TestSuiteInDir  - Constructor
%       gatherTestCases - Add test cases found in the target directory
%
%   See also TestSuite

%   Steven L. Eddins
%   Copyright 2009 The MathWorks, Inc.

classdef TestSuiteInDir < TestSuite & TestComponentInDir

    methods
        function self = TestSuiteInDir(testDirectory)
            %TestSuiteInDir Constructor
            %   TestSuiteInDir(testDirectory) constructs a test suite located
            %   in the specified directory.  The suite is named after the last
            %   component of the directory path, or after the present working
            %   directory when testDirectory is '.'.

            self = self@TestComponentInDir(testDirectory);

            if strcmp(testDirectory, '.')
                self.Name = pwd;
                self.Location = pwd;
            else
                self.Name = directoryBaseName(testDirectory);
                self.Location = testDirectory;
            end
        end

        function gatherTestCases(self)
            %gatherTestCases Add test cases found in the target directory
            %   suite.gatherTestCases() automatically finds all the test cases in
            %   the directory specified in the constructor call and adds them to
            %   the suite.

            current_dir = pwd;
            % The onCleanup object restores the working directory when this
            % function exits, including when it exits through an error.
            c = onCleanup(@() cd(current_dir)); %#ok<NASGU>
            cd(self.TestDirectory);
            tmp = TestSuite.fromPwd();
            self.TestComponents = tmp.TestComponents;
        end
    end
end

function name = directoryBaseName(directory)
% Return the last path component of a directory name.
%   fileparts treats everything after the last '.' as a file extension and
%   returns an empty name when the path ends in a separator.  For a
%   directory the "extension" is part of its name, and a trailing separator
%   is not significant, so both cases are handled here.
if ispc
    trailing_separators = '[\\/]+$';
else
    trailing_separators = '/+$';
end
trimmed = regexprep(directory, trailing_separators, '');

[unused_parent, base, ext] = fileparts(trimmed); %#ok<ASGLU>
name = [base, ext];

if isempty(name)
    % Nothing left to name the suite after (e.g. a bare root directory).
    name = directory;
end
end
properties (SetAccess = private, GetAccess = private)
    % Stack of tic values, one per component that is currently running.
    TicStack = uint64([])
end

methods
    function self = VerboseTestRunDisplay(output)
        % Construct the display; output defaults to 1 and is handed
        % unchanged to the TestRunDisplay constructor.
        if nargin < 1
            output = 1;
        end
        self = self@TestRunDisplay(output);
    end

    function testComponentStarted(self, component)
        %testComponentStarted Update Command Window display
        %   Starts a timer for the component and prints its indented name.
        %   A test case is followed by leader dots on the same line; any
        %   other component is set off on a line of its own.
        self.pushTic();

        is_test_case = isa(component, 'TestCase');
        out = self.FileHandle;

        if ~is_test_case
            fprintf(out, '\n');
        end

        fprintf(out, '%s%s', self.indentationSpaces(), component.Name);

        if is_test_case
            fprintf(out, ' %s ', self.leaderDots(component.Name));
        else
            fprintf(out, '\n');
        end
    end

    function testComponentFinished(self, component, did_pass)
        %testComponentFinished Update Command Window display
        %   Prints the pass/fail result and elapsed time for the component.
        %   When the last running component finishes, the end-of-run
        %   report is produced as well.
        is_test_case = isa(component, 'TestCase');
        out = self.FileHandle;

        % The name of a non-test-case component is repeated here.  This is
        % printed before the timer is popped, so the indentation still
        % reflects this component's own nesting level.
        if ~is_test_case
            fprintf(out, '%s%s %s ', self.indentationSpaces(), component.Name, ...
                self.leaderDots(component.Name));
        end

        component_run_time = toc(self.popTic());

        result_format = 'FAILED in %12.6f seconds\n';
        if did_pass
            result_format = 'passed in %12.6f seconds\n';
        end
        fprintf(out, result_format, component_run_time);

        if ~is_test_case
            fprintf(out, '\n');
        end

        % An empty timer stack means the outermost component is done.
        if isempty(self.TicStack)
            self.testRunFinished();
        end
    end

end
function assertElementsAlmostEqual(varargin)
%assertElementsAlmostEqual Assert floating-point array elements almost equal.
%   assertElementsAlmostEqual(A, B, tol_type, tol, floor_tol) asserts that all
%   elements of floating-point arrays A and B are equal within some tolerance.
%   tol_type can be 'relative' or 'absolute'.  tol and floor_tol are scalar
%   tolerance values.
%
%   If the tolerance type is 'relative', then the tolerance test used is:
%
%       all( abs(A(:) - B(:)) <= tol * max(abs(A(:)), abs(B(:))) + floor_tol )
%
%   If the tolerance type is 'absolute', then the tolerance test used is:
%
%       all( abs(A(:) - B(:)) <= tol )
%
%   tol_type, tol, and floor_tol are all optional.  The default value for
%   tol_type is 'relative'.  If both A and B are double, then the default value
%   for tol and floor_tol is sqrt(eps).  If either A or B is single, then the
%   default value for tol and floor_tol is sqrt(eps('single')).
%
%   If A or B is complex, then the tolerance test is applied independently to
%   the real and imaginary parts.
%
%   Corresponding elements in A and B that are both NaN, or are both infinite
%   with the same sign, are considered to pass the tolerance test.
%
%   assertElementsAlmostEqual(A, B, ..., msg) prepends the string msg to the
%   output message if A and B fail the tolerance test.

%   Steven L. Eddins
%   Copyright 2008-2010 The MathWorks, Inc.

inputs = xunit.utils.parseFloatAssertInputs(varargin{:});
lhs = inputs.A;
rhs = inputs.B;

% Work out which check (if any) fails.  The checks are ordered so that the
% float comparison is attempted only on same-size floating-point inputs.
fault_id = '';
reason = '';
if ~isequal(size(lhs), size(rhs))
    fault_id = 'assertElementsAlmostEqual:sizeMismatch';
    reason = 'Inputs are not the same size.';

elseif ~isfloat(lhs) || ~isfloat(rhs)
    fault_id = 'assertElementsAlmostEqual:notFloat';
    reason = 'Inputs are not both floating-point.';

elseif ~xunit.utils.compareFloats(lhs, rhs, 'elementwise', ...
        inputs.ToleranceType, inputs.Tolerance, inputs.FloorTolerance)
    fault_id = 'assertElementsAlmostEqual:tolExceeded';
    reason = sprintf('Input elements are not all equal within %s tolerance: %g', ...
        inputs.ToleranceType, inputs.Tolerance);
end

% The exception is thrown from this function itself so that throwAsCaller
% attributes it to the code that called the assertion.
if ~isempty(fault_id)
    message = xunit.utils.comparisonMessage(inputs.Message, reason, lhs, rhs);
    throwAsCaller(MException(fault_id, '%s', message));
end
function assertExceptionThrown(f, expectedId, custom_message)
%assertExceptionThrown Assert that specified exception is thrown
%   assertExceptionThrown(F, expectedId) calls the function handle F with no
%   input arguments.  If the call throws an exception whose identifier is
%   expectedId, assertExceptionThrown returns silently.  If no exception is
%   thrown, it throws an exception with identifier
%   'assertExceptionThrown:noException'.  If an exception with a different
%   identifier is thrown, it throws an exception with identifier
%   'assertExceptionThrown:wrongException'.
%
%   assertExceptionThrown(F, expectedId, msg) prepends the string msg to the
%   assertion message.
%
%   Example
%   -------
%   % This call returns silently.
%   f = @() error('a:b:c', 'error message');
%   assertExceptionThrown(f, 'a:b:c');
%
%   % This call returns silently.
%   assertExceptionThrown(@() sin, 'MATLAB:minrhs');
%
%   % This call throws an error because calling sin(pi) does not error.
%   assertExceptionThrown(@() sin(pi), 'MATLAB:foo');

%   Steven L. Eddins
%   Copyright 2008-2010 The MathWorks, Inc.

has_custom_message = nargin >= 3;

try
    f();
catch caught
    if strcmp(caught.identifier, expectedId)
        % Exactly the exception that was asked for: assertion satisfied.
        return
    end

    details = sprintf('Expected exception %s but got exception %s.', ...
        expectedId, caught.identifier);
    if has_custom_message
        details = sprintf('%s\n%s', custom_message, details);
    end
    throwAsCaller(MException('assertExceptionThrown:wrongException', ...
        '%s', details));
end

% Execution reaches this point only when f() completed without throwing.
details = sprintf('Expected exception "%s", but none thrown.', ...
    expectedId);
if has_custom_message
    details = sprintf('%s\n%s', custom_message, details);
end
throwAsCaller(MException('assertExceptionThrown:noException', '%s', details));
function assertFilesEqual(filename1, filename2, user_message)
%assertFilesEqual Assert that files contain the same contents.
%   assertFilesEqual(filename1, filename2) throws an exception if the two
%   specified files do not contain the same contents.
%
%   assertFilesEqual(filename1, filename2, message) prepends the specified
%   message string to the assertion message.
%
%   Exceptions thrown (as the caller):
%       assertFilesEqual:readFailure  - a file could not be opened for reading
%       assertFilesEqual:sizeMismatch - the files have different sizes
%       assertFilesEqual:valuesDiffer - the files differ at some byte; the
%                                       message reports the first such byte

%   Steven L. Eddins
%   Copyright 2009-2010 The MathWorks, Inc.

if nargin < 3
    user_message = '';
end

fid1 = fopen(filename1, 'r');
if (fid1 < 0)
    message = prependUserMessage(user_message, ...
        sprintf('Could not open file for reading: %s', filename1));
    throwAsCaller(MException('assertFilesEqual:readFailure', ...
        '%s', message));
end
% Close the file however this function is left.
c1 = onCleanup(@() fclose(fid1)); %#ok<NASGU>

fid2 = fopen(filename2, 'r');
if (fid2 < 0)
    message = prependUserMessage(user_message, ...
        sprintf('Could not open file for reading: %s', filename2));
    throwAsCaller(MException('assertFilesEqual:readFailure', '%s', message));
end
c2 = onCleanup(@() fclose(fid2)); %#ok<NASGU>

% Compare the files block by block so that large files need not be held in
% memory in full.
block_size = 100000;
num_blocks = 0;
done = false;
while ~done
    block_from_file1 = fread(fid1, block_size, '*uint8');
    block_from_file2 = fread(fid2, block_size, '*uint8');

    if numel(block_from_file1) ~= numel(block_from_file2)
        % One file ended before the other.  Seek to the end of both so that
        % ftell reports their total sizes.
        fseek(fid1, 0, 'eof');
        fseek(fid2, 0, 'eof');
        message = sprintf('The two files are not the same size. File "%s" has %d bytes and file "%s" has %d bytes', ...
            filename1, ftell(fid1), filename2, ftell(fid2));
        message = prependUserMessage(user_message, message);
        throwAsCaller(MException('assertFilesEqual:sizeMismatch', '%s', message));
    end

    % Ask find for the FIRST mismatch only.  Without the count argument it
    % returns every differing index, which turned the arguments of the
    % sprintf below into vectors and garbled the message.
    first_difference_in_block = find(block_from_file1 ~= block_from_file2, 1);
    if ~isempty(first_difference_in_block)
        first_difference = num_blocks * block_size + first_difference_in_block;
        message = sprintf('Files are not equal. First difference is at byte %d, where file "%s" contains 0x%X and file "%s" contains 0x%X', ...
            first_difference, ...
            filename1, double(block_from_file1(first_difference_in_block)), ...
            filename2, double(block_from_file2(first_difference_in_block)));
        message = prependUserMessage(user_message, message);
        throwAsCaller(MException('assertFilesEqual:valuesDiffer', '%s', message));
    end

    % A short (or empty) block means end of file was reached.
    done = numel(block_from_file1) < block_size;
    num_blocks = num_blocks + 1;
end

function message = prependUserMessage(user_message, message)
% Put the caller-supplied message, if any, on its own line before message.
% This helper only builds the string; exceptions are thrown by the main
% function so that throwAsCaller reports the correct caller.
if ~isempty(user_message)
    message = sprintf('%s\n%s', user_message, message);
end
function assertVectorsAlmostEqual(varargin)
%assertVectorsAlmostEqual Assert floating-point vectors almost equal in norm sense.
%   assertVectorsAlmostEqual(A, B, tol_type, tol, floor_tol) asserts that the
%   vectors A and B are equal, in the L2-norm sense and within some tolerance.
%   tol_type can be 'relative' or 'absolute'.  tol and floor_tol are scalar
%   tolerance values.
%
%   If the tolerance type is 'relative', then the tolerance test used is:
%
%       all( norm(A - B) <= tol * max(norm(A), norm(B)) + floor_tol )
%
%   If the tolerance type is 'absolute', then the tolerance test used is:
%
%       all( norm(A - B) <= tol )
%
%   tol_type, tol, and floor_tol are all optional.  The default value for
%   tol_type is 'relative'.  If both A and B are double, then the default value
%   for tol and floor_tol is sqrt(eps).  If either A or B is single, then the
%   default value for tol and floor_tol is sqrt(eps('single')).
%
%   If A or B is complex, then the tolerance test is applied independently to
%   the real and imaginary parts.
%
%   Any infinite or NaN element of A or B will cause an assertion failure.
%
%   assertVectorsAlmostEqual(A, B, ..., msg) prepends the string msg to the
%   assertion message if A and B fail the tolerance test.

%   Steven L. Eddins
%   Copyright 2008-2010 The MathWorks, Inc.

inputs = xunit.utils.parseFloatAssertInputs(varargin{:});
lhs = inputs.A;
rhs = inputs.B;

% Work out which check (if any) fails.  The checks are ordered so that the
% norm comparison is attempted only on same-size floating-point inputs.
fault_id = '';
reason = '';
if ~isequal(size(lhs), size(rhs))
    fault_id = 'assertVectorsAlmostEqual:sizeMismatch';
    reason = 'Inputs are not the same size.';

elseif ~isfloat(lhs) || ~isfloat(rhs)
    fault_id = 'assertVectorsAlmostEqual:notFloat';
    reason = 'Inputs are not both floating-point.';

elseif ~xunit.utils.compareFloats(lhs, rhs, 'vector', ...
        inputs.ToleranceType, inputs.Tolerance, inputs.FloorTolerance)
    fault_id = 'assertVectorsAlmostEqual:tolExceeded';
    reason = sprintf('Inputs are not equal within %s vector tolerance: %g', ...
        inputs.ToleranceType, inputs.Tolerance);
end

% The exception is thrown from this function itself so that throwAsCaller
% attributes it to the code that called the assertion.
if ~isempty(fault_id)
    message = xunit.utils.comparisonMessage(inputs.Message, reason, lhs, rhs);
    throwAsCaller(MException(fault_id, '%s', message));
end
function out = runtests(varargin)
%runtests Run unit tests
%   runtests runs all the test cases that can be found in the current directory
%   and summarizes the results in the Command Window.
%
%   Test cases can be found in the following places in the current directory:
%
%       * An M-file function whose name starts or ends with "test" or
%         "Test" and that returns no output arguments.
%
%       * An M-file function whose name starts or ends with "test" or
%         "Test" and that contains subfunction tests and uses the
%         initTestSuite script to return a TestSuite object.
%
%       * An M-file defining a subclass of TestCase.
%
%   runtests(dirname) runs all the test cases found in the specified directory.
%
%   runtests(packagename) runs all the test cases found in the specified
%   package. (This option requires R2009a or later).
%
%   runtests(mfilename) runs test cases found in the specified function or class
%   name. The function or class needs to be in the current directory or on the
%   MATLAB path.
%
%   runtests('mfilename:testname') runs the specific test case named 'testname'
%   found in the function or class 'mfilename'.
%
%   Multiple directories or file names can be specified by passing multiple
%   names to runtests, as in runtests(name1, name2, ...) or
%   runtests({name1, name2, ...}, ...)
%
%   runtests(..., '-verbose') displays the name, result, and time taken for
%   each test case to the Command Window.
%
%   runtests(..., '-logfile', filename) directs the output of runtests to
%   the specified log file instead of to the Command Window.
%
%   out = runtests(...) returns a logical value that is true if all the
%   tests passed.
%
%   Examples
%   --------
%   Find and run all the test cases in the current directory.
%
%       runtests
%
%   Find and run all the test cases in the current directory. Display more
%   detailed information to the Command Window as the test cases are run.
%
%       runtests -verbose
%
%   Save verbose runtests output to a log file.
%
%       runtests -verbose -logfile my_test_log.txt
%
%   Find and run all the test cases contained in the M-file myfunc.
%
%       runtests myfunc
%
%   Find and run all the test cases contained in the TestCase subclass
%   MyTestCase.
%
%       runtests MyTestCase
%
%   Run the test case named 'testFeature' contained in the M-file myfunc.
%
%       runtests myfunc:testFeature
%
%   Run all the tests in a specific directory.
%
%       runtests c:\Work\MyProject\tests
%
%   Run all the tests in two directories.
%
%       runtests c:\Work\MyProject\tests c:\Work\Book\tests

%   Steven L. Eddins
%   Copyright 2009-2010 The MathWorks, Inc.

verbose = false;
logfile = '';

% Build the suite: no names means "current directory", one name is used
% directly, several names are collected into an enclosing suite.
if nargin < 1
    suite = TestSuite.fromPwd();
else
    [name_list, verbose, logfile] = getInputNames(varargin{:});
    if numel(name_list) == 0
        suite = TestSuite.fromPwd();
    elseif numel(name_list) == 1
        suite = TestSuite.fromName(name_list{1});
    else
        suite = TestSuite();
        for k = 1:numel(name_list)
            suite.add(TestSuite.fromName(name_list{k}));
        end
    end
end

if isempty(suite.TestComponents)
    error('xunit:runtests:noTestCasesFound', 'No test cases found.');
end

if isempty(logfile)
    logfile_handle = 1; % File handle corresponding to Command Window
else
    logfile_handle = fopen(logfile, 'w');
    if logfile_handle < 0
        error('xunit:runtests:FileOpenFailed', ...
            'Could not open "%s" for writing.', logfile);
    else
        % Close the log file however this function is left.
        cleanup = onCleanup(@() fclose(logfile_handle)); %#ok<NASGU>
    end
end

fprintf(logfile_handle, 'Test suite: %s\n', suite.Name);
if ~strcmp(suite.Name, suite.Location)
    fprintf(logfile_handle, 'Test suite location: %s\n', suite.Location);
end
fprintf(logfile_handle, '%s\n\n', datestr(now));

if verbose
    monitor = VerboseTestRunDisplay(logfile_handle);
else
    monitor = TestRunDisplay(logfile_handle);
end
did_pass = suite.run(monitor);

% Only assign the output when requested so that the command form
% "runtests ..." does not echo a result.
if nargout > 0
    out = did_pass;
end

function [name_list, verbose, logfile] = getInputNames(varargin)
% Split the runtests arguments into test names and options.
%   name_list - 1-by-N cell array of names, in the order given; cell array
%               arguments contribute all of their elements
%   verbose   - true if '-verbose' was given
%   logfile   - file name following '-logfile', or '' if not given
%   Unrecognized options (arguments starting with '-') produce a warning
%   and are otherwise ignored.
name_list = {};
verbose = false;
logfile = '';
k = 1;
while k <= numel(varargin)
    arg = varargin{k};
    if iscell(arg)
        % Append as a row, whatever the shape of arg.  Vertical
        % concatenation failed with a dimension mismatch as soon as a cell
        % argument was combined with other names of a different count,
        % e.g. runtests('a', {'b', 'c'}).
        name_list = [name_list, reshape(arg, 1, [])]; %#ok<AGROW>
    elseif ~isempty(arg) && (arg(1) == '-')
        if strcmp(arg, '-verbose')
            verbose = true;
        elseif strcmp(arg, '-logfile')
            if k == numel(varargin)
                error('xunit:runtests:MissingLogfile', ...
                    'The option -logfile must be followed by a filename.');
            else
                % The next argument is the file name; consume it as well.
                logfile = varargin{k+1};
                k = k + 1;
            end
        else
            warning('runtests:unrecognizedOption', 'Unrecognized option: %s', arg);
        end
    else
        name_list{end+1} = arg; %#ok<AGROW>
    end
    k = k + 1;
end
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/runtests.m
================================================
% Prepend ../../bin to the load path, then run every Octave test driver.
path('../../bin', path)
test_mdwt
test_midwt
test_mirdwt
test_mrdwt
test_makesig
test_denoise
test_setopt

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_denoise.m
================================================
function test_denoise
% Driver: print the group label, then run each denoise test in turn.
disp("denoise")
test_denoise_default
test_denoise_2d
test_denoise_threshold_low
test_denoise_thresh_multiplier
test_denoise_std
test_denoise_hard
test_denoise_levels
test_denoise_actual_thresh

function [noisy, filt] = doppler_fixture()
% Shared 1-D input: 32-sample Doppler signal plus a fixed noise vector
% scaled by 1/10, and the daubcqf(6) filter used by every 1-D test.
clean = makesig('Doppler', 32);
perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 ...
                -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 ...
                -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 ...
                -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 ...
                1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 ...
                -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 ...
                0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 ...
                0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
noisy = clean + perturbation / 10;
filt = daubcqf(6);

function test_denoise_default
% denoise with no type/options arguments.
[noisy, filt] = doppler_fixture();
[denoised, removed_noise, used_options] = denoise(noisy, filt);
expected = [0.0741827688375062 0.0791701902526268 0.0760842615272340 0.0750476831774179 ...
            0.111279774779568 0.163475053283544 -0.0498263815350539 0.0946073088237311 ...
            0.135126562486911 -0.0186090620958193 -0.0748812479991294 -0.103470206059426 ...
            0.0234254843251780 0.239772540836257 0.0920583398962312 -0.152180640366891 ...
            -0.116682073306156 -0.0459389850762785 -0.00245240039778375 0.0755739164104836 ...
            0.102548333512214 0.121099911744184 0.177390507921620 0.240386041553093 ...
            0.231105933317157 0.198210924493273 0.175672812990725 0.138822049613034 ...
            0.127491615387826 0.121409597186325 0.0994935320130783 0.0760019340865427];
assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_2d
% denoise on a 4x4 matrix input with the daubcqf(4) filter.
grid_in = [1 2 3 4; 5 6 7 8 ; 9 10.09 11 12; 13 13.91 15 16];
filt = daubcqf(4);
[denoised, removed_noise, used_options] = denoise(grid_in, filt);
expected = [1.093495801587334 2.052784169768518 3.036985129109070 4.014510779767102;
            5.037416383975946 6.006178652683398 6.994963120759174 7.978382656683513;
            9.047593546684929 10.003998510025589 10.977825887256145 11.94698494275469;
            13.009489364401729 13.937038667522501 14.939852728547271 15.9224996584731398];
assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_threshold_low
% Options vector [1 3.0 0 2 0 0].
[noisy, filt] = doppler_fixture();
[denoised, removed_noise, used_options] = denoise(noisy, filt, 0, [1 3.0 0 2 0 0]);
expected = [0.0187742354278351 0.0237616568429558 0.0206757281175629 0.0196391497677469 ...
            0.0558712413698966 0.108066519873873 -0.105234914944725 0.0391987754140600 ...
            0.0797180290772401 -0.0740175955054904 -0.130289781408801 -0.158878739469097 ...
            -0.0319830490844931 0.184364007426586 0.0366498064865601 -0.207589173776562 ...
            -0.172090606715827 -0.101347518485950 -0.0578609338074549 0.0201653830008125 ...
            0.0471398001025425 0.0656913783345127 0.121981974511949 0.184977508143422 ...
            0.175697399907486 0.142802391083602 0.120264279581054 0.0834135162033633 ...
            0.0720830819781554 0.0660010637766539 0.0440849986034073 0.0205934006768717];
assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_thresh_multiplier
% Options vector [1 3.5 0 2 0 0].
[noisy, filt] = doppler_fixture();
[denoised, removed_noise, used_options] = denoise(noisy, filt, 0, [1 3.5 0 2 0 0]);
expected = [0.00563527074803461 0.0110853052404048 0.0101590193471916 0.0116789518546074 ...
            0.0354625658443208 0.0691904606426981 -0.0647010252187970 0.0393485097012034 ...
            0.0302297746478269 -0.0658230296401878 -0.0947938063374137 -0.147943151851009 ...
            -0.0355607514547514 0.143027827800490 0.0126752977970079 -0.200577663821584 ...
            -0.149059259007655 -0.0564432101940217 -0.0281365070661950 0.0201021371871464 ...
            0.0438412772787373 0.0596866399869512 0.0967101937989458 0.136451641917565 ...
            0.130716307107088 0.109146914388131 0.0925200849653435 0.0657607417363412 ...
            0.0550584910898860 0.0469636231448182 0.0277268486177313 0.00667135407398081];
assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_std
% Options vector [0 3.0 1 2 0 0].
[noisy, filt] = doppler_fixture();
[denoised, removed_noise, used_options] = denoise(noisy, filt, 0, [0 3.0 1 2 0 0]);
expected = [0.0686926069658060 0.0706216045196474 0.0719769032529757 0.0743568305131058 ...
            0.0754251996534692 0.0763549103855611 0.0783972750744446 0.0807092136475563 ...
            0.0763109954998047 0.0693017683604205 0.0628697537191382 0.0547492531677562 ...
            0.0755519478401559 0.107931256046656 0.0859959791464885 0.0494376118339224 ...
            0.0602059364595448 0.0785077229738383 0.0791999606842265 0.0809410605777517 ...
            0.0844652184548917 0.0873749084881920 0.0911535278085727 0.0952027332951270 ...
            0.0936316016468421 0.0898878427420561 0.0866734185917041 0.0820709685744921 ...
            0.0793481432323076 0.0768306965269240 0.0727995727792393 0.0684196591566048];
assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_hard
% Options vector [0 3.0 0 1 0 0].
[noisy, filt] = doppler_fixture();
[denoised, removed_noise, used_options] = denoise(noisy, filt, 0, [0 3.0 0 1 0 0]);
expected = [0.0977394160103721 0.0994161560983385 0.0832447407807381 0.0666983311697188 ...
            0.177420971595413 0.340230583897110 -0.354597069671295 0.0250017872275015 ...
            0.394418485343238 -0.0595745304374512 -0.452401570793399 -0.175707560852101 ...
            -0.00622320325130765 0.437867065411816 0.187485346584306 -0.241060664687049 ...
            -0.306285896120773 -0.373946536466370 -0.246165924475657 0.00210496326791051 ...
            0.0528629966064817 0.0967383656953347 0.275410693617439 0.487298926169970 ...
            0.454985253718689 0.348603331393631 0.288205743942248 0.186806596496260 ...
            0.172147260405660 0.180050851714681 0.142136445826288 0.104484725401481];
assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_levels
% Options vector [0 3.0 0 2 4 0].
[noisy, filt] = doppler_fixture();
[denoised, removed_noise, used_options] = denoise(noisy, filt, 0, [0 3.0 0 2 4 0]);
expected = [0.164259992817262 0.156379071218712 0.142212685671703 0.125038963573761 ...
            0.150297815252073 0.191536767978636 -0.0381639580765735 0.0881092032192094 ...
            0.119629284458486 -0.0406090725365491 -0.105645426731493 -0.141820831994602 ...
            -0.0280318977202704 0.173171960129832 0.0117537437282443 -0.247115729957293 ...
            -0.206759297285911 -0.123147866042363 -0.0685808245422524 0.0255826360141400 ...
            0.0635302930397082 0.0930381970490923 0.165728084463140 0.246884147157615 ...
            0.246603211345582 0.220210934934003 0.206436991723089 0.177172675548210 ...
            0.178948997433275 0.188010177892750 0.179798128181065 0.170937023676945];
assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_actual_thresh
% Options vector [0 3.0 0 2 0 0.5].
[noisy, filt] = doppler_fixture();
[denoised, removed_noise, used_options] = denoise(noisy, filt, 0, [0 3.0 0 2 0 0.5]);
expected = [0.0607099183942295 0.0654351521193524 0.0684154759800610 0.0742018934148454 ...
            0.0758845005390013 0.0769511530643110 0.0810856606730252 0.0858023375316036 ...
            0.0704706443350518 0.0472060906047587 0.0254329679518446 -0.00154590940405266 ...
            0.0598455182579352 0.156556707841878 0.0864272987162393 -0.0287835335280487 ...
            0.00606017120154721 0.0659592575432934 0.0713958080495586 0.0812891735076492 ...
            0.0953701981347179 0.107554576791239 0.123739146895592 0.141180422640726 ...
            0.137085044622601 0.124838366760086 0.114852957437233 0.0997294000571788 ...
            0.0922174665178409 0.0857758976557685 0.0737052631031342 0.0605470542090229];
assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_makesig.m
================================================
function test_suite = test_makesig
% Driver: print the group label, then run each makesig test in turn.
% test_suite is declared but never assigned here.
disp("makesig")
test_makesig_heavisine
test_makesig_bumps
test_makesig_blocks
test_makesig_doppler
test_makesig_ramp
test_makesig_cusp
test_makesig_sing
test_makesig_hisine
test_makesig_losine
test_makesig_linchirp
test_makesig_twochirp
test_makesig_quadchirp
test_makesig_mishmash
test_makesig_wernersorrows
test_makesig_leopold

function test_makesig_heavisine
x = makesig('HeaviSine', 8);
y = [4.0000 0.0000 -6.0000 -2.0000 2.0000 0.0000 -4.0000 -0.0000];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_bumps
x = makesig('Bumps', 8);
y = [0.3206 5.0527 0.3727 0.0129 0.0295 0.0489 0.0004 0.0000];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_blocks
x = makesig('Blocks', 8);
y = [4.0000 0.5000 3.0000 0.9000 0.9000 5.2000 -0.0000 -0.0000];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_doppler
x = makesig('Doppler', 12);
y = [-0.1954 -0.3067 0.0000 -0.4703 0.4930 -0.2703 -0.4127 0.1025 0.4001 0.3454 0.1425 0];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_ramp
x = makesig('Ramp', 8);
y = [0.1250 0.2500 -0.6250 -0.5000 -0.3750 -0.2500 -0.1250 0];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_cusp
x = makesig('Cusp', 8);
y = [0.4950 0.3464 0.0707 0.3606 0.5050 0.6164 0.7106 0.7937];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_sing
x = makesig('Sing', 8);
y = [5.3333 16.0000 16.0000 5.3333 3.2000 2.2857 1.7778 1.4545];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_hisine
x = makesig('HiSine', 8);
y = [0.8267 -0.9302 0.2200 0.6827 -0.9882 0.4292 0.5053 -0.9977];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_losine
x = makesig('LoSine', 8);
y = [0.8660 0.8661 0.0003 -0.8658 -0.8663 -0.0006 0.8657 0.8664];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_linchirp
x = makesig('LinChirp', 8);
y = [0.0491 0.1951 0.4276 0.7071 0.9415 0.9808 0.6716 0.0000];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_twochirp
x = makesig('TwoChirp', 8);
y = [0.5132 1.5000 0.5412 0.8660 -0.5132 0 0.5132 0.8660];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_quadchirp
x = makesig('QuadChirp', 8);
y = [0.0164 0.1305 0.4276 0.8660 0.8895 -0.3827 -0.6217 0.8660];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_mishmash
x = makesig('MishMash', 8);
y = [0.8922 -0.6046 1.0751 2.2558 0.8429 1.0273 0.5551 -0.1317];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_wernersorrows
x = makesig('WernerSorrows', 8);
y = [1.5545 5.3175 0.8252 1.6956 -1.2678 0.6466 1.7332 -0.9977];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

function test_makesig_leopold
x = makesig('Leopold', 8);
y = [0 1 0 0 0 0 0 0];
assertVectorsAlmostEqual(x, y, 'relative', 0.0001);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_mdwt.m
================================================
function test_suite = test_mdwt
% Driver: print the group label, then run each mdwt test in turn.
% test_suite is declared but never assigned here.
disp("mdwt")
test_mdwt_1D
test_mdwt_2D
test_mdwt_compute_L1
test_mdwt_compute_L2
test_mdwt_compute_L3

function test_mdwt_1D
x = makesig('LinChirp', 8);
h = daubcqf(4, 'min');
L = 2; % For 8 values in x we would normally be L=2
[y, L] = mdwt(x, h, L);
y_corr = [1.1097 0.8767 0.8204 -0.5201 -0.0339 0.1001 0.2201 -0.1401];
L_corr = 2;
assertVectorsAlmostEqual(y, y_corr, 'relative', 0.001);
assertEqual(L, L_corr);

function test_mdwt_2D
x = [1 2 3 4; 5 6 7 8 ; 9 10 11 12; 13 14 15 16];
h = daubcqf(4);
y = mdwt(x, h);
y_corr = [34.0000 -3.4641 0.0000 -2.0000; -13.8564 0.0000 0.0000 -2.0000; -0.0000 0.0000 -0.0000 -0.0000; -8.0000 -8.0000 0.0000 -0.0000];
assertVectorsAlmostEqual(y, y_corr, 'relative', 0.001);

function test_mdwt_compute_L1
% L is not passed in; the test checks the level mdwt returns for 2 samples.
x = [1 2];
h = daubcqf(4, 'min');
[y, L] = mdwt(x, h);
assertEqual(L, 1);

function test_mdwt_compute_L2
% L is not passed in; the test checks the level mdwt returns for 4 samples.
x = [1 2 3 4];
h = daubcqf(4, 'min');
[y, L] = mdwt(x, h);
assertEqual(L, 2);

function test_mdwt_compute_L3
% L is not passed in; the test checks the level mdwt returns for 8 samples.
x = [1 2 3 4 5 6 7 8];
h = daubcqf(4, 'min');
[y, L] = mdwt(x, h);
assertEqual(L, 3);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_midwt.m
================================================
function test_suite = test_midwt
% Driver: print the group label, then run each midwt test in turn.
% test_suite is declared but never assigned here.
disp("midwt")
test_midwt_1D
test_midwt_2D

function test_midwt_1D
% Round trip: midwt(mdwt(x)) should reproduce x.
x = makesig('LinChirp',8);
h = daubcqf(4,'min');
L = 2;
[y,L] = mdwt(x,h,L);
[x_new,L] = midwt(y,h,L);
assertVectorsAlmostEqual(x, x_new,'relative',0.0001);

function test_midwt_2D
% Round trip on the lena512 image loaded from ../lena512.
load ../lena512;
x = lena512;
h = daubcqf(6);
[y,L] = mdwt(x,h);
[x_new,L] = midwt(y,h);
assertEqual(L,9);
assertVectorsAlmostEqual(x, x_new,'relative',0.0001);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_mirdwt.m
================================================
function test_suite = test_mirdwt
% Driver: print the group label, then run each mirdwt test in turn.
% test_suite is declared but never assigned here.
% The label used to read "mrdwt" (copied from test_mrdwt), which made the
% two groups indistinguishable in the test log.
disp("mirdwt")
test_mirdwt_1
test_mirdwt_2

function test_mirdwt_1
% Round trip: mirdwt(mrdwt(x)) should reproduce x and keep L.
xin = makesig('Leopold',8);
h = daubcqf(4,'min');
Lin = 1;
[yl,yh,L] = mrdwt(xin,h,Lin);
[x,L] = mirdwt(yl,yh,h,L);
assertEqual(L,Lin);
assertVectorsAlmostEqual(x, xin,'relative',0.0001);

function test_mirdwt_2
% Round trip on the lena512 image loaded from ../lena512.
load ../lena512;
x = lena512;
h = daubcqf(6);
[yl,yh,L] = mrdwt(x,h);
assertEqual(L,9);
[x_new,L] = mirdwt(yl,yh,h);
assertEqual(L,9);
assertVectorsAlmostEqual(x, x_new,'relative',0.0001);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_mrdwt.m
================================================
function test_suite = test_mrdwt
% Driver: print the group label, then run each mrdwt test in turn.
% test_suite is declared but never assigned here.
disp("mrdwt")
test_mrdwt_1
test_mrdwt_2
test_mrdwt_2L2

function test_mrdwt_1
x = makesig('Leopold',8);
h = daubcqf(4,'min');
L = 1;
[yl, yh, L] = mrdwt(x, h, L);
yl_corr = [0.8365 0.4830 0 0 0 0 -0.1294 0.2241];
yh_corr = [-0.2241 -0.1294 0 0 0 0 -0.4830 0.8365];
L_corr = 1;
assertVectorsAlmostEqual(yl, yl_corr, 'relative', 0.001);
assertVectorsAlmostEqual(yh, yh_corr, 'relative', 0.001);
assertEqual(L, L_corr);

function test_mrdwt_2
x = [1 3 5 2; 3 4 8 1; 3 9 2 0; 1 2 3 0];
h = daubcqf(4, 'min');
[yl, yh, L] = mrdwt(x, h, 1);
yl_corr = [ 9.0111 10.7799 5.8795 4.1107;
            11.1393 8.7766 2.5502 4.9130;
            6.9465 5.7578 1.6630 2.8517;
            4.8182 7.7611 4.9922 2.0494];
yh_corr = [ 4.5724 0.4285 -1.8828 2.2611 4.8714 -3.1026 -1.7978 0.0290 -2.9620 -1.1818 -1.1295 5.2733;
            -2.4441 -2.4318 -1.4465 -1.4587 1.8861 -4.2488 -1.9776 4.3403 -0.0233 0.0356 0.9498 -0.9620;
            -1.7488 -0.5870 0.5592 -0.6026 1.1663 -2.3550 -1.7398 2.9285 -0.6965 1.8583 -0.7120 -0.4498;
            -0.3795 2.5903 2.7700 -0.1998 4.1516 -1.2087 -1.5601 -1.3828 3.6818 -0.7120 0.8917 -3.8615];
assertVectorsAlmostEqual(yl, yl_corr, 'relative', 0.001);
assertVectorsAlmostEqual(yh, yh_corr, 'relative', 0.001);

function test_mrdwt_2L2
x = [1 3 5 2; 3 4 8 1; 3 9 2 0; 1 2 3 0];
h = daubcqf(4, 'min');
[yl, yh, L] = mrdwt(x, h, 2);
yl_corr = [ 11.7500 11.7500 11.7500 11.7500;
            11.7500 11.7500 11.7500 11.7500;
            11.7500 11.7500 11.7500 11.7500;
            11.7500 11.7500 11.7500 11.7500];
yh_corr = [ 4.5724 0.4285 -1.8828 2.2611 4.8714 -3.1026 -1.7978 0.0290 -2.9620 -1.1818 -1.1295 5.2733 ...
            3.1405 3.1405 3.1405 3.1405 4.2075 4.7877 -4.2075 -4.7877 -1.0760 1.8816 1.0760 -1.8816;
            -2.4441 -2.4318 -1.4465 -1.4587 1.8861 -4.2488 -1.9776 4.3403 -0.0233 0.0356 0.9498 -0.9620 ...
            1.9396 1.9396 1.9396 1.9396 4.2075 4.7877 -4.2075 -4.7877 4.3816 -0.9240 -4.3816 0.9240;
            -1.7488 -0.5870 0.5592 -0.6026 1.1663 -2.3550 -1.7398 2.9285 -0.6965 1.8583 -0.7120 -0.4498 ...
            -3.1405 -3.1405 -3.1405 -3.1405 4.2075 4.7877 -4.2075 -4.7877 1.0760 -1.8816 -1.0760 1.8816;
            -0.3795 2.5903 2.7700 -0.1998 4.1516 -1.2087 -1.5601 -1.3828 3.6818 -0.7120 0.8917 -3.8615 ...
            -1.9396 -1.9396 -1.9396 -1.9396 4.2075 4.7877 -4.2075 -4.7877 -4.3816 0.9240 4.3816 -0.9240];
assertVectorsAlmostEqual(yl, yl_corr, 'relative', 0.001);
assertVectorsAlmostEqual(yh, yh_corr, 'relative', 0.001);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_setopt.m
================================================
function test_suite = test_setopt
% Driver: print the group label, then run each setopt test in turn.
% test_suite is declared but never assigned here.
disp("setopt")
test_setopt_all_defaults
test_setopt_nonzero_becomes_zero

function test_setopt_all_defaults
% An empty option vector should yield the defaults unchanged.
x = [];
default_opts = [5 6 7 8];
z = setopt(x, default_opts);
z_corr = [5 6 7 8];
assertVectorsAlmostEqual(z, z_corr, 'relative', 0.0001);

function test_setopt_nonzero_becomes_zero
% The expected result replaces the zero entry and the missing fourth
% entry with the corresponding defaults.
x = [1 0 3];
default_opts = [5 6 7 8];
z = setopt(x, default_opts);
z_corr = [1 6 3 8];
%z_corr = [1 0 3 8]; % This would be more intuitive
assertVectorsAlmostEqual(z, z_corr, 'relative', 0.0001);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/readme
================================================
test

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/runtests.m
================================================
path(path, '../bin')
path(path, 'matlab_xunit/xunit')
test_mdwt
test_midwt
test_mirdwt
test_mrdwt
test_makesig
test_denoise
test_setopt
test_daubcqf

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_daubcqf.m
================================================
function test_suite = test_daubcqf
% xUnit entry point; the initTestSuite script collects the test_*
% subfunctions of this file.
initTestSuite;

function test_daubcqf_min
% Calls daubcqf(4) with no type argument.
[a, b] = daubcqf(4);
ax = [0.482962913144534 0.836516303737808 0.224143868042013 -0.129409522551260];
bx = [0.129409522551260 0.224143868042013 -0.836516303737808 0.482962913144534];
assertVectorsAlmostEqual(a, ax, 'relative', 0.001);
assertVectorsAlmostEqual(b, bx, 'relative', 0.001);

function test_daubcqf_max
[a, b] = daubcqf(4, 'max');
ax = [-0.129409522551260 0.224143868042013 0.836516303737808 0.482962913144534];
bx = [-0.482962913144534 0.836516303737808 -0.224143868042013 -0.129409522551260];
assertVectorsAlmostEqual(a, ax, 'relative', 0.001);
assertVectorsAlmostEqual(b, bx, 'relative', 0.001);

function test_daubcqf_mid_even_k
[a, b] = daubcqf(4, 'mid');
ax = [0.482962913144534 0.836516303737808 0.224143868042013 -0.129409522551260];
bx = [0.129409522551260 0.224143868042013 -0.836516303737808 0.482962913144534];
assertVectorsAlmostEqual(a, ax, 'relative', 0.001);
assertVectorsAlmostEqual(b, bx, 'relative', 0.001);

function test_daubcqf_mid_odd_k
[a, b] = daubcqf(6, 'mid');
ax = [0.332670552950083 0.806891509311093 0.459877502118491 -0.135011020010255 -0.085441273882027 0.035226291885710];
bx = [-0.035226291885710 -0.085441273882027 0.135011020010255 0.459877502118491 -0.806891509311093 0.332670552950083];
assertVectorsAlmostEqual(a, ax, 'relative', 0.001);
assertVectorsAlmostEqual(b, bx, 'relative', 0.001);

function test_daubcqf_odd
% An odd filter length is expected to raise an exception.
handle = @() daubcqf(9);
assertExceptionThrown(handle, '');

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_denoise.m
================================================
function test_suite = test_denoise
initTestSuite;

% We could throw an exception if someone specified zero for SoftTH vs HardTH but currently we don't
%function test_denoise_old_invalid_arg_dwt
% signal = makesig('Doppler', 32);
% h = daubcqf(6);
% badarg_handle = @() denoise(signal, h, 0, [0 3.0 0 0 0 0]);
%assertExceptionThrown(badarg_handle, '');
%function test_denoise_old_invalid_arg_udwt
% signal = makesig('Doppler', 32);
% h = daubcqf(6);
% badarg_handle = @() denoise(signal, h, 1, [0 3.6 0 0 0 0]);
%assertExceptionThrown(badarg_handle, '');

function test_denoise_default
signal = makesig('Doppler', 32);
noise = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857
-1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, subtracted_noise, actual_options] = denoise(with_noise, h); signal_denoised_corr = [0.0741827688375062 0.0791701902526268 0.0760842615272340 0.0750476831774179 0.111279774779568 0.163475053283544 -0.0498263815350539 0.0946073088237311 0.135126562486911 -0.0186090620958193 -0.0748812479991294 -0.103470206059426 0.0234254843251780 0.239772540836257 0.0920583398962312 -0.152180640366891 -0.116682073306156 -0.0459389850762785 -0.00245240039778375 0.0755739164104836 0.102548333512214 0.121099911744184 0.177390507921620 0.240386041553093 0.231105933317157 0.198210924493273 0.175672812990725 0.138822049613034 0.127491615387826 0.121409597186325 0.0994935320130783 0.0760019340865427]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_2d x = [1 2 3 4; 5 6 7 8 ; 9 10.09 11 12; 13 13.91 15 16]; h = daubcqf(4); [signal_denoised, subtracted_noise, actual_options] = denoise(x, h); signal_denoised_corr = [1.093495801587334 2.052784169768518 3.036985129109070 4.014510779767102; 5.037416383975946 6.006178652683398 6.994963120759174 7.978382656683513; 9.047593546684929 10.003998510025589 10.977825887256145 11.94698494275469; 13.009489364401729 13.937038667522501 14.939852728547271 15.9224996584731398]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_udwt signal = makesig('Doppler', 32); noise = 
[1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, subtracted_noise, actual_options] = denoise(with_noise, h, 1); signal_denoised_corr = [0.126244615385152 0.0952319712425300 0.0671343607152503 0.0513902979722585 0.0430402732682634 0.0586932575131794 0.0861069751902698 0.0989949047763016 0.0908418658128637 -0.0141454670119059 -0.144791527437026 -0.0185533166035902 0.278351613782131 0.279033706376659 -0.0205012032054263 -0.212367658407976 -0.241484343697995 -0.248582298831059 -0.213374214781743 -0.101963712141109 0.0454248851310567 0.181104333949749 0.275294407293259 0.309076259882059 0.298600450385073 0.259080737796607 0.211123535801718 0.183021783525739 0.171966340866576 0.171616812586097 0.168720006300193 0.151066428184072]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_udwt_2d x = [1 2 3 4; 5 6 7 8 ; 9 10.09 11 12; 13 13.91 15 16]; h = daubcqf(4); [signal_denoised, subtracted_noise, actual_options] = denoise(x, h, 1); signal_denoised_corr = [ 1.007040488866197 1.993405274521765 3.006268404030089 3.996424654030090; 4.995935171857875 6.002401216530091 7.001252328142127 8.005847881693983; 9.009508189685661 10.059981743374523 11.001190131625481 11.999030274521770; 12.987516149590270 13.944211765573623 14.991289136202310 15.998697189754166]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 
0.0001); function test_denoise_threshold_low signal = makesig('Doppler', 32); noise = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, subtracted_noise, actual_options] = denoise(with_noise, h, 0, [1 3.0 0 2 0 0]); signal_denoised_corr = [0.0187742354278351 0.0237616568429558 0.0206757281175629 0.0196391497677469 0.0558712413698966 0.108066519873873 -0.105234914944725 0.0391987754140600 0.0797180290772401 -0.0740175955054904 -0.130289781408801 -0.158878739469097 -0.0319830490844931 0.184364007426586 0.0366498064865601 -0.207589173776562 -0.172090606715827 -0.101347518485950 -0.0578609338074549 0.0201653830008125 0.0471398001025425 0.0656913783345127 0.121981974511949 0.184977508143422 0.175697399907486 0.142802391083602 0.120264279581054 0.0834135162033633 0.0720830819781554 0.0660010637766539 0.0440849986034073 0.0205934006768717]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_thresh_multiplier signal = makesig('Doppler', 32); noise = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 
0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, subtracted_noise, actual_options] = denoise(with_noise, h, 0, [1 3.5 0 2 0 0]); signal_denoised_corr = [0.00563527074803461 0.0110853052404048 0.0101590193471916 0.0116789518546074 0.0354625658443208 0.0691904606426981 -0.0647010252187970 0.0393485097012034 0.0302297746478269 -0.0658230296401878 -0.0947938063374137 -0.147943151851009 -0.0355607514547514 0.143027827800490 0.0126752977970079 -0.200577663821584 -0.149059259007655 -0.0564432101940217 -0.0281365070661950 0.0201021371871464 0.0438412772787373 0.0596866399869512 0.0967101937989458 0.136451641917565 0.130716307107088 0.109146914388131 0.0925200849653435 0.0657607417363412 0.0550584910898860 0.0469636231448182 0.0277268486177313 0.00667135407398081]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_std signal = makesig('Doppler', 32); noise = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, subtracted_noise, actual_options] = denoise(with_noise, h, 0, [0 3.0 1 2 0 0]); signal_denoised_corr = [0.0686926069658060 0.0706216045196474 0.0719769032529757 0.0743568305131058 0.0754251996534692 
0.0763549103855611 0.0783972750744446 0.0807092136475563 0.0763109954998047 0.0693017683604205 0.0628697537191382 0.0547492531677562 0.0755519478401559 0.107931256046656 0.0859959791464885 0.0494376118339224 0.0602059364595448 0.0785077229738383 0.0791999606842265 0.0809410605777517 0.0844652184548917 0.0873749084881920 0.0911535278085727 0.0952027332951270 0.0936316016468421 0.0898878427420561 0.0866734185917041 0.0820709685744921 0.0793481432323076 0.0768306965269240 0.0727995727792393 0.0684196591566048]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_hard signal = makesig('Doppler', 32); noise = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, subtracted_noise, actual_options] = denoise(with_noise, h, 0, [0 3.0 0 1 0 0]); signal_denoised_corr = [0.0977394160103721 0.0994161560983385 0.0832447407807381 0.0666983311697188 0.177420971595413 0.340230583897110 -0.354597069671295 0.0250017872275015 0.394418485343238 -0.0595745304374512 -0.452401570793399 -0.175707560852101 -0.00622320325130765 0.437867065411816 0.187485346584306 -0.241060664687049 -0.306285896120773 -0.373946536466370 -0.246165924475657 0.00210496326791051 0.0528629966064817 0.0967383656953347 0.275410693617439 0.487298926169970 0.454985253718689 0.348603331393631 0.288205743942248 0.186806596496260 0.172147260405660 0.180050851714681 
0.142136445826288 0.104484725401481]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_levels signal = makesig('Doppler', 32); noise = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, subtracted_noise, actual_options] = denoise(with_noise, h, 0, [0 3.0 0 2 4 0]); signal_denoised_corr = [0.164259992817262 0.156379071218712 0.142212685671703 0.125038963573761 0.150297815252073 0.191536767978636 -0.0381639580765735 0.0881092032192094 0.119629284458486 -0.0406090725365491 -0.105645426731493 -0.141820831994602 -0.0280318977202704 0.173171960129832 0.0117537437282443 -0.247115729957293 -0.206759297285911 -0.123147866042363 -0.0685808245422524 0.0255826360141400 0.0635302930397082 0.0930381970490923 0.165728084463140 0.246884147157615 0.246603211345582 0.220210934934003 0.206436991723089 0.177172675548210 0.178948997433275 0.188010177892750 0.179798128181065 0.170937023676945]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_actual_thresh signal = makesig('Doppler', 32); noise = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 
-0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, subtracted_noise, actual_options] = denoise(with_noise, h, 0, [0 3.0 0 2 0 0.5]); signal_denoised_corr = [0.0607099183942295 0.0654351521193524 0.0684154759800610 0.0742018934148454 0.0758845005390013 0.0769511530643110 0.0810856606730252 0.0858023375316036 0.0704706443350518 0.0472060906047587 0.0254329679518446 -0.00154590940405266 0.0598455182579352 0.156556707841878 0.0864272987162393 -0.0287835335280487 0.00606017120154721 0.0659592575432934 0.0713958080495586 0.0812891735076492 0.0953701981347179 0.107554576791239 0.123739146895592 0.141180422640726 0.137085044622601 0.124838366760086 0.114852957437233 0.0997294000571788 0.0922174665178409 0.0857758976557685 0.0737052631031342 0.0605470542090229]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_udwt_threshold_low signal = makesig('Doppler', 32); noise = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, subtracted_noise, actual_options] = denoise(with_noise, h, 1, [1 3.0 0 1 0 0]); 
signal_denoised_corr = [0.135039400483741 0.117805175604609 0.0967709584177031 0.0142060292567307 -0.0239840294603812 0.323425861331697 -0.212285200125643 0.166066657685731 0.136653739821785 -0.0361708285655289 -0.244622217319313 -0.0751486112344819 0.279128997196628 0.299915294672821 0.00822389077239383 -0.232180770499244 -0.330137263335199 -0.293955318206172 -0.175538926380835 -0.0733568677543535 0.049241196655251 0.200165899490694 0.304615650610263 0.337325376378116 0.325593984310807 0.282048956150932 0.228861081870546 0.196656880842149 0.180959366486141 0.175210410022406 0.169828050229736 0.155033256209497]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_udwt_thresh_multiplier signal = makesig('Doppler', 32); noise = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, subtracted_noise, actual_options] = denoise(with_noise, h, 1, [1 3.5 0 1 0 0]); signal_denoised_corr = [0.0479478506866607 0.0160653046305043 -0.012660890293452 -0.0292521383561941 -0.0383355043751224 -0.0239494802109215 0.00200042536526626 0.0135636610003902 0.00399637041195728 -0.100521378500944 -0.229923524965501 -0.102614225576592 0.195850596270724 0.197593413336102 -0.100882406775293 -0.291163630119251 -0.318524834100706 -0.324752887320235 -0.288916218874243 -0.176658530913858 -0.028536592326759 0.108409816572649 0.204063702017061 
0.239170248556769 0.230108690684778 0.190119394184444 0.14091827822899 0.11174543739754 0.0991301032767805 0.0977198505254529 0.0937639547688583 0.0745251447941448]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_udwt_std signal = makesig('Doppler', 32); noise = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, subtracted_noise, actual_options] = denoise(with_noise, h, 1, [0 3.0 1 1 0 0]); signal_denoised_corr = [0.0847626939447046 0.0648669375488877 0.0505127048998841 0.0431477690668965 0.0443458995091662 0.0638361516754724 0.0926698200065443 0.122716357496751 0.135591683864019 0.0377466753027189 -0.0889166586897228 -0.0310700016943258 0.16530654803759 0.237349858169585 0.0577692051497442 -0.137751577705709 -0.18354744395111 -0.188205427540335 -0.157902857480421 -0.055391323576937 0.0791892398460303 0.198068185997372 0.271471422836112 0.282275886815228 0.246689293630916 0.205546705496588 0.16546007731141 0.145130898382968 0.1471329636038 0.142472749823065 0.132163448290946 0.111958195551385]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_udwt_soft signal = makesig('Doppler', 32); noise = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 
0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, subtracted_noise, actual_options] = denoise(with_noise, h, 1, [0 3.0 0 2 0 0]); signal_denoised_corr = [0.086668016749428 0.078090652632278 0.070455842749544 0.062824684205684 0.064249795534642 0.086899924318641 0.053549539548214 0.100644175366308 0.100726560037458 0.051479406046214 -0.011299945211104 0.036115394710961 0.147624998547612 0.159516308766960 0.059119062682569 -0.020817294484415 -0.042170912413038 -0.046825168298822 -0.027179285827824 0.017379645805457 0.071225126011476 0.123532780238470 0.153926034241219 0.160138755049699 0.153562168658336 0.138748019440599 0.123707805352361 0.115223425612607 0.110890877355381 0.107909648973443 0.103630954238181 0.095849084980685]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_udwt_levels signal = makesig('Doppler', 32); noise = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, 
subtracted_noise, actual_options] = denoise(with_noise, h, 1, [0 3.0 0 1 4 0]); signal_denoised_corr = [0.137633389000662 0.120676804147327 0.099782758215143 0.015698574020267 -0.025118098815379 0.319788331991522 -0.217919217670089 0.160238201773756 0.131270340429534 -0.041415802797292 -0.249853610380694 -0.080126740883778 0.275034335985338 0.296982831400265 0.006200146572810 -0.234309647934845 -0.332731251852120 -0.296826946748889 -0.178550726178275 -0.074849412517890 0.050375266010248 0.203803428830869 0.310249668154709 0.343153832290091 0.330977383703058 0.287293930382695 0.234092474931927 0.201635010491445 0.185054027697432 0.178142873294961 0.171851794429319 0.157162133645098]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); function test_denoise_udwt_actual_thresh signal = makesig('Doppler', 32); noise = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940]; with_noise = signal + noise / 10; h = daubcqf(6); [signal_denoised, subtracted_noise, actual_options] = denoise(with_noise, h, 1, [0 3.0 0 1 0 0.5]); signal_denoised_corr = [0.126244615385152 0.09523197124253 0.0671343607152503 0.0513902979722585 0.0430402732682634 0.0586932575131794 0.0861069751902698 0.0989949047763016 0.0908418658128637 -0.0141454670119059 -0.144791527437026 -0.0185533166035902 0.278351613782131 0.279033706376659 -0.0205012032054263 -0.212367658407976 -0.241484343697995 -0.248582298831059 -0.213374214781743 -0.101963712141109 
0.0454248851310567 0.181104333949749 0.275294407293258 0.309076259882059 0.298600450385073 0.259080737796607 0.211123535801717 0.183021783525739 0.171966340866576 0.171616812586097 0.168720006300193 0.151066428184072]; assertVectorsAlmostEqual(signal_denoised, signal_denoised_corr, 'relative', 0.0001); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_makesig.m ================================================ function test_suite = test_makesig initTestSuite; function test_makesig_heavisine x = makesig('HeaviSine', 8); y = [4.0000 0.0000 -6.0000 -2.0000 2.0000 0.0000 -4.0000 -0.0000]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); function test_makesig_bumps x = makesig('Bumps', 8); y = [0.3206 5.0527 0.3727 0.0129 0.0295 0.0489 0.0004 0.0000]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); function test_makesig_blocks x = makesig('Blocks', 8); y = [4.0000 0.5000 3.0000 0.9000 0.9000 5.2000 -0.0000 -0.0000]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); function test_makesig_doppler x = makesig('Doppler', 12); y = [-0.1954 -0.3067 0.0000 -0.4703 0.4930 -0.2703 -0.4127 0.1025 0.4001 0.3454 0.1425 0]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); function test_makesig_ramp x = makesig('Ramp', 8); y = [0.1250 0.2500 -0.6250 -0.5000 -0.3750 -0.2500 -0.1250 0]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); function test_makesig_cusp x = makesig('Cusp', 8); y = [0.4950 0.3464 0.0707 0.3606 0.5050 0.6164 0.7106 0.7937]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); function test_makesig_sing x = makesig('Sing', 8); y = [5.3333 16.0000 16.0000 5.3333 3.2000 2.2857 1.7778 1.4545]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); function test_makesig_hisine x = makesig('HiSine', 8); y = [0.8267 -0.9302 0.2200 0.6827 -0.9882 0.4292 0.5053 -0.9977]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); function test_makesig_losine x = makesig('LoSine', 8); y = 
[0.865973039158459 0.866130104544730 0.000314159260191 -0.865815888304075 -0.866287084447387 -0.000628318489377 0.865658651997088 0.866443978850937]; assertVectorsAlmostEqual(x, y, 'relative', 0.0000001); function test_makesig_linchirp x = makesig('LinChirp', 8); y = [0.0491 0.1951 0.4276 0.7071 0.9415 0.9808 0.6716 0.0000]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); function test_makesig_twochirp x = makesig('TwoChirp', 8); y = [0.5132 1.5000 0.5412 0.8660 -0.5132 0 0.5132 0.8660]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); function test_makesig_quadchirp x = makesig('QuadChirp', 8); y = [0.0164 0.1305 0.4276 0.8660 0.8895 -0.3827 -0.6217 0.8660]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); function test_makesig_mishmash x = makesig('MishMash', 8); y = [0.8922 -0.6046 1.0751 2.2558 0.8429 1.0273 0.5551 -0.1317]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); function test_makesig_wernersorrows x = makesig('WernerSorrows', 8); y = [1.5545 5.3175 0.8252 1.6956 -1.2678 0.6466 1.7332 -0.9977]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); function test_makesig_leopold x = makesig('Leopold', 8); y = [0 1 0 0 0 0 0 0]; assertVectorsAlmostEqual(x, y, 'relative', 0.0001); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_mdwt.m ================================================ function test_suite = test_mdwt initTestSuite; function test_mdwt_1D x = makesig('LinChirp', 8); h = daubcqf(4, 'min'); L = 2; % For 8 values in x we would normally be L=2 [y, L] = mdwt(x, h, L); y_corr = [1.1097 0.8767 0.8204 -0.5201 -0.0339 0.1001 0.2201 -0.1401]; L_corr = 2; assertVectorsAlmostEqual(y, y_corr, 'relative', 0.001); assertEqual(L, L_corr); function test_mdwt_2D x = [1 2 3 4; 5 6 7 8 ; 9 10 11 12; 13 14 15 16]; h = daubcqf(4); y = mdwt(x, h); y_corr = [34.0000 -3.4641 0.0000 -2.0000; -13.8564 0.0000 0.0000 -2.0000; -0.0000 0.0000 -0.0000 -0.0000; -8.0000 -8.0000 0.0000 
-0.0000]; assertVectorsAlmostEqual(y, y_corr, 'relative', 0.001); function test_mdwt_compute_L1 x = [1 2]; h = daubcqf(4, 'min'); [y, L] = mdwt(x, h); assertEqual(L, 1); function test_mdwt_compute_L2 x = [1 2 3 4]; h = daubcqf(4, 'min'); [y, L] = mdwt(x, h); assertEqual(L, 2); function test_mdwt_compute_L3 x = [1 2 3 4 5 6 7 8]; h = daubcqf(4, 'min'); [y, L] = mdwt(x, h); assertEqual(L, 3); function test_mdwt_compute_bad_L L = -1; x = [1 2 3 4 5 6 7 8 9]; h = daubcqf(4, 'min'); mdwtHandle = @() mdwt(x, h); assertExceptionThrown(mdwtHandle, ''); function test_mdwt_empty_input mdwtHandle = @() mdwt([], [0 0 0 0]); assertExceptionThrown(mdwtHandle, ''); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_midwt.m ================================================ function test_suite = test_midwt initTestSuite; function test_midwt_1D x = makesig('LinChirp',8); h = daubcqf(4,'min'); L = 2; [y,L] = mdwt(x,h,L); [x_new,L] = midwt(y,h,L); assertVectorsAlmostEqual(x, x_new,'relative',0.0001); function test_midwt_2D load lena512; x = lena512; h = daubcqf(6); [y,L] = mdwt(x,h); [x_new,L] = midwt(y,h); assertEqual(L,9); assertVectorsAlmostEqual(x, x_new,'relative',0.0001); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_mirdwt.m ================================================ function test_suite = test_mirdwt initTestSuite; function test_mirdwt_1 xin = makesig('Leopold',8); h = daubcqf(4,'min'); Lin = 1; [yl,yh,L] = mrdwt(xin,h,Lin); [x,L] = mirdwt(yl,yh,h,L); assertEqual(L,Lin); assertVectorsAlmostEqual(x, xin,'relative',0.0001); function test_mirdwt_2D load lena512; x = lena512; h = daubcqf(6); [yl,yh,L] = mrdwt(x,h); assertEqual(L,9); [x_new,L] = mirdwt(yl,yh,h); assertEqual(L,9); assertVectorsAlmostEqual(x, x_new,'relative',0.0001); ================================================ FILE: 
02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_mrdwt.m ================================================ function test_suite = test_mrdwt initTestSuite; function test_mrdwt_1 x = makesig('Leopold',8); h = daubcqf(4,'min'); L = 1; [yl, yh, L] = mrdwt(x, h, L); yl_corr = [0.8365 0.4830 0 0 0 0 -0.1294 0.2241]; yh_corr = [-0.2241 -0.1294 0 0 0 0 -0.4830 0.8365]; L_corr = 1; assertVectorsAlmostEqual(yl, yl_corr, 'relative', 0.001); assertVectorsAlmostEqual(yh, yh_corr, 'relative', 0.001); assertEqual(L, L_corr); function test_mrdwt_2 x = [1 3 5 2; 3 4 8 1; 3 9 2 0; 1 2 3 0]; h = daubcqf(4, 'min'); [yl, yh, L] = mrdwt(x, h, 1); yl_corr = [ 9.0111 10.7799 5.8795 4.1107; 11.1393 8.7766 2.5502 4.9130; 6.9465 5.7578 1.6630 2.8517; 4.8182 7.7611 4.9922 2.0494]; yh_corr = [ 4.5724 0.4285 -1.8828 2.2611 4.8714 -3.1026 -1.7978 0.0290 -2.9620 -1.1818 -1.1295 5.2733; -2.4441 -2.4318 -1.4465 -1.4587 1.8861 -4.2488 -1.9776 4.3403 -0.0233 0.0356 0.9498 -0.9620; -1.7488 -0.5870 0.5592 -0.6026 1.1663 -2.3550 -1.7398 2.9285 -0.6965 1.8583 -0.7120 -0.4498; -0.3795 2.5903 2.7700 -0.1998 4.1516 -1.2087 -1.5601 -1.3828 3.6818 -0.7120 0.8917 -3.8615]; assertVectorsAlmostEqual(yl, yl_corr, 'relative', 0.001); assertVectorsAlmostEqual(yh, yh_corr, 'relative', 0.001); function test_mrdwt_2L2 x = [1 3 5 2; 3 4 8 1; 3 9 2 0; 1 2 3 0]; h = daubcqf(4, 'min'); [yl, yh, L] = mrdwt(x, h, 2); yl_corr = [ 11.7500 11.7500 11.7500 11.7500; 11.7500 11.7500 11.7500 11.7500; 11.7500 11.7500 11.7500 11.7500; 11.7500 11.7500 11.7500 11.7500]; yh_corr = [ 4.5724 0.4285 -1.8828 2.2611 4.8714 -3.1026 -1.7978 0.0290 -2.9620 -1.1818 -1.1295 5.2733 ... 3.1405 3.1405 3.1405 3.1405 4.2075 4.7877 -4.2075 -4.7877 -1.0760 1.8816 1.0760 -1.8816; -2.4441 -2.4318 -1.4465 -1.4587 1.8861 -4.2488 -1.9776 4.3403 -0.0233 0.0356 0.9498 -0.9620 ... 
1.9396 1.9396 1.9396 1.9396 4.2075 4.7877 -4.2075 -4.7877 4.3816 -0.9240 -4.3816 0.9240; -1.7488 -0.5870 0.5592 -0.6026 1.1663 -2.3550 -1.7398 2.9285 -0.6965 1.8583 -0.7120 -0.4498 ... -3.1405 -3.1405 -3.1405 -3.1405 4.2075 4.7877 -4.2075 -4.7877 1.0760 -1.8816 -1.0760 1.8816; -0.3795 2.5903 2.7700 -0.1998 4.1516 -1.2087 -1.5601 -1.3828 3.6818 -0.7120 0.8917 -3.8615 ... -1.9396 -1.9396 -1.9396 -1.9396 4.2075 4.7877 -4.2075 -4.7877 -4.3816 0.9240 4.3816 -0.9240]; assertVectorsAlmostEqual(yl, yl_corr, 'relative', 0.001); assertVectorsAlmostEqual(yh, yh_corr, 'relative', 0.001); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_setopt.m ================================================ function test_suite = test_setopt initTestSuite; function test_setopt_all_defaults x = []; default_opts = [5 6 7 8]; z = setopt(x, default_opts); z_corr = [5 6 7 8]; assertVectorsAlmostEqual(z, z_corr, 'relative', 0.0001); function test_setopt_nonzero_becomes_zero x = [1 0 3]; default_opts = [5 6 7 8]; z = setopt(x, default_opts); z_corr = [1 6 3 8]; %z_corr = [1 0 3 8]; % This would be more intuitive assertVectorsAlmostEqual(z, z_corr, 'relative', 0.0001); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/D_lambda.m ================================================ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % Quality with No Reference (QNR). Spectral distortion index. % % Interface: % D_lambda_index = D_lambda(I_F,I_MS,I_MS_LR,S,ratio,p) % % Inputs: % I_F: Pansharpened image; % I_MS: MS image resampled to panchromatic scale; % I_MS_LR: Original MS image; % S: Block size (optional); Default value: 32; % ratio: Resolution ratio; % p: Exponent value (optional); Default value: p = 1. % % Outputs: % D_lambda_index: D_lambda index. % % References: % [Alparone08] L. Alparone, B. Aiazzi, S. Baronti, A. 
Garzelli, F. Nencini, and M. Selva, "Multispectral and panchromatic data fusion assessment without reference,"
%                Photogrammetric Engineering and Remote Sensing, vol. 74, no. 2, pp. 193-200, February 2008.
%   [Vivone14]   G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms",
%                IEEE Transaction on Geoscience and Remote Sensing, 2014. (Accepted)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function D_lambda_index = D_lambda(I_F,I_MS,I_MS_LR,S,ratio,p)
% D_LAMBDA  Spectral distortion index of QNR.
%   For every band pair (i,j) with i before j, a block-wise Q-index between
%   the two bands is averaged over the image, once on the MS data and once
%   on the fused product. The absolute differences of those two averages are
%   raised to p, averaged over all band pairs and taken to the power 1/p.
%
%   S and p may be omitted (or passed as []) to get the defaults announced
%   in the file header: S = 32, p = 1.
%
%   Errors:
%     - I_F and I_MS do not have exactly the same size;
%     - the number of rows/columns of I_F is not a multiple of S.

% Defaults announced in the header but previously never applied.
if nargin < 6 || isempty(p)
    p = 1;
end
if nargin < 4 || isempty(S)
    S = 32;
end

flag_orig_paper = 0; % if 0, Toolbox 1.0, otherwise, original QNR paper

% size(a) ~= size(b) is a vector; "if" on a vector is true only when ALL
% elements are nonzero, so a mismatch in a single dimension went unnoticed
% (and a different number of dimensions raised an unrelated error).
if ~isequal(size(I_F), size(I_MS))
    error('The two input images must have the same dimensions')
end

[N,M,Nb] = size(I_F);

if (rem(N,S) ~= 0)
    error('The number of rows must be multiple of the block size')
end

if (rem(M,S) ~= 0)
    error('The number of columns must be multiple of the block size')
end

D_lambda_index = 0;
for i = 1:Nb-1
    for j = i+1:Nb
        if flag_orig_paper == 0
            %%%%%%% Opt. 1 (as toolbox 1.0)
            band1 = I_MS(:,:,i);
            band2 = I_MS(:,:,j);
            % bs.location(1:2) is used as the top-left row/column of the
            % current block, so band2 is cut at the same position as band1.
            fun_uqi = @(bs) uqi(bs.data,...
                band2(bs.location(1):bs.location(1)+S-1,...
                bs.location(2):bs.location(2)+S-1));
            Qmap_exp = blockproc(band1,[S S],fun_uqi);
        else
            %%%%%%% Opt. 2 (as paper QNR)
            band1 = I_MS_LR(:,:,i);
            band2 = I_MS_LR(:,:,j);
            fun_uqi = @(bs) uqi(bs.data,...
                band2(bs.location(1):bs.location(1)+S/ratio-1,...
                bs.location(2):bs.location(2)+S/ratio-1));
            Qmap_exp = blockproc(band1,[S/ratio S/ratio],fun_uqi);
        end
        Q_exp = mean2(Qmap_exp);

        band1 = I_F(:,:,i);
        band2 = I_F(:,:,j);
        fun_uqi = @(bs) uqi(bs.data,...
            band2(bs.location(1):bs.location(1)+S-1,...
            bs.location(2):bs.location(2)+S-1));
        Qmap_fused = blockproc(band1,[S S],fun_uqi);
        Q_fused = mean2(Qmap_fused);

        D_lambda_index = D_lambda_index + abs(Q_fused-Q_exp)^p;
    end
end

% Number of unordered band pairs, Nb*(Nb-1)/2.
s = ((Nb^2)-Nb)/2;
D_lambda_index = (D_lambda_index/s)^(1/p);

end

%%%%%%% Q-index on x and y images
function Q = uqi(x,y)
% UQI  Q-index between two equally sized blocks.
%   Both inputs are flattened and converted to double. The result is NaN
%   when a denominator is zero (e.g. both blocks constant, or both means 0).

x = double(x(:));
y = double(y(:));
mx = mean(x);
my = mean(y);
C = cov(x,y);

Q = 4 * C(1,2) * mx * my / (C(1,1)+C(2,2)) / (mx^2 + my^2);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/D_lambda_K.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Spectral distortion index of the Hybrid Quality with No Reference (HQNR).
%
% Interface:
%           Dl = D_lambda_K(fused,msexp,ratio,sensor,S)
%
% Inputs:
%           fused:              Pansharpened image;
%           msexp:              MS image resampled to panchromatic scale;
%           sensor:             Type of sensor;
%           ratio:              Resolution ratio;
%           S:                  Block size (optional); Default value: 32.
%
% Outputs:
%           Dl:                 D_lambda index.
%
% Reference:
%           [Khan09]            M. M. Khan, L. Alparone, and J. Chanussot, "Pansharpening quality assessment using the modulation transfer functions of instruments,"
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 47, no. 11, pp. 3880-3891, 2009.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Dl = D_lambda_K(fused,msexp,ratio,sensor,S)
% D_LAMBDA_K  Spectral distortion term of HQNR.
%   The fused image is passed through MTF(fused,sensor,ratio) and compared
%   with msexp through q2n using S-by-S blocks; the result is 1 minus the
%   first output of q2n.
%
%   Errors:
%     - fused and msexp differ in number of rows or columns;
%     - the number of rows/columns of fused is not a multiple of S.

[nRows,nCols,~] = size(fused);

% Only the two spatial dimensions are compared here.
rowsAgree = (nRows == size(msexp,1));
colsAgree = (nCols == size(msexp,2));
if ~(rowsAgree && colsAgree)
    error('The two images must have the same dimensions')
end

rowRemainder = rem(nRows,S);
if rowRemainder ~= 0
    error('number of rows must be multiple of the block size')
end

colRemainder = rem(nCols,S);
if colRemainder ~= 0
    error('number of columns must be multiple of the block size')
end

% NOTE(review): MTF is presumably the sensor MTF-matched low-pass filter;
% its definition is not visible from this file.
degraded = MTF(fused,sensor,ratio);

[q2nValue,~] = q2n(msexp,degraded,S,S);

Dl = 1-q2nValue;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/D_s.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Quality with No Reference (QNR). Spatial distortion index.
%
% Interface:
%           D_s_index = D_s(I_F,I_MS,I_MS_LR,I_PAN,ratio,S,q)
%
% Inputs:
%           I_F:                Pansharpened image;
%           I_MS:               MS image resampled to panchromatic scale;
%           I_MS_LR:            Original MS image;
%           I_PAN:              Panchromatic image;
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value;
%           S:                  Block size (optional); Default value: 32;
%           q:                  Exponent value (optional); Default value: q = 1.
%
% Outputs:
%           D_s_index:          D_s index.
%
% References:
%           [Alparone08]        L. Alparone, B. Aiazzi, S. Baronti, A. Garzelli, F. Nencini, and M. Selva, "Multispectral and panchromatic data fusion assessment without reference,"
%                               Photogrammetric Engineering and Remote Sensing, vol. 74, no. 2, pp. 193-200, February 2008.
%           [Vivone14]          G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms",
%                               IEEE Transaction on Geoscience and Remote Sensing, 2014.
(Accepted)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function D_s_index = D_s(I_F,I_MS,I_MS_LR,I_PAN,ratio,S,q)
% D_S  Spatial distortion index of QNR.
%   For every band, the block-wise Q-index between the fused band and I_PAN
%   is compared with the block-wise Q-index between the corresponding MS band
%   and a filtered PAN. The absolute differences are raised to q, averaged
%   over the bands and taken to the power 1/q.
%
%   S and q may be omitted (or passed as []) to get the defaults announced
%   in the file header: S = 32, q = 1.
%
%   Errors:
%     - I_F and I_MS do not have exactly the same size;
%     - the number of rows/columns of I_F is not a multiple of S.

% Defaults announced in the header but previously never applied.
if nargin < 7 || isempty(q)
    q = 1;
end
if nargin < 6 || isempty(S)
    S = 32;
end

flag_orig_paper = 0; % if 0, Toolbox 1.0, otherwise, original QNR paper

% size(a) ~= size(b) is a vector; "if" on a vector is true only when ALL
% elements are nonzero, so a mismatch in a single dimension went unnoticed
% (and a different number of dimensions raised an unrelated error).
if ~isequal(size(I_F), size(I_MS))
    error('The two images must have the same dimensions')
end

[N, M, Nb] = size(I_F);

if (rem(N,S) ~= 0)
    error('number of rows must be multiple of the block size')
end

if (rem(M,S) ~= 0)
    error('number of columns must be multiple of the block size')
end

if flag_orig_paper == 0
    %%%%%%% Opt. 1 (as toolbox 1.0)
    % PAN is shrunk by 1/ratio, then passed to interp23tap.
    % NOTE(review): interp23tap presumably brings it back to PAN scale - confirm.
    pan_filt = interp23tap(imresize(I_PAN,1./ratio),ratio);
else
    %%%%%%% Opt. 2 (as paper QNR)
    pan_filt = imresize(I_PAN,1./ratio);
end

D_s_index = 0;
for i = 1:Nb
    band1 = I_F(:,:,i);
    band2 = I_PAN;
    % bs.location(1:2) is used as the top-left row/column of the current
    % block, so band2 is cut at the same position as band1.
    fun_uqi = @(bs) uqi(bs.data,...
        band2(bs.location(1):bs.location(1)+S-1,...
        bs.location(2):bs.location(2)+S-1));
    Qmap_high = blockproc(band1,[S S],fun_uqi);
    Q_high = mean2(Qmap_high);

    if flag_orig_paper == 0
        %%%%%%% Opt. 1 (as toolbox 1.0)
        band1 = I_MS(:,:,i);
        band2 = pan_filt;
        fun_uqi = @(bs) uqi(bs.data,...
            band2(bs.location(1):bs.location(1)+S-1,...
            bs.location(2):bs.location(2)+S-1));
        Qmap_low = blockproc(band1,[S S],fun_uqi);
    else
        %%%%%%% Opt. 2 (as paper QNR)
        band1 = I_MS_LR(:,:,i);
        band2 = pan_filt;
        fun_uqi = @(bs) uqi(bs.data,...
            band2(bs.location(1):bs.location(1)+S/ratio-1,...
            bs.location(2):bs.location(2)+S/ratio-1));
        Qmap_low = blockproc(band1,[S/ratio S/ratio],fun_uqi);
    end
    Q_low = mean2(Qmap_low);

    D_s_index = D_s_index + abs(Q_high-Q_low)^q;
end

D_s_index = (D_s_index/Nb)^(1/q);

end

%%%%%%% Q-index on x and y images
function Q = uqi(x,y)
% UQI  Q-index between two equally sized blocks.
%   Both inputs are flattened and converted to double. The result is NaN
%   when a denominator is zero (e.g. both blocks constant, or both means 0).

x = double(x(:));
y = double(y(:));
mx = mean(x);
my = mean(y);
C = cov(x,y);

Q = 4 * C(1,2) * mx * my / (C(1,1)+C(2,2)) / (mx^2 + my^2);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/ERGAS.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Erreur Relative Globale Adimensionnelle de Synthese (ERGAS).
%
% Interface:
%           ERGAS_index = ERGAS(I1,I2,ratio)
%
% Inputs:
%           I1:             First multispectral image;
%           I2:             Second multispectral image;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           ERGAS_index:    ERGAS index.
% References:
%           [Ranchin00]     T. Ranchin and L. Wald, "Fusion of high spatial and spectral resolution images: the ARSIS concept and its implementation",
%                           Photogrammetric Engineering and Remote Sensing, vol. 66, no. 1, pp. 49-61, January 2000.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods",
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function ERGAS_index = ERGAS(I1,I2,ratio)
% ERGAS  Relative dimensionless global error between two multiband images.
%   Per band, the mean squared difference between I1 and I2 is divided by
%   the squared mean of the I1 band; the band average of these terms is
%   square-rooted and scaled by 100/ratio. Both inputs are converted to
%   double first.

reference = double(I1);
candidate = double(I2);

residual = reference - candidate;
nBands = size(residual,3);

% Running sum of (band MSE) / (band mean of I1)^2.
accum = 0;
for band = 1:nBands
    bandMse = mean2(residual(:,:,band).^2);
    bandMean = mean2((reference(:,:,band)));
    accum = accum + bandMse/(bandMean)^2;
end

ERGAS_index = (100/ratio) * sqrt((1/nBands) * accum);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/HQNR.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Hybrid Quality with No Reference (HQNR) index.
%
% Interface:
%           [HQNR_value,Dl,Ds] = HQNR(ps_ms,ms,msexp,pan,S,sensor,ratio)
%
% Inputs:
%           ps_ms:              Pansharpened image;
%           ms:                 Original MS image;
%           msexp:              MS image resampled to panchromatic scale;
%           pan:                Panchromatic image;
%           S:                  Block size (optional); Default value: 32;
%           sensor:             String for type of sensor (e.g. 'WV2','IKONOS');
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           HQNR_value:         HQNR index;
%           Dl:                 D_lambda index;
%           Ds:                 D_s index.
%
% References:
%           [Alparone08]        L. Alparone, B. Aiazzi, S. Baronti, A. Garzelli, F. Nencini, and M. Selva, "Multispectral and panchromatic data fusion assessment without reference,"
%                               Photogrammetric Engineering and Remote Sensing, vol. 74, no. 2, pp. 193-200, February 2008.
%           [Khan09]            M. M. Khan, L. Alparone, and J. Chanussot, "Pansharpening quality assessment using the modulation transfer functions of instruments",
%                               IEEE Trans. Geosci. Remote Sens., vol. 47, no. 11, pp. 3880-3891, Nov. 2009.
%           [Aiazzi14]          B. Aiazzi, L. Alparone, S. Baronti, R. Carla, A. Garzelli, and L. Santurri,
%                               "Full scale assessment of pansharpening methods and data products",
%                               in SPIE Remote Sensing, pp. 924 402-924 402, 2014.
%           [Vivone20]          G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J.
Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [HQNR_value,Dl,Ds] = HQNR(ps_ms,ms,msexp,pan,S,sensor,ratio)
% HQNR  Combine the D_lambda_K and D_s distortions into one quality value.
%   Dl comes from D_lambda_K(ps_ms,msexp,ratio,sensor,S), Ds from
%   D_s(ps_ms,msexp,ms,pan,ratio,S,1); the result is (1-Dl)*(1-Ds).
%   S is forwarded as given: this function applies no default for it.

% Exponent handed to D_s as its last argument.
dsExponent = 1;

Dl = D_lambda_K(ps_ms, msexp, ratio, sensor, S);
Ds = D_s(ps_ms, msexp, ms, pan, ratio, S, dsExponent);

spectralTerm = 1 - Dl;
spatialTerm  = 1 - Ds;
HQNR_value = (spectralTerm)*(spatialTerm);

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/Q.m ================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % Q/SSIM averaged on all bands. % % Interface: % Q_avg = Q(I1,I2,L) % % Inputs: % I1: First multispectral image; % I2: Second multispectral image; % L: Radiometric resolution. % % Outputs: % Q_avg: Q index averaged on all bands. % % References: % [Wang02] Z. Wang and A. C. Bovik, A universal image quality index, IEEE Signal Processing Letters, vol. 9, no. 3, pp. 81-84, March 2002. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Q_avg = Q(I1,I2,L)
% Q  Band-averaged universal image quality index.
%   Each band pair is scored with img_qi using a 32x32 block and the scores
%   are averaged. L is accepted for interface compatibility but is not used
%   by the img_qi-based computation (it only appeared in the disabled SSIM
%   variant: ssim(..., [0.01 0.03], fspecial('gaussian', 11, 1.5), L)).

blockSize = 32;
nBands    = size(I1,3);
perBand   = zeros(1,nBands);

for b = 1:nBands
    perBand(b) = img_qi(I1(:,:,b), I2(:,:,b), blockSize);
end

Q_avg = mean(perBand);

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/QNR.m ================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % Quality with No Reference (QNR) index. % % Interface: % [QNR_index,D_lambda_index,D_s_index] = QNR(I_F,I_MS,I_MS_LR,I_PAN,ratio,S,p,q,alpha,beta) % % Inputs: % I_F: Pansharpened image; % I_MS: MS image resampled to panchromatic scale; % I_MS_LR: Original MS image; % I_PAN: Panchromatic image; % ratio: Scale ratio between MS and PAN. Pre-condition: Integer value; % S: Block size (optional); Default value: 32; % p, q, alpha, beta: Exponent values (optional); Default values: p = q = alpha = beta = 1. % % Outputs: % QNR_index: QNR index; % D_lambda_index: D_lambda index; % D_s_index: D_s index. % % References: % [Alparone08] L. Alparone, B. Aiazzi, S. Baronti, A. Garzelli, F. Nencini, and M. Selva, "Multispectral and panchromatic data fusion assessment without reference," % Photogrammetric Engineering and Remote Sensing, vol. 74, no. 2, pp. 193-200, February 2008. % [Vivone15] G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms, % IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. 
Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [QNR_index,D_lambda_index,D_s_index] = QNR(I_F,I_MS,I_MS_LR,I_PAN,ratio,S,p,q,alpha,beta)
% QNR  Quality with No Reference index.
%   QNR_index = (1-D_lambda)^alpha * (1-D_s)^beta, where D_lambda and D_s
%   are obtained from D_lambda(I_F,I_MS,I_MS_LR,S,ratio,p) and
%   D_s(I_F,I_MS,I_MS_LR,I_PAN,ratio,S,q).
%
%   Optional arguments (omit them or pass [] to get the default):
%     S     - block size, default 32        (argument 6)
%     p     - D_lambda exponent, default 1  (argument 7)
%     q     - D_s exponent, default 1       (argument 8)
%     alpha - exponent of (1-D_lambda), default 1 (argument 9)
%     beta  - exponent of (1-D_s), default 1      (argument 10)

% A default applies only when the argument at that position was NOT passed,
% i.e. when nargin is smaller than the argument's own position. The previous
% thresholds were one too high, which overwrote the last supplied optional
% argument and always reset beta to 1.
if nargin < 10 || isempty(beta),  beta  = 1;  end
if nargin < 9  || isempty(alpha), alpha = 1;  end
if nargin < 8  || isempty(q),     q     = 1;  end
if nargin < 7  || isempty(p),     p     = 1;  end
if nargin < 6  || isempty(S),     S     = 32; end

D_lambda_index = D_lambda(I_F,I_MS,I_MS_LR,S,ratio,p);

D_s_index = D_s(I_F,I_MS,I_MS_LR,I_PAN,ratio,S,q);

QNR_index = (1-D_lambda_index)^alpha * (1-D_s_index)^beta;

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/SAM.m ================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % Spectral Angle Mapper (SAM). % % Interface: % [SAM_index,SAM_map] = SAM(I1,I2) % % Inputs: % I1: First multispectral image; % I2: Second multispectral image. % % Outputs: % SAM_index: SAM index; % SAM_map: Image of SAM values. % % References: % [Yuhas92] R. H. Yuhas, A. F. H. Goetz, and J. W. Boardman, "Discrimination among semi-arid landscape endmembers using the Spectral Angle Mapper (SAM) algorithm," % in Proceeding Summaries 3rd Annual JPL Airborne Geoscience Workshop, 1992, pp. 147-149. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [SAM_index,SAM_map] = SAM(I1,I2)
% SAM  Spectral angle between the per-pixel band vectors of I1 and I2.
%   SAM_map   : per-pixel angle in radians; pixels whose norm product is
%               zero use eps as denominator.
%   SAM_index : mean angle in degrees over the pixels whose norm product
%               is non-zero (real part only).

[nRows,nCols,~] = size(I2);

% Inner product and product of norms along the band dimension.
innerProd = dot(I1,I2,3);
energy1   = dot(I1,I1,3);
energy2   = dot(I2,I2,3);
normProd  = sqrt(energy1.*energy2);

% Map: guard the division for null pixels.
safeNorm = normProd;
safeNorm(safeNorm==0) = eps;
SAM_map = acos(innerProd./safeNorm);

% Index: drop null pixels before averaging.
innerVec = reshape(innerProd,nRows*nCols,1);
normVec  = reshape(normProd,nRows*nCols,1);
keep     = (normVec ~= 0);
innerVec = innerVec(keep);
normVec  = normVec(keep);

meanAngle = sum(sum(acos(innerVec./normVec)))/(size(normVec,1));
SAM_index = real(meanAngle)*180/pi;

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/SCC.m ================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % spatial Correlation Coefficient (sCC). % % Interface: % [sCC,SCCMap] = SCC(I_F,I_GT) % % Inputs: % I_F: Fused image; % I_GT: Ground-truth image. % % Outputs: % sCC: spatial correlation coefficient; % SCCMap: Image of sCC values. % % Reference: % [Vivone15] G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms, % IEEE Transaction on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [sCC,SCCMap]=SCC(I_F,I_GT)
% SCC  Spatial correlation coefficient between a fused image and a reference.
%   The Sobel gradient magnitude of every band is computed (on the image
%   with a one-pixel border removed) and the two gradient cubes are
%   correlated.
%
%   sCC    : sum(G_F.*G_GT) / (||G_F|| * ||G_GT||);
%   SCCMap : per-pixel contribution (summed over bands) with the same
%            normalisation, so that sum(SCCMap(:)) equals sCC.

Im_Lap_F  = sobel_magnitude(I_F);
Im_Lap_GT = sobel_magnitude(I_GT);

norm_F  = sqrt(sum(Im_Lap_F(:).^2));
norm_GT = sqrt(sum(Im_Lap_GT(:).^2));

sCC = sum(sum(sum(Im_Lap_F.*Im_Lap_GT)));
sCC = sCC/norm_F;
sCC = sCC/norm_GT;

% The map must be normalised by BOTH energies (fused and ground truth);
% dividing twice by the ground-truth norm made it inconsistent with sCC.
SCCMap = sum(Im_Lap_F.*Im_Lap_GT,3)/norm_F/norm_GT;

end

function G = sobel_magnitude(I)
% Sobel gradient magnitude of each band of I, computed on I(2:end-1,2:end-1,:).
% The cast to double prevents imfilter from truncating negative responses
% when I is stored in an integer class; double inputs are unaffected.
I = double(I);
kernel = fspecial('sobel');
G = zeros(size(I,1)-2,size(I,2)-2,size(I,3));
for idim=1:size(I,3)
    inner = I(2:end-1,2:end-1,idim);
    grad_y = imfilter(inner,kernel);
    grad_x = imfilter(inner,kernel');
    G(:,:,idim) = sqrt(grad_y.^2+grad_x.^2);
end
end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/img_qi.m ================================================
function [quality, quality_map] = img_qi(img1, img2, block_size) %======================================================================== % %Copyright (c) 2001 The University of Texas at Austin %All Rights Reserved. % %This program is free software; you can redistribute it and/or modify %it under the terms of the GNU General Public License as published by %the Free Software Foundation; either version 2 of the License, or %(at your option) any later version. % %This program is distributed in the hope that it will be useful, %but WITHOUT ANY WARRANTY; without even the implied warranty of %MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the %GNU General Public License for more details. 
% %The GNU Public License is available in the file LICENSE, or you %can write to the Free Software Foundation, Inc., 59 Temple Place - %Suite 330, Boston, MA 02111-1307, USA, or you can find it on the %World Wide Web at http://www.fsf.org. % %Author : Zhou Wang %Version : 1.0 % %The authors are with the Laboratory for Image and Video Engineering %(LIVE), Department of Electrical and Computer Engineering, The %University of Texas at Austin, Austin, TX. % %Kindly report any suggestions or corrections to zwang@ece.utexas.edu % %Acknowledgement: %The author would like to thank Mr. Umesh Rajashekar, the Matlab master %in our lab, for spending his precious time and giving his kind help %on writing this program. Without his help, this program would not %achieve its current efficiency. % %======================================================================== % %This is an efficient implementation of the algorithm for calculating %the universal image quality index proposed by Zhou Wang and Alan C. %Bovik. Please refer to the paper "A Universal Image Quality Index" %by Zhou Wang and Alan C. Bovik, published in IEEE Signal Processing %Letters, 2001. In order to run this function, you must have Matlab's %Image Processing Toolbox. % %Input : an original image and a test image of the same size %Output: (1) an overall quality index of the test image, with a value % range of [-1, 1]. % (2) a quality map of the test image. The map has a smaller % size than the input images. The actual size is % img_size - BLOCK_SIZE + 1. % %Usage: % %1. Load the original and the test images into two matrices % (say img1 and img2) % %2. Run this function in one of the two ways: % % % Choice 1 (suggested): % [qi qi_map] = img_qi(img1, img2); % % % Choice 2: % [qi qi_map] = img_qi(img1, img2, BLOCK_SIZE); % % The default BLOCK_SIZE is 8 (Choice 1). Otherwise, you can specify % it by yourself (Choice 2). % %3. See the results: % % qi %Gives the overall quality index. 
% imshow((qi_map+1)/2) %Shows the quality map as an image.
%
%========================================================================

% Argument-count guard: a single image or more than three arguments yields
% the sentinel outputs (-Inf index, all -1 map).
if (nargin == 1 || nargin > 3)
   quality = -Inf;
   quality_map = -1*ones(size(img1));
   return;
end

% Shape guard. isequal compares the complete size vectors; the former
% element-wise "~=" inside "if" only triggered when EVERY dimension
% differed and raised an error when the number of dimensions differed.
if ~isequal(size(img1), size(img2))
   quality = -Inf;
   quality_map = -1*ones(size(img1));
   return;
end

if (nargin == 2)
   block_size = 8;
end

% Number of pixels per block and the box filter that sums over a block.
N = block_size.^2;
sum2_filter = ones(block_size);

img1_sq = img1.*img1;
img2_sq = img2.*img2;
img12   = img1.*img2;

% Sliding-window sums ('valid' => map size is img_size - block_size + 1).
img1_sum    = filter2(sum2_filter, img1, 'valid');
img2_sum    = filter2(sum2_filter, img2, 'valid');
img1_sq_sum = filter2(sum2_filter, img1_sq, 'valid');
img2_sq_sum = filter2(sum2_filter, img2_sq, 'valid');
img12_sum   = filter2(sum2_filter, img12, 'valid');

img12_sum_mul    = img1_sum.*img2_sum;
img12_sq_sum_mul = img1_sum.*img1_sum + img2_sum.*img2_sum;
numerator    = 4*(N*img12_sum - img12_sum_mul).*img12_sum_mul;
denominator1 = N*(img1_sq_sum + img2_sq_sum) - img12_sq_sum_mul;
denominator  = denominator1.*img12_sq_sum_mul;

% Default value 1 covers windows where both denominators vanish.
quality_map = ones(size(denominator));
% Zero-variance term but non-zero means: only the mean ratio remains.
index = (denominator1 == 0) & (img12_sq_sum_mul ~= 0);
quality_map(index) = 2*img12_sum_mul(index)./img12_sq_sum_mul(index);
% Regular case.
index = (denominator ~= 0);
quality_map(index) = numerator(index)./denominator(index);

quality = mean2(quality_map);
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/norm_blocco.m ================================================
%%%%%%%%%%%%%% Q2n aux. function
function [y,a,c] = norm_blocco(x)
% Normalise a block: y = (x - mean2(x))/std2(x) + 1.
% a is the block mean, c the block standard deviation (eps when it is 0).

a=mean2(x);
c=std2(x);

% Avoid a division by zero on constant blocks.
if(c==0)
	c = eps;
end

y=((x-a)/c)+1;

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/onion_mult.m ================================================
%%%%%%%%%%%%%% Q2n aux. 
function function ris=onion_mult(onion1,onion2)
% ONION_MULT  Recursive product of two hypercomplex numbers stored as row
%   vectors of equal length. A single component is an ordinary product;
%   longer vectors are split in halves and combined recursively.

nComp = length(onion1);

% Base case: scalar product.
if ~(nComp > 1)
    ris = onion1*onion2;
    return
end

half = nComp/2;

lo1 = onion1(1:half);
hi1 = onion1(half+1:end);
hi1 = [hi1(1),-hi1(2:end)];
lo2 = onion2(1:half);
hi2 = onion2(half+1:end);
hi2 = [hi2(1),-hi2(2:end)];

% Two components: direct formula.
if nComp == 2
    ris = [lo1*lo2-hi2*hi1,lo1*hi2+lo2*hi1];
    return
end

% General case: four recursive products on the half-length operands.
hi1Flipped = [hi1(1),-hi1(2:end)];
lo1Flipped = [lo1(1),-lo1(2:end)];

prodA = onion_mult(lo1,lo2);
prodB = onion_mult(hi2,hi1Flipped);
prodC = onion_mult(lo1Flipped,hi2);
prodD = onion_mult(lo2,hi1);

firstHalf  = prodA-prodB;
secondHalf = prodC+prodD;

ris = [firstHalf,secondHalf];

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/onion_mult2D.m ================================================
%%%%%%%%%%%%%% Q2n aux. function
function ris = onion_mult2D(onion1,onion2)
% ONION_MULT2D  Element-wise version of onion_mult: the hypercomplex
%   components are stacked along the third dimension and every pixel is
%   multiplied independently.

[~,~,nComp] = size(onion1);

% Base case: plain element-wise product.
if ~(nComp > 1)
    ris = onion1.*onion2;
    return
end

half = nComp/2;

lo1 = onion1(:,:,1:half);
hi1 = onion1(:,:,half+1:end);
hi1 = cat(3,hi1(:,:,1),-hi1(:,:,2:end));
lo2 = onion2(:,:,1:half);
hi2 = onion2(:,:,half+1:end);
hi2 = cat(3,hi2(:,:,1),-hi2(:,:,2:end));

% Two components: direct formula.
if nComp == 2
    ris = cat(3,lo1.*lo2-hi2.*hi1,lo1.*hi2+lo2.*hi1);
    return
end

% General case: four recursive products on the half-depth operands.
hi1Flipped = cat(3,hi1(:,:,1),-hi1(:,:,2:end));
lo1Flipped = cat(3,lo1(:,:,1),-lo1(:,:,2:end));

prodA = onion_mult2D(lo1,lo2);
prodB = onion_mult2D(hi2,hi1Flipped);
prodC = onion_mult2D(lo1Flipped,hi2);
prodD = onion_mult2D(lo2,hi1);

firstHalf  = prodA-prodB;
secondHalf = prodC+prodD;

ris = cat(3,firstHalf,secondHalf);

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/onions_quality.m ================================================
%%%%%%%%%%%%%% Q2n aux. 
function function q = onions_quality(dat1,dat2,size1)
% ONIONS_QUALITY  Hypercomplex quality value for one block pair (Q2n aux.).
%   dat1, dat2 : blocks with the bands stacked along the third dimension;
%   size1      : block side length; the block is treated as size1 x size1.
%   q          : 1 x N3 vector in the general case, 1 x 1 x N3 array when
%                termine3 is zero (N3 = size(dat1,3)).

dat1=double(dat1);
dat2=double(dat2);
% Negate every band of dat2 except the first one.
dat2=cat(3,dat2(:,:,1),-dat2(:,:,2:end));
[~,~,N3]=size(dat1);
size2=size1;

% Block normalization
% dat1 is normalised band by band with norm_blocco; the mean (s) and the
% standard deviation (t) returned for dat1 are then applied to dat2. For the
% bands after the first, dat2 is un-negated, transformed and negated again.
for i=1:N3
    [a1,s,t]=norm_blocco(squeeze(dat1(:,:,i)));
    dat1(:,:,i)=a1;
    clear a1
    % NOTE(review): this branch tests the block MEAN s (not the standard
    % deviation t) and then skips the division by t - confirm that this is
    % intended.
    if s==0
        if i==1
            dat2(:,:,i)=dat2(:,:,i)-s+1;
        else
            dat2(:,:,i)=-(-dat2(:,:,i)-s+1);
        end
    else
        if i==1
            dat2(:,:,i)=((dat2(:,:,i)-s)/t)+1;
        else
            dat2(:,:,i)=-(((-dat2(:,:,i)-s)/t)+1);
        end
    end
end

m1=zeros(1,N3);
m2=zeros(1,N3);

mod_q1m=0;
mod_q2m=0;
mod_q1=zeros(size1,size2);
mod_q2=zeros(size1,size2);

% Per-band means (m1, m2), squared modulus of the mean vectors
% (mod_q1m, mod_q2m) and per-pixel squared modulus (mod_q1, mod_q2).
for i=1:N3
    m1(i)=mean2(squeeze(dat1(:,:,i)));
    m2(i)=mean2(squeeze(dat2(:,:,i)));
    mod_q1m=mod_q1m+(m1(i)^2);
    mod_q2m=mod_q2m+(m2(i)^2);
    mod_q1=mod_q1+((squeeze(dat1(:,:,i))).^2);
    mod_q2=mod_q2+((squeeze(dat2(:,:,i))).^2);
end

mod_q1m=sqrt(mod_q1m);
mod_q2m=sqrt(mod_q2m);
mod_q1=sqrt(mod_q1);
mod_q2=sqrt(mod_q2);

termine2 = (mod_q1m*mod_q2m);
termine4 = ((mod_q1m^2)+(mod_q2m^2));
% n/(n-1) factor with n = size1*size2 applied to the mean squared moduli.
int1=(size1*size2)/((size1*size2)-1)*mean2(mod_q1.^2);
int2=(size1*size2)/((size1*size2)-1)*mean2(mod_q2.^2);
termine3=int1+int2-(size1*size2)/((size1*size2)-1)*((mod_q1m^2)+(mod_q2m^2));

mean_bias=2*termine2/termine4;
if termine3==0
    % Degenerate case: only the last component carries mean_bias.
    q=zeros(1,1,N3);
    q(:,:,N3)=mean_bias;
else
    cbm=2/termine3;
    % Hypercomplex product of the blocks (per pixel) and of the mean vectors.
    qu=onion_mult2D(dat1,dat2);
    qm=onion_mult(m1,m2);
    qv=zeros(1,N3);
    for i=1:N3
        qv(i)=(size1*size2)/((size1*size2)-1)*mean2(squeeze(qu(:,:,i)));
    end
    q=qv-(size1*size2)/((size1*size2)-1)*qm;
    q=q*mean_bias*cbm;
end

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/q2n.m ================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % Q2n index. % % Interface: % [Q2n_index, Q2n_index_map] = q2n(I_GT, I_F, Q_blocks_size, Q_shift) % % Inputs: % I_GT: Ground-Truth image; % I_F: Fused Image; % Q_blocks_size: Block size of the Q-index locally applied; % Q_shift: Block shift of the Q-index locally applied. 
% % Outputs: % Q2n_index: Q2n index; % Q2n_index_map: Map of Q2n values. % % References: % [Garzelli09] A. Garzelli and F. Nencini, "Hypercomplex quality assessment of multi/hyper-spectral images," % IEEE Geoscience and Remote Sensing Letters, vol. 6, no. 4, pp. 662665, October 2009. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function [Q2n_index, Q2n_index_map] = q2n(I_GT, I_F, Q_blocks_size, Q_shift) [N1,N2,N3]=size(I_GT); size2=Q_blocks_size; stepx=ceil(N1/Q_shift); stepy=ceil(N2/Q_shift); if stepy<=0 stepy=1; stepx=1; end est1=(stepx-1)*Q_shift+Q_blocks_size-N1; est2=(stepy-1)*Q_shift+Q_blocks_size-N2; if sum([(est1~=0),(est2~=0)])>0 refref=[]; fusfus=[]; for i=1:N3 a1=squeeze(I_GT(:,:,1)); ia1=zeros(N1+est1,N2+est2); ia1(1:N1,1:N2)=a1; ia1(:,N2+1:N2+est2)=ia1(:,N2:-1:N2-est2+1); ia1(N1+1:N1+est1,:)=ia1(N1:-1:N1-est1+1,:); refref=cat(3,refref,ia1); if i 5) mssim = -Inf; ssim_map = -Inf; return; end if (size(img1) ~= size(img2)) mssim = -Inf; ssim_map = -Inf; return; end [M N] = size(img1); if (nargin == 2) if ((M < 11) || (N < 11)) mssim = -Inf; ssim_map = -Inf; return end window = fspecial('gaussian', 11, 1.5); % K(1) = 0.01; % default settings K(2) = 0.03; % L = 255; % end if (nargin == 3) if ((M < 11) || (N < 11)) mssim = -Inf; ssim_map = -Inf; return end window = fspecial('gaussian', 11, 1.5); L = 255; if (length(K) == 2) if (K(1) < 0 || K(2) < 0) mssim = -Inf; ssim_map = -Inf; return; end else mssim = -Inf; ssim_map = -Inf; return; end end if (nargin == 4) [H W] = size(window); if ((H*W) < 4 || (H > M) || (W > N)) mssim = -Inf; ssim_map = -Inf; return end L = 255; if (length(K) 
== 2) if (K(1) < 0 || K(2) < 0) mssim = -Inf; ssim_map = -Inf; return; end else mssim = -Inf; ssim_map = -Inf; return; end end if (nargin == 5) [H W] = size(window); if ((H*W) < 4 || (H > M) || (W > N)) mssim = -Inf; ssim_map = -Inf; return end if (length(K) == 2) if (K(1) < 0 || K(2) < 0) mssim = -Inf; ssim_map = -Inf; return; end else mssim = -Inf; ssim_map = -Inf; return; end end img1 = double(img1); img2 = double(img2); % automatic downsampling f = max(1,round(min(M,N)/256)); %downsampling by f %use a simple low-pass filter if(f>1) lpf = ones(f,f); lpf = lpf/sum(lpf(:)); img1 = imfilter(img1,lpf,'symmetric','same'); img2 = imfilter(img2,lpf,'symmetric','same'); img1 = img1(1:f:end,1:f:end); img2 = img2(1:f:end,1:f:end); end C1 = (K(1)*L)^2; C2 = (K(2)*L)^2; window = window/sum(sum(window)); mu1 = filter2(window, img1, 'valid'); mu2 = filter2(window, img2, 'valid'); mu1_sq = mu1.*mu1; mu2_sq = mu2.*mu2; mu1_mu2 = mu1.*mu2; sigma1_sq = filter2(window, img1.*img1, 'valid') - mu1_sq; sigma2_sq = filter2(window, img2.*img2, 'valid') - mu2_sq; sigma12 = filter2(window, img1.*img2, 'valid') - mu1_mu2; if (C1 > 0 && C2 > 0) ssim_map = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))./((mu1_sq + mu2_sq + C1).*(sigma1_sq + sigma2_sq + C2)); else numerator1 = 2*mu1_mu2 + C1; numerator2 = 2*sigma12 + C2; denominator1 = mu1_sq + mu2_sq + C1; denominator2 = sigma1_sq + sigma2_sq + C2; ssim_map = ones(size(mu1)); index = (denominator1.*denominator2 > 0); ssim_map(index) = (numerator1(index).*numerator2(index))./(denominator1(index).*denominator2(index)); index = (denominator1 ~= 0) & (denominator2 == 0); ssim_map(index) = numerator1(index)./denominator1(index); end mssim = mean2(ssim_map); return ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/RRpansharp.m ================================================ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % This method performs pansharpening. 
We assume that % the noisy satellite images yi, i=1,...,L, where y1 is the PAN image % and yi, i=2,...,L are the observed MS images, are related to the full % resolution target images by % % yi = Mi*Bi*xi + ni, i=1,...,L % % where Mi is a downsampling operator, Bi is a circulant blurring matrix, % and ni is noise. The method solves % min (1/2) sum_{i=1}^L || y_i - Mi*Bi*G*fi ||^2 + lambda * phi(G) % F, G % where phi is a regularizer function. % The function returns Xhat=G*F'. See [1] and [2] for details. % % Interface: % Xhat_im = RRpansharp(Yim,varargin) % % Inputs: % Yim : 1xL cell array containing the observed images the first image % is the PAN image and the last L-1 images are the MS images; % CDiter: Number of cyclic descent iterations. % CDiter=100 is the default; % r: The subspace dimension; % lambda: The regularization parameter, lambda=0.005 is the % default; % q: penalty weights; % X0: Initial value for X = G * F'. % % Outputs: % Xhat_im: estimated image (3D) at high resolution for each % spectral channel. % % References: % [Ulfarsson19] M.O. Ulfarsson, F. Palsson, M.Dalla Mura, J.R. Sveinsson, "Sentinel-2 Sharpening using a Reduced-Rank Method", % IEEE Transactions on Geoscience and Remote Sensing, vol. 57, no. 9, pp. 6408-6420, 2019. % [Palsson19] F. Palsson, MO. Ulfarsson, and JR. Sveinsson, "Model-Based Reduced-Rank Pansharpening", % IEEE Geoscience and Remote Sensing Letters, 2019 % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", % IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Xhat_im = RRpansharp(Yim,varargin)
% RRPANSHARP  Reduced-rank pansharpening by cyclic descent.
%   Yim is a cell array whose first element is the PAN image, followed by
%   the MS bands. Options are given as name/value pairs:
%     'CDiter' (10), 'r' (7), 'lambda' (0.005), 'q', 'X0', 'tolgradnorm'
%     (0.1), 'Gstep_only' (0), 'GCV' (0), 'd', 'mtf'.
%   'd' (per-band subsampling factors, column vector) and 'mtf' (per-band
%   MTF gains, row vector) have no defaults and must be supplied.
%   When 'q' is not given it is [1 1.5 4 8 15 15 20]' for r == 7 and
%   ones(r,1) otherwise. Unknown option names are ignored.
%   NOTE: the file header mentions CDiter=100 as default; the code uses 10.

    % import the manopt optimizer
    addpath('./manopt')
    p1=pwd;
    cd('./manopt');
    importmanopt
    cd(p1)
    % initialization
    CDiter=10;
    r=7;
    lambda=0.005;
    X0 = '';
    tolgradnorm = 0.1;
    % q, d and mtf are resolved after the options have been parsed, so that
    % the default q follows a user-supplied r and missing d/mtf are reported.
    q = [];
    d = [];
    mtf = [];
    Gstep_only=0;
    GCV = 0;
    for i=1:2:(length(varargin)-1)
        switch varargin{i}
            case 'CDiter'
                CDiter=varargin{i+1};
            case 'r'
                r=varargin{i+1};
            case 'lambda'
                lambda=varargin{i+1};
            case 'q'
                q=varargin{i+1};
            case 'X0'
                X0 = varargin{i+1};
            case 'tolgradnorm'
                tolgradnorm = varargin{i+1};
            case 'Gstep_only'
                Gstep_only = varargin{i+1};
            case 'GCV'
                GCV = varargin{i+1};
            case 'd'
                d = varargin{i+1};
            case 'mtf'
                mtf = varargin{i+1};
        end
    end
    tic;
    if isempty(d) || isempty(mtf)
        error('The options ''d'' and ''mtf'' have to be provided');
    end
    if isempty(q)
        if(r==7)
            q = [1, 1.5, 4, 8, 15, 15, 20 ]';
        else
            q = ones(r,1);
        end
    end
    if(length(q)~=r), error('The length of q has to match r'); end
    % dimensions of the inputs
    L=length(Yim);
    for i=1:L, Yim{i}=double(Yim{i}); end
    [nl,nc] = size(Yim{1});
    n = nl*nc;
    [Yim2, av] = normaliseData(Yim);

    % Sequence of bands
    % [B1 B2 B3 B4 B5 B6 B7 B8 B8A B9 B11 B12]
    % subsampling factors (in pixels)
    %d = [6 1 1 1 2 2 2 1 2 6 2 2]';
    % convolution  operators (Gaussian convolution filters), taken from ref [5]
    %mtf = [ .32 .26 .28 .24 .38 .34 .34 .26 .33 .26 .22 .23];
    sdf = d.*sqrt(-2*log(mtf)/pi^2)';
    % Do not sharpen high-res bands
    sdf(d==1) = 0;
    % remove border for computing the subspace and the result (because of
    % circular assumption
    limsub = 2;
    % kernel filter support
    dx = 12;
    dy = 12;
    % Define blurring operators
    FBM = createConvKernel(sdf,d,nl,nc,L,dx,dy);
    % IMPORTANT!!!
    % Note that the blur kernels are shifted to accommodate the co-registration
    % of real images with different resolutions.
    [Y,M,F]=initialization(Yim2,sdf,nl,nc,L,dx,dy,d,limsub,r);
    Mask=reshape(M,[n,L])';
    % CD
    if isempty(X0)
        Z = zeros(r,n);
    else
        % Start from the rank-r truncated SVD of the supplied estimate.
        [X0, ~] = normaliseData(X0);
        X0 = reshape(X0,[n,L])';
        [F,D,V]=svd(X0,'econ');
        F = F(:,1:r);
        Z = D(1:r,1:r)*V(:,1:r)';
    end
    % Operators for differences
    [FDH,FDV,FDHC,FDVC] = createDiffkernels(nl,nc,r);
    % Compute weights
    sigmas = 1;
    W = computeWeights(Y,d,sigmas,nl);
    Whalf=W.^(1/2);
    if( GCV == 1), Gstep_only=1; end
    if( Gstep_only ~= 0), CDiter=1; end
    for jCD=1:CDiter
        [Z,Jcost(jCD),options]=Zstep(Y,FBM,F,lambda,nl,nc,Z,Mask,q,FDH,FDV,FDHC,FDVC,W,Whalf,tolgradnorm);
        if(Gstep_only==0)
            F1=Fstep(F,Z,Y,FBM,nl,nc,Mask);
            F=F1;
        end
        if( GCV==1 )
            % num and den are computed here but not used or returned.
            Ynoise = ( abs(Y) > 0 ) .* randn( size(Y) );
            [Znoise]=Zstep(Ynoise,FBM,F,lambda,nl,nc,Z,Mask,q,FDH,FDV,FDHC,FDVC,W,Whalf,tolgradnorm);
            HtHBXnoise = Mask.*ConvCM(F*Znoise,FBM,nl);
            Ynoise = Ynoise([2:end],:);
            HtHBXnoise = HtHBXnoise([2:end],:);
            den = trace(Ynoise*(Ynoise - HtHBXnoise)');
            HtHBX=Mask.*ConvCM(F*Z,FBM,nl);
            num = norm( Y([2:end],:) - HtHBX([2:end],:) , 'fro')^2;
        end
    end
    Xhat_im = conv2im(F*Z,nl,nc,L);
    Xhat_im = unnormaliseData(Xhat_im,av);
    % Drop the first (PAN) channel from the result.
    Xhat_im = Xhat_im(:,:,2:end);
end

function [Y,M,F]=initialization(Yim2,sdf,nl,nc,L,dx,dy,d,limsub,r)
% Build the initial subspace F (first r left singular vectors of the
% equally blurred, border-cropped data) and the subsampling mask M / data Y.
    FBM2 = createConvKernelSubspace(sdf,nl,nc,L,dx,dy);
    % Generate LR MS image FOR SUBSPACE
    % Upsample image via interpolation
    for i=1:L
        Ylim(:,:,i) = imresize(Yim2{i},d(i));
    end
    Y2im=real(ifft2(fft2(Ylim).*FBM2));
    Y2tr=Y2im(limsub+1:end-limsub,limsub+1:end-limsub,:);
    % The cropped size follows limsub (it was hard-coded for limsub == 2).
    Y2n = reshape(Y2tr,[(nl-2*limsub)*(nc-2*limsub),L]);
    % SVD analysis
    % Y2n is the image for subspace with the removed border
    [F,D,P] = svd(Y2n','econ');
    F=F(:,1:r);
    [M, Y] = createSubsampling(Yim2,d,nl,nc,L);
end

function [Z, xcost,options]=Zstep(Y,FBM,F,tau,nl,nc,Z,Mask,q,FDH,FDV,FDHC,FDVC,W,Whalf,tolgradnorm)
% Update Z for fixed F with conjugate gradients. xcost and options are
% placeholders (1 and []). tolgradnorm is forwarded to CG as its stopping
% tolerance.
    r = size(F,2);
    n = nl*nc;
    UBTMTy=F'*ConvCM(Y,conj(FBM),nl);
    [Z] = CG(Z,F,Y,UBTMTy,FBM,Mask,nl,nc,r,tau,q,FDH,FDV,FDHC,FDVC,W,tolgradnorm);
    xcost=1;
    options=[];
end

function F1=Fstep(F,Z,Y,FBM,nl,nc,Mask)
% Update F for fixed Z with manopt's trustregions on stiefelfactory(L,r,1).
     F0=F;%   U; % initialization
     BTXhat =  ConvCM(F0*Z,FBM,nl);
     MBTXhat=Mask.*BTXhat;
     [L,r]=size(F);
     for ii=1:L
        MBZT(:,:,ii)=repmat(Mask(ii,:),[r,1]).*ConvCM(Z,repmat(FBM(:,:,ii),[1,1,r]),nl);
        A(:,:,ii)=MBZT(:,:,ii)*MBZT(:,:,ii)';
        ZBMTy(:,ii)=MBZT(:,:,ii)*Y(ii,:)';
     end
     ZBYT=ZBMTy';%    BTY*Z';
     manifold = stiefelfactory(L,r,1); %euclideanfactory(L,r);
     problem.M = manifold;
     problem.cost  = @(F) costF(F,MBZT,Y);
     problem.egrad = @(F) egrad(F,A,ZBYT);
     warning('off', 'manopt:getHessian:approx')
     options.tolgradnorm = 1e-2;
     options.verbosity=0;
     [F1, xcost, info, options] = trustregions(problem,F0,options);
end

% Cost functions
function [Ju]=costF(F,MBZT,Y)
% Data-fit cost: sum over bands of 0.5*||MBZT(:,:,i)'*F(i,:)' - Y(i,:)'||^2.
    L=size(F,1);
    Ju=0;
    for i=1:L
        fi=F(i,:)';
        yi=Y(i,:)';
        Ju=Ju+0.5*norm(MBZT(:,:,i)'*fi-yi,'fro')^2;
    end
end

function [Du]=egrad(F,A,ZBYT)
% Euclidean gradient of costF, computed row by row.
    p=size(A,3);
    Du=0*F;
    for ii=1:p
        Du(ii,:)=F(ii,:)*A(:,:,ii)'-ZBYT(ii,:);
    end
end

%%% AUXILIARY FUNCTIONS

function [FDH,FDV,FDHC,FDVC] = createDiffkernels(nl,nc,r)
% FFTs of the circular horizontal/vertical first-difference kernels,
% replicated r times along the third dimension, and their conjugates.
    dh = zeros(nl,nc);
    dh(1,1) = 1;
    dh(1,nc) = -1;
    dv = zeros(nl,nc);
    dv(1,1) = 1;
    dv(nl,1) = -1;
    FDH = repmat(fft2(dh),1,1,r);
    FDV = repmat(fft2(dv),1,1,r);
    FDHC = conj(FDH);
    FDVC = conj(FDV);
end

function [Yim, av] = normaliseData(Yim)
    % Normalize each cell to unit power
    % av(i) is the mean squared value of band i. Cells are addressed with
    % linear indexing so that row (1xL) and column (Lx1) cell arrays both
    % keep their shape; Yim{i,1} would grow a row cell into a matrix cell.
    if iscell(Yim)
        % mean squared power = 1
        nb = length(Yim);
        av = zeros(nb,1);
        for i=1:nb
            av(i,1) = mean2(Yim{i}.^2);
            Yim{i} = sqrt(Yim{i}.^2/av(i,1));
        end
    else
        nb = size(Yim,3);
        av = zeros(nb,1);
        for i=1:nb
            av(i,1) = mean2(Yim(:,:,i).^2);
            Yim(:,:,i) = sqrt(Yim(:,:,i).^2/av(i,1));
        end
    end
end

function FBM = createConvKernel(sdf,d,nl,nc,L,dx,dy)
    %--------------------------------------------------------------------------
    %   Build convolution kernels
    %--------------------------------------------------------------------------
    middlel=((nl)/2);
    middlec=((nc)/2);
    % kernel filters expanded to size [nl,nc]
    B = zeros(nl,nc,L);
    % fft2 of kernels
    FBM = zeros(nl,nc,L);
    for i=1:L
        if d(i) > 1
            h = fspecial('gaussian',[dx,dy],sdf(i));
            % The kernel is placed with an offset of -d(i)/2+1 pixels.
            B((middlel-dy/2+1:middlel+dy/2)-d(i)/2+1,(middlec-dx/2+1:middlec+dx/2)-d(i)/2+1,i) = h; %run
            % circularly center
            B(:,:,i)= fftshift(B(:,:,i));
            % normalize
            B(:,:,i) = B(:,:,i)/sum(sum(B(:,:,i)));
            FBM(:,:,i) = fft2(B(:,:,i));
        else
            % unit impulse for bands that are not subsampled
            B(1,1,i) = 1;
            FBM(:,:,i) = fft2(B(:,:,i));
        end
    end
end

function FBM2 = createConvKernelSubspace(sdf,nl,nc,L,dx,dy)
    %--------------------------------------------------------------------------
    %   Build convolution kernels FOR SUBSPACE!!!!
    %--------------------------------------------------------------------------
    %
    middlel=round((nl+1)/2);
    middlec=round((nc+1)/2);
    dx = dx+1;
    dy = dy+1;
    % kernel filters expanded to size [nl,nc]
    B = zeros(nl,nc,L);
    % fft2 of kernels
    FBM2 = zeros(nl,nc,L);
    s2 = max(sdf);
    for i=1:L
        if sdf(i) < s2
            % Blur band i so that its total blur matches the largest sdf.
            h = fspecial('gaussian',[dx,dy],sqrt(s2^2-sdf(i)^2));
            B(middlel-(dy-1)/2:middlel+(dy-1)/2,middlec-(dx-1)/2:middlec+(dx-1)/2,i) = h;
            %circularly center
            B(:,:,i)= fftshift(B(:,:,i));
            % normalize
            B(:,:,i) = B(:,:,i)/sum(sum(B(:,:,i)));
            FBM2(:,:,i) = fft2(B(:,:,i));
        else
            % unit impulse
            B(1,1,i) = 1;
            FBM2(:,:,i) = fft2(B(:,:,i));
        end
    end
end

function X = ConvCM(X,FKM,nl,nc,L)
    if nargin == 3
        [L,n] = size(X);
        nc = n/nl;
    end
    X = conv2mat(real(ifft2(fft2(conv2im(X,nl,nc,L)).*FKM)));
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % define a circular convolution (the same for all bands) accepting a
    % matrix and returning a matrix
    % size(X) is [no_bands_ms,n]
    % FKM is the cube containing the fft2 of the convolution kernels
    % ConvCM = @(X,FKM)  reshape(real(ifft2(fft2(reshape(X', nl,nc,nb)).*FKM)), nl*nc,nb)';
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end

function X = conv2mat(X,nl,nc,L)
% Reshape an image (cube) into a bands-by-pixels matrix; the sizes are
% taken from X itself in both branches.
    if ndims(X) == 3
        [nl,nc,L] = size(X);
        X = reshape(X,nl*nc,L)';
    elseif ndims(squeeze(X)) == 2
        L = 1;
        [nl,nc] = size(X);
        X = reshape(X,nl*nc,L)';
    end
end

function [M, Y] = createSubsampling(Yim,d,nl,nc,L)
    % subsampling matrix
    M = zeros(nl,nc,L);
    % Y is allocated at full size so that its width does not depend on
    % which pixel indices happen to be written.
    Y = zeros(L,nl*nc);
    indexes = cell([L 1]);
    for i=1:L
        im = ones(floor(nl/d(i)),floor(nc/d(i)));
        maux = zeros(d(i));
        maux(1,1) = 1;
        % One sample kept in the top-left corner of every d(i) x d(i) cell.
        M(:,:,i) = kron(im,maux);
        indexes{i} = find(M(:,:,i) == 1);
        Y(i,indexes{i}) = conv2mat(Yim{i},nl/d(i),nc/d(i),1);
    end
end

function [Yim] = unnormaliseData(Yim, av)
% Inverse of normaliseData: rescale every band by sqrt(av(i)).
    if iscell(Yim)
        % mean squared power = 1
        nb = length(Yim);
        for i=1:nb
            Yim{i} = sqrt(Yim{i}.^2*av(i,1));
        end
    else
        nb = size(Yim,3);
        for i=1:nb
            Yim(:,:,i) = sqrt(Yim(:,:,i).^2*av(i,1));
        end
    end
end

function W = computeWeights(Y,d,sigmas,nl)
    % As in eq. (14) and (15)
    % Compute weights for each pixel based on HR bands
    % (row vector regardless of the orientation of d, so the loop visits
    % one band per iteration)
    hr_bands = reshape(find(d==1),1,[]);
    for i=hr_bands
%         grad(:,:,i) = imgradient(conv2im(Y(i,:),nl),'prewitt').^2;
        % Intermediate gives also good results compared to prewitt
        grad(:,:,i) = imgradient(conv2im(Y(i,:),nl),'intermediate').^2;
    end
    grad = sqrt(max(grad,[],3));
    grad = grad / quantile(grad(:),0.95);
    Wim = exp(-grad.^2/2/sigmas^2);
    % Weights are floored at 0.5.
    Wim(Wim<0.5) = 0.5;
    W = conv2mat(Wim,nl);
end

function X = conv2im(X,nl,nc,L)
% Reshape a bands-by-pixels matrix into an nl x nc x L cube; with two
% arguments nc and L are derived from size(X).
    if size(X,2)==1
        X = conv2mat(X,nl,nc,L);
    end
    if nargin == 2
        [L,n] = size(X);
        if n==1
            X = conv2mat(X,nl,nc,L);
        end
        nc = n/nl;
    end
    X = reshape(X',nl,nc,L);
end

function [J,gradJ,AtAg] = grad_cost_G(Z,F,Y,UBTMTy,FBM,Mask,nl,nc,r,tau,q,FDH,FDV,FDHC,FDVC,W)
% Quadratic cost J, its gradient gradJ = AtAg - UBTMTy and the operator
% output AtAg (data term plus weighted difference penalty applied to Z).
    X=F*Z;
    BX=ConvCM(X,FBM,nl);
    HtHBX=Mask.*BX;
    ZH=ConvCM(Z,FDHC,nl);
    Zv=ConvCM(Z,FDVC,nl);
    ZHW=ZH.*W;
    ZVW=Zv.*W;
    grad_pen=ConvCM(ZHW,FDH,nl)+ConvCM(ZVW,FDV,nl);
    AtAg = F'*ConvCM(HtHBX,conj(FBM),nl)+2*tau*(q*ones(1,nl*nc)).*grad_pen;
    gradJ=AtAg-UBTMTy;
    J = 1/2 * sum( sum( Z .* AtAg ) ) - sum( sum( Z.*UBTMTy ) );
end

function [ Z ] = CG(Z,F,Y,UBTMTy,FBM,Mask,nl,nc,r,tau,q,FDH,FDV,FDHC,FDVC,W,tolgradnorm)
% Conjugate gradients on the quadratic of grad_cost_G, at most 1000
% iterations. tolgradnorm is optional and defaults to 0.1.
    maxiter = 1000;
    if nargin < 17 || isempty(tolgradnorm)
        tolgradnorm = 0.1;%1e-6;
    end
    [cost,grad] = grad_cost_G(Z,F,Y,UBTMTy,FBM,Mask,nl,nc,r,tau,q,FDH,FDV,FDHC,FDVC,W);
    gradnorm = norm(grad(:));
    iter = 0;
    res = -grad;
    while ( gradnorm > tolgradnorm && iter < maxiter )
        iter = iter + 1;
       % fprintf('%5d\t%+.16e\t%.8e\n', iter, cost, gradnorm);
        if( iter == 1 )
            desc_dir = res;
        else
            beta = ( res(:).' * res(:) ) / ( old_res(:).' * old_res(:) );
            desc_dir = res + beta * desc_dir;
        end
        % The third output applied to the direction gives the operator
        % product needed for the step length.
        [~, ~, AtAp] = grad_cost_G(desc_dir,F,Y,UBTMTy,FBM,Mask,nl,nc,r,tau,q,FDH,FDV,FDHC,FDVC,W);
        alpha = ( res(:).' * res(:) ) / ( desc_dir(:).' * AtAp(:) );
        Z1 = Z + alpha * desc_dir;
        old_res = res;
        res = res - alpha* AtAp;
        gradnorm = norm( res(:) );
        % Transfer iterate info
        Z = Z1;
    end
end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/CLA.txt ================================================
Thank you for your interest in Manopt. The purpose of this Contributor License Agreement is to clarify the intellectual property license granted with contributions of software from any person or entity (the "Contributor") to the owners of Manopt. This license is for your protection as a Contributor of software to Manopt and does not change your right to use your own contributions for any other purpose. The owners of Manopt are the copyright holders of Manopt indicated in the license files distributed with the software. You and the owners of Manopt hereby accept and agree to the following terms and conditions: Your "Contributions" means all of your past, present and future contributions of object code, source code and documentation to Manopt, however submitted to Manopt, excluding any submissions that are conspicuously marked or otherwise designated in writing by You as "Not a Contribution." You hereby grant to the owners of Manopt a non-exclusive, irrevocable, worldwide, no-charge, transferable copyright license to use, execute, prepare derivative works of, and distribute (internally and externally, in object code and, if included in your Contributions, source code form) your Contributions. Except for the rights granted to the owners of Manopt in this paragraph, You reserve all right, title and interest in and to your Contributions. You represent that you are legally entitled to grant the above license. 
If your employer(s) have rights to intellectual property that you create, you represent that you have received permission to make the Contributions on behalf of that employer, or that your employer has waived such rights for your Contributions to Manopt. You represent that, except as disclosed in your Contribution submission(s), each of your Contributions is your original creation. You represent that your Contribution submission(s) included complete details of any license or other restriction (including, but not limited to, related patents and trademarks) associated with any part of your Contribution(s) (including a copy of any applicable license agreement). You agree to notify the owners of Manopt of any facts or circumstances of which you become aware that would make Your representations in the Agreement inaccurate in any respect. You are not expected to provide support for your Contributions, except to the extent you desire to provide support. You may provide support for free, for a fee, or not at all. Your Contributions are provided as-is, with all faults, defects and errors, and without any warranty of any kind (either express or implied) including, without limitation, any implied warranty of merchantability and fitness for a particular purpose and any warranty of non-infringement. This CLA is a modification of the CLA used by the UW Calendar project of the University of Washington: ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/COPYING.txt ================================================ GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works.
The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. 
Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. 
To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. 
A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. 
You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. 
You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. 
You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. 
Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. 
If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. 
When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. 
If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. 
If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. 
For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. 
If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. 
You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. 
The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. 
SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <http://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <http://www.gnu.org/philosophy/why-not-lgpl.html>. 
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/CREDITS.txt ================================================ The core developers of Manopt are * Nicolas Boumal * Bamdev Mishra Through the RANSO group, Manopt is supported by * Pierre-Antoine Absil * Yurii Nesterov * Rodolphe Sepulchre We are grateful for the excellent contributions of * Pierre Borckmans * Bart Vandereycken * Hiroyuki Sato * Roberto Tron * Sarod Yatawatta * Hiroyuki Kasai * Bruno Iannazzo * Margherita Porcelli * Jesus Briales * Changshuo Liu Furthermore, code written by the following people can be found in Manopt: * Chris Baker * Pierre-Antoine Absil * Kyle Gallivan * Paolo de Leva * Wynton Moore * Michael Kleder Each person's contribution is marked by their name in the relevant files. See http://www.manopt.org/about.html for a more precise breakdown. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/LICENSE.txt ================================================ Manopt, a Matlab toolbox for optimization on manifolds, is copyright by Nicolas Boumal and is distributed under the terms of the GNU General Public License (GPL) version 3 (or later). See accompanying file <COPYING.txt> or <http://www.gnu.org/licenses/gpl.html>. In short, this means that everyone is free to use Manopt, to modify it and to redistribute it on a free basis. Manopt is not in the public domain; it is copyrighted and there are restrictions on its distribution (see the license). For example, you cannot integrate this version of Manopt (in full or in parts) in any closed-source software you plan to distribute (commercially or not). Please contact us for more information. Contact: http://www.manopt.org manopttoolbox@gmail.com The documentation of Manopt (the website) is copyright by Nicolas Boumal, all rights reserved. 
THIRD-PARTY CODE The following files contain third-party code or extensively rely on third-party code, and their specific license should be considered before modifying and/or redistributing them. The license information can be found either in the comments in the code or in a separate text file in the same directory as the Matlab files. /manopt/solvers/trustregions/trustregions.m /manopt/solvers/trustregions/tCG.m /manopt/tools/multitransp.m /manopt/tools/multiprod.m /manopt/tools/diagsum.m /manopt/tools/hashmd5.m CONTRIBUTIONS Contributions are licensed to the owners of Manopt under the Contributor License Agreement, see accompanying file <CLA.txt>. Be sure to check the header comments of Matlab files and look for the "original author" tag. ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/README.txt ================================================ Manopt is a Matlab toolbox for optimization on manifolds. Installation instructions, documentation and updates are available online: http://www.manopt.org Manopt is copyright by Nicolas Boumal (nicolasboumal@gmail.com) and is distributed under the terms of the GNU General Public License (GPL) version 3 (or later). See the files LICENSE.TXT, COPYING.TXT and CREDITS.TXT. Contact: manopttoolbox@gmail.com Quick installation guide ------------------------ * Unzip and copy the whole manopt directory you just downloaded in a location of your choice on disk, say, in /my/directory/. * Go to /my/directory/manopt/ at the command prompt and execute importmanopt. You may save this path for your next Matlab sessions: follow the menu File > Set Path... and save. * Go to /my/directory/manopt/checkinstall/ and run the script basicexample.m. If there are no errors, you are done! Otherwise, feel free to contact us. Feedback -------- Please let us know how you use Manopt: it helps us develop a better toolbox. 
Please cite the Manopt paper in your work (as well as relevant solvers/geometries): http://jmlr.org/papers/v15/boumal14a.html @article{manopt, author = {Nicolas Boumal and Bamdev Mishra and P.-A. Absil and Rodolphe Sepulchre}, title = {{M}anopt, a {M}atlab Toolbox for Optimization on Manifolds}, journal = {Journal of Machine Learning Research}, year = {2014}, volume = {15}, pages = {1455--1459}, url = {http://www.manopt.org} } For more info or help: http://www.manopt.org -- we are active on the forum! This version: Manopt 4.0, released Sep. 9, 2017. GitHub: https://github.com/NicolasBoumal/manopt ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/checkinstall/basicexample.m ================================================ function basicexample() % Verify that Manopt was indeed added to the Matlab path. if isempty(which('spherefactory')) error(['You should first add Manopt to the Matlab path.\n' ... 'Please run importmanopt.']); end % Generate the problem data. n = 1000; A = randn(n); A = .5*(A+A'); % Create the problem structure. manifold = spherefactory(n); problem.M = manifold; % Define the problem cost function and its gradient. problem.cost = @(x) -x'*(A*x); problem.egrad = @(x) -2*A*x; problem.ehess = @(x, xdot) -2*A*xdot; % Numerically check gradient and Hessian consistency. figure; checkgradient(problem); figure; checkhessian(problem); % Solve. [x, xcost, info] = trustregions(problem); %#ok % Display some statistics. figure; semilogy([info.iter], [info.gradnorm], '.-'); xlabel('Iteration #'); ylabel('Gradient norm'); title('Convergence of the trust-regions algorithm on the sphere'); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/PCA_stochastic.m ================================================ function [X, A] = PCA_stochastic(A, k) % Example of stochastic gradient algorithm in Manopt on a PCA problem. 
% % PCA (principal component analysis) on a dataset A of size nxd consists % in solving % % minimize_X f(X) = -.5*norm(A*X, 'fro')^2 / n, % % where X is a matrix of dimension dxk with orthonormal columns. This % is equivalent to finding k dominant singular vectors of A, or k top % eigenvectors of A'*A. % % If n is large, this computation can be expensive. Thus, stochastic % gradient algorithms take the point of view that f(X) is a sum of many (n) % terms: each term involves only one of the n rows of A. % % To make progress, it may be sufficient to optimize with respect to a % subset of the terms at each iteration. This way, each individual % iteration can be very cheap. In particular, individual operations have % cost independent of n, because f or its gradient need never be evaluated % completely (or at all in the case of f.) % % Stochastic gradient algorithms (this implementation in particular) are % sensitive to proper parameter tuning. See in code. % This file is part of Manopt and is copyrighted. See the license file. % % Main author: Bamdev Mishra and Nicolas Boumal, Sept. 6, 2017 % Contributors: % % Change log: % % If none is given, generate a random data set: n samples in R^d if ~exist('A', 'var') || isempty(A) d = 1000; n = 100000; fprintf('Generating data...'); A = randn(n, d)*diag([[15 10 5], ones(1, d-3)]); fprintf(' done (size: %d x %d).\n', size(A)); else [n, d] = size(A); end % Pick a number of component to compute if ~exist('k', 'var') || isempty(k) k = 3; end % We are looking for k orthonormal vectors in R^d: Stiefel manifold. problem.M = stiefelfactory(d, k); % The cost function to minimize is a sum of n terms. This parameter % must be set for stochastic algorithms. problem.ncostterms = n; % We do not need to specify how to compute the value of the cost % function (stochastic algorithms never use this). All we need is to % specify how to compute the gradient of the cost function, where the % sum is restricted to a subset of the terms (a sample). 
Notice that we % specify a partial Euclidean gradient (hence the 'e' in partialegrad). % This way, Manopt will automatically convert the Euclidean vector into % a proper Riemannian partial gradient, in the tangent space at X. % In particular, if sample = 1:n, then the partial gradient corresponds % to the actual (complete) gradient. problem.partialegrad = @partialegrad; function G = partialegrad(X, sample) % X is an orthonormal matrix of size dxk % sample is a vector of indices between 1 and n: a subset % Extract a subset of the dataset Asample = A(sample, :); % Compute the gradient of f restricted to that sample G = -Asample'*(Asample*X); G = G / n; end % If one wants to use checkgradient to verify one's work, then it is % necessary to specify the cost function as well, as below. % problem.cost = @(X) -.5*norm(A*X, 'fro')^2 / n; % checkgradient(problem); pause; % To have the solver record statistics every x iterations, set % options.checkperiod to x. This will record simple quantities which % are almost free to compute (namely, elapsed time and step size of the % last step.) To record more sophisticated quantities, you can use % options.statsfun as usual. Time spent computing these statistics is % not counted in times reported in the info structure returned by the % solver. options.checkperiod = 10; options.statsfun = statsfunhelper('metric', @(X) norm(A*X, 'fro')); % Set the parameters for the solver: stochastic gradient algorithms % tend to be quite sensitive to proper tuning, especially regarding % step size selection. See the solver's documentation for details. 
options.maxiter = 200; options.batchsize = 10; % options.stepsize_type = 'decay'; options.stepsize_init = 1e2; options.stepsize_lambda = 1e-3; options.verbosity = 2; % Run the solver [X, info] = stochasticgradient(problem, [], options); % Plot the special metric recorded by options.statsfun plot([info.iter], [info.metric], '.-'); xlabel('Iteration #'); ylabel('Frobenius norm of A*X'); title('Convergence of stochasticgradient on stiefelfactory for PCA'); % Add to that plot a reference: the globally optimal value attained if % the true dominant singular vectors are computed. fprintf('Running svds... '); t = tic(); [V, ~] = svds(A', k); fprintf('done: %g [s] (note: svd may be faster)\n', toc(t)); hold all; bound = norm(A*V, 'fro'); plot([info.iter], bound*ones(size([info.iter])), '--'); hold off; legend('Algorithm', 'SVD bound', 'Location', 'SouthEast'); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/dominant_invariant_subspace.m ================================================ function [X, info] = dominant_invariant_subspace(A, p) % Returns an orthonormal basis of the dominant invariant p-subspace of A. % % function X = dominant_invariant_subspace(A, p) % % Input: A real, symmetric matrix A of size nxn and an integer p < n. % Output: A real, orthonormal matrix X of size nxp such that trace(X'*A*X) % is maximized. That is, the columns of X form an orthonormal basis % of a dominant subspace of dimension p of A. These are thus % eigenvectors associated with the largest eigenvalues of A (in no % particular order). Sign is important: 2 is deemed a larger % eigenvalue than -5. % % The optimization is performed on the Grassmann manifold, since only the % space spanned by the columns of X matters. The implementation is short to % show how Manopt can be used to quickly obtain a prototype. 
To make the % implementation more efficient, one might first try to use the caching % system, that is, use the optional 'store' arguments in the cost, grad and % hess functions. Furthermore, using egrad2rgrad and ehess2rhess is quick % and easy, but not always efficient. Having a look at the formulas % implemented in these functions can help rewrite the code without them, % possibly more efficiently. % % See also: dominant_invariant_subspace_complex % This file is part of Manopt and is copyrighted. See the license file. % % Main author: Nicolas Boumal, July 5, 2013 % Contributors: % % Change log: % % NB Dec. 6, 2013: % We specify a max and initial trust region radius in the options. % Generate some random data to test the function if ~exist('A', 'var') || isempty(A) A = randn(128); A = (A+A')/2; end if ~exist('p', 'var') || isempty(p) p = 3; end % Make sure the input matrix is square and symmetric n = size(A, 1); assert(isreal(A), 'A must be real.') assert(size(A, 2) == n, 'A must be square.'); assert(norm(A-A', 'fro') < n*eps, 'A must be symmetric.'); assert(p<=n, 'p must be smaller than n.'); % Define the cost and its derivatives on the Grassmann manifold Gr = grassmannfactory(n, p); problem.M = Gr; problem.cost = @(X) -trace(X'*A*X); problem.grad = @(X) -2*Gr.egrad2rgrad(X, A*X); problem.hess = @(X, H) -2*Gr.ehess2rhess(X, A*X, A*H, H); % Execute some checks on the derivatives for early debugging. % These can be commented out. % checkgradient(problem); % pause; % checkhessian(problem); % pause; % Issue a call to a solver. A random initial guess will be chosen and % default options are selected except for the ones we specify here. options.Delta_bar = 8*sqrt(p); [X, costX, info, options] = trustregions(problem, [], options); %#ok fprintf('Options used:\n'); disp(options); % For our information, Manopt can also compute the spectrum of the % Riemannian Hessian on the tangent space at (any) X. 
Computing the % spectrum at the solution gives us some idea of the conditioning of % the problem. If we were to implement a preconditioner for the % Hessian, this would also inform us on its performance. % % Notice that (typically) all eigenvalues of the Hessian at the % solution are positive, i.e., we find an isolated minimizer. If we % replace the Grassmann manifold by the Stiefel manifold, hence still % optimizing over orthonormal matrices but ignoring the invariance % cost(XQ) = cost(X) for all Q orthogonal, then we see % dim O(p) = p(p-1)/2 zero eigenvalues in the Hessian spectrum, making % the optimizer not isolated anymore. if Gr.dim() < 512 evs = hessianspectrum(problem, X); stairs(sort(evs)); title(['Eigenvalues of the Hessian of the cost function ' ... 'at the solution']); xlabel('Eigenvalue number (sorted)'); ylabel('Value of the eigenvalue'); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/dominant_invariant_subspace_complex.m ================================================ function [X, info] = dominant_invariant_subspace_complex(A, p) % Returns a unitary basis of the dominant invariant p-subspace of A. % % function X = dominant_invariant_subspace_complex(A, p) % % Input: A complex, Hermitian matrix A of size nxn and an integer p < n. % Output: A complex, unitary matrix X of size nxp such that trace(X'*A*X) % is maximized. That is, the columns of X form a unitary basis % of a dominant subspace of dimension p of A. % % The optimization is performed on the complex Grassmann manifold, since % only the space spanned by the columns of X matters. % % See dominant_invariant_subspace for more details in the real case. % % See also: dominant_invariant_subspace grassmanncomplexfactory % This file is part of Manopt and is copyrighted. See the license file. 
% % Main author: Nicolas Boumal, June 30, 2015 % Contributors: % % Change log: % Generate some random data to test the function if ~exist('A', 'var') || isempty(A) A = randn(128) + 1i*randn(128); A = (A+A')/2; end if ~exist('p', 'var') || isempty(p) p = 3; end % Make sure the input matrix is Hermitian n = size(A, 1); assert(size(A, 2) == n, 'A must be square.'); assert(norm(A-A', 'fro') < n*eps, 'A must be Hermitian.'); assert(p<=n, 'p must be smaller than n.'); % Define the cost and its derivatives on the complex Grassmann manifold Gr = grassmanncomplexfactory(n, p); problem.M = Gr; problem.cost = @(X) -real(trace(X'*A*X)); problem.egrad = @(X) -2*A*X; problem.ehess = @(X, H) -2*A*H; % Execute some checks on the derivatives for early debugging. % These can be commented out. % checkgradient(problem); % pause; % checkhessian(problem); % pause; % Issue a call to a solver. A random initial guess will be chosen and % default options are selected except for the ones we specify here. options.Delta_bar = 8*sqrt(p); [X, costX, info, options] = trustregions(problem, [], options); %#ok fprintf('Options used:\n'); disp(options); % For our information, Manopt can also compute the spectrum of the % Riemannian Hessian on the tangent space at (any) X. Computing the % spectrum at the solution gives us some idea of the conditioning of % the problem. If we were to implement a preconditioner for the % Hessian, this would also inform us on its performance. % % Notice that (typically) all eigenvalues of the Hessian at the % solution are positive, i.e., we find an isolated minimizer. If we % replace the Grassmann manifold by the Stiefel manifold, hence still % optimizing over orthonormal matrices but ignoring the invariance % cost(XQ) = cost(X) for all Q orthogonal, then we see % dim O(p) = p(p-1)/2 zero eigenvalues in the Hessian spectrum, making % the optimizer not isolated anymore. 
if Gr.dim() < 512 evs = hessianspectrum(problem, X); stairs(sort(evs)); title(['Eigenvalues of the Hessian of the cost function ' ... 'at the solution']); xlabel('Eigenvalue number (sorted)'); ylabel('Value of the eigenvalue'); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/elliptope_SDP.m ================================================ function [Y, problem, S] = elliptope_SDP(A, p, Y0) % Solver for semidefinite programs (SDP's) with unit diagonal constraints. % % function [Y, problem, S] = elliptope_SDP(A) % function [Y, problem, S] = elliptope_SDP(A, p) % function [Y, problem, S] = elliptope_SDP(A, p, Y0) % % A is a real, symmetric matrix of size n. % % This function uses a local optimization method in Manopt to solve the SDP % % min_X trace(A*X) s.t. diag(X) = 1 and X is positive semidefinite. % % In practice, the symmetric matrix X of size n is parameterized % as X = Y*Y', where Y has size n x p. By default, p is taken large enough % (about sqrt(2n)) to ensure that there exists an optimal X whose rank is % smaller than p. This ensures that the SDP is equivalent to the new % problem in Y: % % min_Y trace(Y'*A*Y) s.t. diag(Y*Y') = 1. % % The constraints on Y require each row of Y to have unit norm, which is % why Manopt is appropriate software to solve this problem. An optional % initial guess can be specified via the input Y0. % % See the paper below for theory, specifically, for a proof that, for % almost all A, second-order critical points of the problem in Y are % globally optimal. In other words: there are no local traps in Y, despite % non-convexity. % % Outputs: % % Y: is the best point found (an nxp matrix with unit norm rows.) % To find X, form Y*Y' (or, more efficiently, study X through Y.) % % problem: is the Manopt problem structure used to produce Y. % % S: is a dual optimality certificate (a symmetric matrix of size n, % sparse if A is sparse). 
The optimality gap (in the cost % function) is at most n*min(eig(S)), for both Y and X = Y*Y'. % Hence, if min(eig(S)) is close to zero, Y is close to globally % optimal. This can be computed via eigs(S, 1, 'SR'). % % Paper: https://arxiv.org/abs/1606.04970 % % @inproceedings{boumal2016bmapproach, % author = {Boumal, N. and Voroninski, V. and Bandeira, A.S.}, % title = {The non-convex {B}urer-{M}onteiro approach works on smooth semidefinite programs}, % booktitle={Neural Information Processing Systems (NIPS 2016)}, % year = {2016} % } % % See also: maxcut elliptope_SDP_complex % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, June 28, 2016 % Contributors: % Change log: % If no inputs are provided, since this is an example file, generate % a random Erdos-Renyi graph. This is for illustration purposes only. if ~exist('A', 'var') || isempty(A) n = 100; A = triu(rand(n) <= .1, 1); A = (A+A.')/(2*n); end n = size(A, 1); assert(n >= 2, 'A must be at least 2x2.'); assert(isreal(A), 'A must be real.'); assert(size(A, 2) == n, 'A must be square.'); % Force A to be symmetric A = (A+A.')/2; % By default, pick a sufficiently large p (number of columns of Y). if ~exist('p', 'var') || isempty(p) p = ceil(sqrt(8*n+1)/2); end assert(p >= 2 && p == round(p), 'p must be an integer >= 2.'); % Pick the manifold of n-by-p matrices with unit norm rows. manifold = obliquefactory(p, n, true); problem.M = manifold; % These three, quick commented lines of code are sufficient to define % the cost function and its derivatives. This is good code to write % when prototyping. Below, a more advanced use of Manopt is shown, % where the redundant computation A*Y is avoided between the gradient % and the cost evaluation. % % problem.cost = @(Y) .5*sum(sum((A*Y).*Y)); % % problem.egrad = @(Y) A*Y; % % problem.ehess = @(Y, Ydot) A*Ydot; % Products with A dominate the cost, hence we store the result. % This allows to share the results among cost, grad and hess. 
% This is completely optional. function store = prepare(Y, store) if ~isfield(store, 'AY') AY = A*Y; store.AY = AY; store.diagAYYt = sum(AY .* Y, 2); end end % Define the cost function to be /minimized/. problem.cost = @cost; function [f, store] = cost(Y, store) store = prepare(Y, store); f = .5*sum(store.diagAYYt); end % Define the Riemannian gradient. problem.grad = @grad; function [G, store] = grad(Y, store) store = prepare(Y, store); G = store.AY - bsxfun(@times, Y, store.diagAYYt); end % If you want to, you can specify the Riemannian Hessian as well. problem.hess = @hess; function [H, store] = hess(Y, Ydot, store) store = prepare(Y, store); SYdot = A*Ydot - bsxfun(@times, Ydot, store.diagAYYt); H = manifold.proj(Y, SYdot); end % If no initial guess is available, tell Manopt to use a random one. if ~exist('Y0', 'var') || isempty(Y0) Y0 = []; end % Call your favorite solver. opts = struct(); opts.verbosity = 0; % Set to 0 for no output, 2 for normal output opts.maxinner = 500; % maximum Hessian calls per iteration opts.tolgradnorm = 1e-6; % tolerance on gradient norm Y = trustregions(problem, Y0, opts); % If required, produce an optimality certificate. if nargout >= 3 S = A - spdiags(sum((A*Y).*Y, 2), 0, n, n); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/elliptope_SDP_complex.m ================================================ function [Y, problem, S] = elliptope_SDP_complex(A, p, Y0) % Solver for complex semidefinite programs (SDP's) with unit diagonal. % % function [Y, problem, S] = elliptope_SDP_complex(A) % function [Y, problem, S] = elliptope_SDP_complex(A, p) % function [Y, problem, S] = elliptope_SDP_complex(A, p, Y0) % % A is a Hermitian matrix of size n. % % This function uses a local optimization method in Manopt to solve the SDP % % min_X trace(A*X) s.t. diag(X) = 1, X is complex, positive semidefinite. 
% % In practice, the Hermitian matrix X of size n is parameterized as % X = Y*Y', where Y has size n x p. By default, p is taken large enough % (that is, sqrt(n)) to ensure that there exists an optimal X whose rank is % smaller than p. This ensures that the SDP is equivalent to the new % problem in Y: % % min_Y trace(Y'*A*Y) s.t. diag(Y*Y') = 1, Y complex % % The constraints on Y require each row of Y to have unit norm, which is % why Manopt is appropriate software to solve this problem. An optional % initial guess can be specified via the input Y0. % % See the paper below for theory, specifically, for a proof that, for % almost all A, second-order critical points of the problem in Y are % globally optimal. In other words: there are no local traps in Y, despite % non-convexity. % % Outputs: % % Y: is the best point found (an nxp matrix with unit norm rows.) % To find X, form Y*Y' (or, more efficiently, study X through Y.) % % problem: is the Manopt problem structure used to produce Y. % % S: is a dual optimality certificate (a Hermitian matrix of size n, % sparse if A is sparse). The optimality gap (in the cost % function) is at most n*min(eig(S)), for both Y and X = Y*Y'. % Hence, if min(eig(S)) is close to zero, Y is close to globally % optimal. This can be computed via eigs(S, 1, 'SR'). % % Paper: https://arxiv.org/abs/1606.04970 % % @inproceedings{boumal2016bmapproach, % author = {Boumal, N. and Voroninski, V. and Bandeira, A.S.}, % title = {The non-convex {B}urer-{M}onteiro approach works on smooth semidefinite programs}, % booktitle={Neural Information Processing Systems (NIPS 2016)}, % year = {2016} % } % % See also: maxcut elliptope_SDP % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Oct. 21, 2016 % Contributors: % Change log: % If no inputs are provided, since this is an example file, generate % a random complex matrix. This is for illustration purposes only. 
if ~exist('A', 'var') || isempty(A) n = 100; A = randn(n) + 1i*randn(n); A = (A+A')/sqrt(2*n); end n = size(A, 1); assert(n >= 2, 'A must be at least 2x2.'); assert(size(A, 2) == n, 'A must be square.'); % Force A to be Hermitian A = (A+A')/2; % By default, pick a sufficiently large p (number of columns of Y). if ~exist('p', 'var') || isempty(p) p = floor(sqrt(n)+1); end assert(p >= 1 && p == round(p), 'p must be an integer >= 1.'); % Pick the manifold of complex n-by-p matrices with unit norm rows. manifold = obliquecomplexfactory(p, n, true); problem.M = manifold; % These three, quick commented lines of code are sufficient to define % the cost function and its derivatives. This is good code to write % when prototyping. Below, a more advanced use of Manopt is shown, % where the redundant computation A*Y is avoided between the gradient % and the cost evaluation. % % problem.cost = @(Y) .5*sum(sum(real((A*Y).*conj(Y)))); % % problem.egrad = @(Y) A*Y; % % problem.ehess = @(Y, Ydot) A*Ydot; % Products with A dominate the cost, hence we store the result. % This allows to share the results among cost, grad and hess. % This is completely optional. function store = prepare(Y, store) if ~isfield(store, 'AY') AY = A*Y; store.AY = AY; store.diagAYYt = sum(real(AY .* conj(Y)), 2); end end % Define the cost function to be /minimized/. problem.cost = @cost; function [f, store] = cost(Y, store) store = prepare(Y, store); f = .5*sum(store.diagAYYt); end % Define the Riemannian gradient. problem.grad = @grad; function [G, store] = grad(Y, store) store = prepare(Y, store); G = store.AY - bsxfun(@times, Y, store.diagAYYt); end % If you want to, you can specify the Riemannian Hessian as well. problem.hess = @hess; function [H, store] = hess(Y, Ydot, store) store = prepare(Y, store); SYdot = A*Ydot - bsxfun(@times, Ydot, store.diagAYYt); H = manifold.proj(Y, SYdot); end % If no initial guess is available, tell Manopt to use a random one. 
if ~exist('Y0', 'var') || isempty(Y0) Y0 = []; end % Call your favorite solver. opts = struct(); opts.verbosity = 0; % Set to 0 for no output, 2 for normal output opts.maxinner = 500; % maximum Hessian calls per iteration opts.tolgradnorm = 1e-6; % tolerance on gradient norm Y = trustregions(problem, Y0, opts); % If required, produce an optimality certificate. if nargout >= 3 S = A - spdiags(sum(real((A*Y).*conj(Y)), 2), 0, n, n); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/essential_svd.m ================================================ function essential_svd % Sample solution of an optimization problem on the essential manifold. % % Solves the problem \sum_{i=1}^N ||E_i-A_i||^2, where E_i are essential % matrices. Essential matrices are used in computer vision to represent the % epipolar constraint between projected points in two perspective views. % % Note: the essentialfactory file uses a quotient R1/R2 representation to % work with essential matrices. On the other hand, from a user point of % view, it is convenient to use the E representation (a matrix of size % 3-by-3) to give cost, gradient, and Hessian information. To this end, we % provide auxiliary files essential_costE2cost, essential_egradE2egrad, and % essential_ehessE2ehess that convert these ingredients to their R1/R2 % counterparts. % % See also: essentialfactory essential_costE2cost essential_egradE2egrad % essential_ehessE2ehess % This file is part of Manopt: www.manopt.org. % Original author: Roberto Tron, Aug. 8, 2014 % Contributors: Bamdev Mishra, May 15, 2015. % Make data for the test N = 2; % Number of matrices to process in parallel. 
A = multiprod(multiprod(randrot(3, N), essential_hat3([0; 0; 1])), randrot(3, N)); % The essential manifold M = essentialfactory(N); problem.M = M; % Function handles of the essential matrix E and Euclidean gradient and Hessian costE = @(E) 0.5*sum(multisqnorm(E-A)); egradE = @(E) E - A; ehessE = @(E, U) U; % Manopt descriptions problem.cost = @cost; function val = cost(X) val = essential_costE2cost(X, costE); % Cost end problem.egrad = @egrad; function g = egrad(X) g = essential_egradE2egrad(X, egradE); % Converts gradient in E to X. end problem.ehess = @ehess; function gdot = ehess(X, S) gdot = essential_ehessE2ehess(X, egradE, ehessE, S); % Converts Hessian in E to X. end % Numerically check the differentials. % checkgradient(problem); pause; % checkhessian(problem); pause; %Solve the problem Xsol = trustregions(problem); % Distance between original matrices and decompositions val = essential_costE2cost(Xsol, costE); fprintf('Distance between original matrices and decompositions is %e \n', val); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/generalized_eigenvalue_computation.m ================================================ function [Xsol, Ssol] = generalized_eigenvalue_computation(A, B, p) % Returns orthonormal basis of the dominant invariant p-subspace of B^-1 A. % % function [Xsol, Ssol] = generalized_eigenvalue_computation(A, B, p) % % Input: A is a real, symmetric matrix of size nxn, % B is a symmetric positive definite matrix, same size as A % p is an integer such that p <= n. % % Output: Xsol: a real, B-orthonormal matrix X of size nxp such that % trace(X'*A*X) is maximized, subject to X'*B*X = identity. % That is, the columns of X form a B-orthonormal basis of a % dominant subspace of dimension p of B^(-1)*A. These are thus % generalized eigenvectors associated with the largest generalized % eigenvalues of B^(-1)*A (in no particular order). 
%         Sign is important: 2 is deemed a larger eigenvalue than -5.
%         Ssol: the eigenvalues associated with the eigenvectors Xsol, in a
%         vector.
%
% We intend to solve the homogeneous system A*X = B*X*S,
% where S is a diagonal matrix of dominant eigenvalues of B^-1 A.
%
%
% The optimization is performed on the generalized Grassmann manifold,
% since only the space spanned by the columns of X matters in the
% optimization problem.
%
% The optimization problem that we are solving here is
% maximize trace(X'*A*X) subject to X'*B*X = eye(p).
% Consequently, the solutions remain invariant to transformation
% X --> XQ, where Q is a p-by-p orthogonal matrix. The search space, in
% essence, is set of equivalence classes
% [X] = {XQ : X'*B*X = I and Q is orthogonal matrix}. This space is called
% the generalized Grassmann manifold.
% Before returning, Q is chosen such that Xsol = X*Q matches the output one
% would expect from eigs.
%
% See also dominant_invariant_subspace nonlinear_eigenspace

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Bamdev Mishra, June 30, 2015.
% Contributors:
% Change log:
%
%   Aug. 10, 2016 (NB): the eigenvectors Xsol are now rotated by Vsol
%   before they are returned, to ensure the output matches what you would
%   normally expect calling eigs.

    % Generate some random data to test the function
    if ~exist('A', 'var') || isempty(A)
        n = 128;
        A = randn(n);
        A = (A+A')/2;
    end
    if ~exist('B', 'var') || isempty(B)
        n = size(A, 1);
        e = ones(n, 1);
        B = spdiags([-e 2*e -e], -1:1, n, n); % Symmetric positive definite
    end
    if ~exist('p', 'var') || isempty(p)
        p = 3;
    end

    % Make sure the input matrix is square and symmetric, and that B is
    % compatible with it.
    n = size(A, 1);
    assert(isreal(A), 'A must be real.');
    assert(size(A, 2) == n, 'A must be square.');
    assert(norm(A-A', 'fro') < n*eps, 'A must be symmetric.');
    assert(isequal(size(B), [n, n]), 'B must have the same size as A.');
    assert(p <= n, 'p must be smaller than n.');

    % Define the cost and its derivatives on the generalized
    % Grassmann manifold, i.e., the column space of all X such that
    % X'*B*X is identity.
    gGr = grassmanngeneralizedfactory(n, p, B);

    problem.M = gGr;
    problem.cost  = @(X)    -trace(X'*A*X);
    problem.egrad = @(X)    -2*(A*X); % Only Euclidean gradient needed.
    problem.ehess = @(X, H) -2*(A*H); % Only Euclidean Hessian needed.

    % Execute some checks on the derivatives for early debugging.
    % These things can be commented out of course.
    % checkgradient(problem);
    % pause;
    % checkhessian(problem);
    % pause;

    % Issue a call to a solver. A random initial guess will be chosen and
    % default options are selected except for the ones we specify here.
    options.Delta_bar = 8*sqrt(p);
    options.tolgradnorm = 1e-7;
    options.verbosity = 2; % set to 0 to silence the solver, 2 for normal output.
    [Xsol, costXsol, info] = trustregions(problem, [], options); %#ok

    % To extract the eigenvalues, solve the small p-by-p symmetric
    % eigenvalue problem. The product Xsol'*(A*Xsol) is symmetric only up
    % to rounding errors, so it is symmetrized explicitly before calling
    % eig, so that eig is given an exactly symmetric matrix.
    reduced = Xsol'*(A*Xsol);
    reduced = (reduced + reduced')/2;
    [Vsol, Dsol] = eig(reduced);
    Ssol = diag(Dsol);

    % To extract the eigenvectors, rotate Xsol by the p-by-p orthogonal
    % matrix Vsol.
    Xsol = Xsol*Vsol;

    % This quantity should be small.
    % norm(A*Xsol - B*Xsol*diag(Ssol));

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/generalized_procrustes.m
================================================
function [A, R] = generalized_procrustes(A_measure)
% Rotationally align clouds of points (generalized Procrustes problem)
%
% function [A, R] = generalized_procrustes(A_measure)
%
% The input is a 3D matrix A_measure of size nxmxN. Each of the N slices
% A_measure(:, :, i) is a cloud of m points in R^n. These clouds are
% assumed to be (noisy) rotated versions of a reference cloud Atrue.
% This algorithm tries to find the optimal rotations to apply to the
% individual clouds such that they will match each other as much as
% possible following a least-squares cost.
%
% The output A is an estimate of the cloud Atrue (up to rotation). The
% output R is a 3D matrix of size nxnxN containing the rotation matrices
% such that R(:, :, i) * A is approximately equal to A_measure(:, :, i).

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Nicolas Boumal, July 8, 2013
% Contributors:
%
% Change log:
%

    if ~exist('A_measure', 'var')
        % Generate random data to test the method.
        % There are N clouds of m points in R^n. Each of them is a noisy,
        % rotated version of a reference cloud A. Rotations are uniformly
        % random and noise on each rotated cloud is iid normal with
        % standard deviation sigma.
        n = 3;
        m = 10;
        N = 50;
        % The reference cloud
        Atrue = randn(n, m);
        % A 3D matrix containing the N measured clouds
        sigma = .3;
        A_measure = multiprod(randrot(n, N), Atrue) + sigma*randn(n, m, N);
    else
        [n, m, N] = size(A_measure);
    end

    % Construct a manifold structure representing the product of groups of
    % rotations with the Euclidean space for A. We optimize simultaneously
    % for the reference cloud and for the rotations that affect each of the
    % measured clouds.
% Notice that there is a group invariance because
    % there is no way of telling which orientation the reference cloud
    % should be in.
    tuple.R = rotationsfactory(n, N);
    tuple.A = euclideanfactory(n, m);
    M = productmanifold(tuple);

    % Define the cost function here. Points on the manifold M are
    % structures with fields X.A and X.R, containing matrices of sizes
    % respectively nxm and nxnxN. The store structure (the caching system)
    % is used to keep the residue matrix E in memory, as it is also used in
    % the computation of the gradient and of the Hessian. This way, we
    % prevent redundant computations.
    %
    % Note: the nested functions below deliberately use local names
    % (rots, cloud, ...) that do not appear in the parent function. The
    % names A and R are output variables of the parent and would be shared
    % with (and overwritten by) any nested function that assigns to them.
    function [f, store] = cost(X, store)
        if ~isfield(store, 'E')
            store.E = multiprod(X.R, X.A) - A_measure;
        end
        residue = store.E;
        f = (residue(:)'*residue(:))/(2*N);
    end

    % Riemannian gradient of the cost function.
    function [g, store] = grad(X, store)
        rots = X.R;
        cloud = X.A;
        if ~isfield(store, 'E')
            [~, store] = cost(X, store);
        end
        residue = store.E;
        % Compute the Euclidean gradient of the cost wrt the rotations R
        % and wrt the cloud A,
        egrad.R = multiprod(residue, cloud'/N);
        egrad.A = cloud - mean(multiprod(multitransp(rots), A_measure), 3);
        % then transform this Euclidean gradient into the Riemannian
        % gradient.
        g = M.egrad2rgrad(X, egrad);
        store.egrad = egrad;
    end

    % It is not necessary to define the Hessian of the cost. We do it
    % mostly to illustrate how to do it and to study the spectrum of the
    % Hessian at the solution (see further down).
    function [h, store] = hess(X, Xdot, store)
        rots = X.R;
        cloud = X.A;
        % Careful: tangent vectors on the rotation group are represented as
        % skew symmetric matrices. To obtain the corresponding vectors in
        % the ambient space, we need a little transformation. This
        % transformation is typically not needed when we compute the
        % formulas for the gradient and the Hessian directly in Riemannian
        % form instead of resorting the egrad2rgrad and ehess2rhess. These
        % latter tools are convenient for prototyping but are not always
        % the most efficient form to execute the computations.
        rots_dot = tuple.R.tangent2ambient(rots, Xdot.R);
        cloud_dot = Xdot.A;
        if ~isfield(store, 'egrad')
            [~, store] = grad(X, store);
        end
        residue = store.E;
        egrad = store.egrad;

        ehess.R = multiprod(multiprod(rots_dot, cloud) + multiprod(rots, cloud_dot), cloud') + ...
                  multiprod(residue, cloud_dot');
        ehess.R = ehess.R / N;
        ehess.A = cloud_dot-mean(multiprod(multitransp(rots_dot), A_measure), 3);

        h = M.ehess2rhess(X, egrad, ehess, Xdot);
    end

    % Setup the problem structure with manifold M and cost+grad functions.
    problem.M = M;
    problem.cost = @cost;
    problem.grad = @grad;
    problem.hess = @hess;

    % For debugging, it's always nice to check the gradient a few times.
    % checkgradient(problem);
    % pause;
    % checkhessian(problem);
    % pause;

    % Call a solver on our problem. This can probably be much improved if a
    % clever initial guess is used instead of a random one.
    X = trustregions(problem);
    A = X.A;
    R = X.R;

    % To evaluate the performance of the algorithm, see how well Atrue (the
    % reference cloud) matches A (the found cloud). Since the recovery is
    % up to rotation, apply Kabsch algorithm (or standard Procrustes),
    % i.e., compute the polar factorization to best align Atrue and A.
    if exist('Atrue', 'var')
        [U, ~, V] = svd(Atrue*A');
        Ahat = (U*V')*A;
        fprintf('Registration error: %g.\n', norm(Atrue-Ahat, 'fro'));
    end

    % Plot the spectrum of the Hessian at the solution found.
    % Notice that the invariance of f under a rotation yields dim SO(n),
    % that is, n*(n-1)/2 zero eigenvalues in the Hessian spectrum at the
    % solution. This indicates that critical points are not isolated and
    % can theoretically prevent quadratic convergence. One solution to
    % circumvent this would be to fix one rotation arbitrarily. Another
    % solution would be to work on a quotient manifold. Both can be
    % achieved in Manopt: they simply require a little more work on the
    % manifold description side.
    if M.dim() <= 512
        stairs(sort(hessianspectrum(problem, X)));
        title('Spectrum of the Hessian at the solution found.');
        xlabel('Eigenvalue number (sorted)');
        ylabel('Value of the eigenvalue');
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/low_rank_dist_completion.m
================================================
function [Y, infos, problem_description] = low_rank_dist_completion(problem_description)
% Perform low-rank distance matrix completion w/ automatic rank detection.
%
% function Y = low_rank_dist_completion(problem_description)
% function [Y, infos, out_problem_description] = low_rank_dist_completion(problem_description)
%
% It implements the ideas of Journee, Bach, Absil and Sepulchre, SIOPT, 2010,
% applied to the problem of low-rank Euclidean distance matrix completion.
% The details are in the paper "Low-rank optimization for distance matrix completion",
% B. Mishra, G. Meyer, and R. Sepulchre, IEEE CDC, 2011.
%
% Paper link: http://arxiv.org/abs/1304.6663.
%
% Input:
% -------
%
% problem_description: The problem structure with the description of the problem.
%
%
% - problem_description.data_train: Data structure for known distances that are used to learn a low-rank model.
%                                   It contains the 3 fields that are shown
%                                   below. An empty "data_train" structure
%                                   will generate the 3d Helix instance.
%
%       -- data_train.entries:      A column vector consisting of known
%                                   distances. An empty "data_train.entries"
%                                   field will generate the 3d Helix
%                                   instance.
%
%       -- data_train.rows:         The row position of the corresponding
%                                   distances. An empty "data_train.rows"
%                                   field will generate the 3d Helix
%                                   instance.
%
%       -- data_train.cols:         The column position of the corresponding
%                                   distances. An empty "data_train.cols"
%                                   field will generate the 3d Helix
%                                   instance.
%
%
%
% - problem_description.data_test:  Data structure to compute distances for the "unknown" (to the algorithm) distances.
%                                   It contains the 3 fields that are shown
%                                   below. An empty "data_test" structure
%                                   will not compute the test error.
%
%       -- data_test.entries:       A column vector consisting of "unknown" (to the algorithm)
%                                   distances. An empty "data_test.entries"
%                                   field will not compute the test error.
%       -- data_test.rows:          The row position of the corresponding
%                                   distances. An empty "data_test.rows"
%                                   field will not compute the test error.
%       -- data_test.cols:          The column position of the corresponding
%                                   distances. An empty "data_test.cols"
%                                   field will not compute the test error.
%
%
%
% - problem_description.n:          The number of data points. An empty
%                                   "n", but complete "data_train" structure
%                                   will lead to an error, to avoid
%                                   potential data inconsistency.
%
%
%
%
%
% - problem_description.rank_initial: Starting rank. By default, it is 1.
%
%
%
% - problem_description.rank_max:   Maximum rank. By default, it is equal to
%                                   "problem_description.n".
%
%
%
%
% - problem_description.params:     Structure array containing algorithm
%                                   parameters for stopping criteria.
%       -- params.abstolcost:       Tolerance on the absolute decrease of the cost.
%                                   By default, it is 1e-3.
%
%
%       -- params.reltolcost:       Tolerance on the relative decrease of the cost.
%                                   By default, it is 1e-3.
%       -- params.tolgradnorm:      Tolerance on the norm of the gradient.
%                                   By default, it is 1e-5.
%       -- params.maxiter:          Maximum number of fixed-rank iterations.
%                                   By default, it is 100.
%       -- params.tolSmin:          Tolerance on smallest eigenvalue of Sy,
%                                   the dual variable.
%                                   By default, it is -1e-3.
%       -- params.tolrankdeficiency: Tolerance on the
%                                   smallest singular value of Y.
%                                   By default, it is 1e-3.
%       -- params.solver:           Fixed-rank algorithm. Options are
%                                   '@trustregions' for trust-regions,
%                                   '@conjugategradient' for conjugate gradients,
%                                   '@steepestdescent' for steepest descent.
%                                   By default, it is '@trustregions'.
%
%
% Output:
% --------
%
%   Y:                    n-by-r solution matrix of rank r.
%   infos:                Structure array with computed statistics.
%   problem_description:  Structure array with used problem description.
%
%
%
% Please cite the Manopt paper as well as the research paper:
%     @InProceedings{mishra2011dist,
%       Title        = {Low-rank optimization for distance matrix completion},
%       Author       = {Mishra, B. and Meyer, G. and Sepulchre, R.},
%       Booktitle    = {{50th IEEE Conference on Decision and Control}},
%       Year         = {2011},
%       Organization = {{IEEE CDC}}
%     }

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, April 06, 2015.
% Contributors: Nicolas Boumal.
% Change log:
%    August 30 2016 (BM):
%                   Corrected some logic flaws while plotting and storing
%                   rank information. A typo was also corrected.

    % Check problem description
    if ~exist('problem_description', 'var')
        problem_description = struct();
    end
    problem_description = check_problem_description(problem_description); % Check the problem description;

    % Common quantities
    data_train   = problem_description.data_train;
    data_test    = problem_description.data_test;
    n            = problem_description.n;
    rank_initial = problem_description.rank_initial;
    rank_max     = problem_description.rank_max;
    params       = problem_description.params;

    N = data_train.nentries; % Number of known distances

    % Column k of EIJ is e_i - e_j for the k-th known pair (i, j).
    EIJ = speye(n);
    EIJ = EIJ(:, data_train.rows) - EIJ(:, data_train.cols);

    rr = rank_initial;  % Starting rank.
    Y = randn(n, rr);   % Random starting initialization.

    % Information gathered across all the fixed-rank runs
    time_hist = [];          % Time for each iteration per rank
    cost_hist = [];          % Cost at each iteration per rank
    test_error_hist = [];    % Test error at each iteration per rank
    rank_hist = [];          % Rank at each iteration
    rank_change_stats = [];  % Some stats relating the change of ranks

    % Options to make eigs silent.
    eigs_opts = struct('disp', 0, 'issym', true);

    % Main loop rank search
    rank_search = 0;
    while rr <= rank_max % When r = n a global min is attained for sure.
        rank_search = rank_search + 1;

        fprintf('>> Rank %d <<\n', rr);

        % Follow the descent direction to compute an iterate in a higher dimension
        if rr > rank_initial
            if isempty(restartDir) % If no restart dir avail. do a random restart

                disp('No restart dir available, random restart is performed');
                Y = randn(n, rr);

            else % Perform a simple line-search based on the restart direction

                disp('>> Line-search with restart direction');
                Y(:, rr) = 0; % Append a column of zeroes

                costBefore = cost_evaluation(Y, data_train);
                fprintf('>> Cost before = %f\n',costBefore);

                % Simple linesearch to maintain monotonicity
                problem.M = symfixedrankYYfactory(n, rr);
                problem.cost = @(Y) cost_evaluation(Y, data_train);
                d = zeros(size(Y));
                d(:, rr) = restartDir;
                [~, Y] = linesearch_decrease(problem, Y, d, costBefore);

                costAfter = cost_evaluation(Y, data_train);

                % Check for decrease
                if costAfter >= costBefore - 1e-8
                    disp('Decrease is not sufficient, random restart');
                    Y = randn(n, rr);
                end

            end
        end

        % Fixed-rank optimization with Manopt
        [Y, infos_fixedrank] = low_rank_dist_completion_fixedrank(data_train, data_test, Y, params);

        % Some info logging; times are made cumulative across ranks.
        run_time = [infos_fixedrank.time];
        if ~isempty(time_hist)
            run_time = time_hist(end) + run_time;
        end
        run_cost = [infos_fixedrank.cost];
        time_hist = [time_hist run_time]; %#ok
        cost_hist = [cost_hist run_cost]; %#ok
        rank_hist = [rank_hist [infos_fixedrank.rank]]; %#ok
        rank_change_stats(rank_search).rank = rr; %#ok
        rank_change_stats(rank_search).iter = length(run_cost); %#ok
        rank_change_stats(rank_search).Y = Y; %#ok
        if isfield(infos_fixedrank, 'test_error')
            test_error_hist = [test_error_hist [infos_fixedrank.test_error]]; %#ok
        end

        % Evaluate gradient of the convex cost function (i.e. wrt X).
        Z = Y(data_train.rows, :) - Y(data_train.cols,:);
        estimDists = sum(Z.^2,2);
        errors = (estimDists - data_train.entries);

        % Dual variable and its minimum eigenvalue that is used to guarantee convergence.
        error_weights = sparse(1:N,1:N,2 * errors / N,N,N);
        Sy = (0.5)*EIJ * error_weights * EIJ'; % "0.5" comes from 0.5 in cost evaluation

        % Compute smallest algebraic eigenvalue of Sy,
        % this gives us a descent direction for the next rank (v)
        % as well as a way to control progress toward the global
        % optimum (s_min).
        [v, s_min] = eigs(Sy, 1, 'SA', eigs_opts);

        % Check whether Y is rank deficient.
        vp = svd(Y);

        % Stopping criterion.
        fprintf('>> smin = %.3e, and min(vp) = %.3e\n',s_min,min(vp));
        if (s_min > params.tolSmin) || (min(vp) < params.tolrankdeficiency)
            break;
        end

        % Update rank
        rr = rr + 1;

        % Compute descent direction
        if s_min < -1e-10
            restartDir = v;
        else
            restartDir = [];
        end
    end

    % Collect relevant statistics
    infos.time = time_hist;
    infos.cost = cost_hist;
    infos.rank = rank_hist;
    infos.test_error = test_error_hist;
    infos.rank_change_stats = rank_change_stats;

    % Few plots.
    show_plots(problem_description, infos);

end



%% Cost function evaluation.
function val = cost_evaluation(Y, data_train)
    % Half the mean squared mismatch between the squared distances
    % induced by the rows of Y and the known entries.
    row_diffs = Y(data_train.rows, :) - Y(data_train.cols,:);
    residuals = sum(row_diffs.^2, 2) - data_train.entries;
    val = 0.5*mean(residuals.^2);
end



%% Local defaults
function localdefaults = getlocaldefaults()
    localdefaults = struct( ...
        'abstolcost',        1e-3, ...
        'reltolcost',        1e-3, ...
        'tolSmin',           -1e-3, ...
        'tolrankdeficiency', 1e-3, ...
        'tolgradnorm',       1e-5, ...
        'maxiter',           100, ...
        'solver',            @trustregions); % Trust-regions
end



%% Fixed-rank optimization
function [Yopt, infos] = low_rank_dist_completion_fixedrank(data_train, data_test, Y_initial, params)
    % Common quantities that are used often in the optimization process.
[n, r] = size(Y_initial);
    EIJ = speye(n);
    EIJ = EIJ(:, data_train.rows) - EIJ(:, data_train.cols);

    % Create problem structure
    problem.M = symfixedrankYYfactory(n, r);

    % Make sure the product EIJ'*Y is available in the store; it is
    % computed only if the store does not hold it yet.
    function store = cache_differences(Y, store)
        if ~isfield(store, 'xij')
            store.xij = EIJ'*Y;
        end
    end

    % Cost evaluation
    problem.cost = @cost;
    function [f, store] = cost(Y, store)
        store = cache_differences(Y, store);
        sqdists = sum(store.xij.^2,2);
        f = 0.5*mean((sqdists - data_train.entries).^2);
    end

    % Gradient evaluation
    problem.grad = @grad;
    function [g, store] = grad(Y, store)
        nobs = data_train.nentries;
        store = cache_differences(Y, store);
        diffs = store.xij;
        sqdists = sum(diffs.^2,2);
        weights = sparse(1:nobs,1:nobs,2 * (sqdists - data_train.entries) / nobs, nobs, nobs);
        g = EIJ * weights * diffs;
    end

    % Hessian evaluation
    problem.hess = @hess;
    function [Hess, store] = hess(Y, eta, store)
        nobs = data_train.nentries;
        store = cache_differences(Y, store);
        diffs = store.xij;
        diffs_dot = EIJ'*eta;
        sqdists = sum(diffs.^2,2);
        cross_terms = 2*sum(diffs .* diffs_dot,2);

        residual_weights = sparse(1:nobs,1:nobs,2 * (sqdists - data_train.entries) / nobs,nobs,nobs);
        cross_weights = sparse(1:nobs,1:nobs,2 * cross_terms / nobs,nobs,nobs);
        Hess = (EIJ*residual_weights)*diffs_dot + (EIJ*cross_weights)*diffs;
        Hess = problem.M.proj(Y, Hess);
    end

    % % Check numerically whether gradient and Hessian are correct
    % checkgradient(problem);
    % drawnow;
    % pause;
    % checkhessian(problem);
    % drawnow;
    % pause;

    % When asked, ask Manopt to compute the test error at every iteration.
    if ~isempty(data_test)
        options.statsfun = @compute_test_error;
        EIJ_test = speye(n);
        EIJ_test = EIJ_test(:, data_test.rows) - EIJ_test(:, data_test.cols);
    end
    function stats = compute_test_error(problem, Y, stats) %#ok
        test_diffs = EIJ_test'*Y;
        test_sqdists = sum(test_diffs.^2,2);
        stats.test_error = 0.5*mean((test_sqdists - data_test.entries).^2);
    end

    % Stopping criteria options: from the fifth entry of the info array
    % onward, stop when the cost decrease over the last two entries is
    % below the absolute or the relative tolerance.
    options.stopfun = @mystopfun;
    function stopnow = mystopfun(problem, Y, info, last) %#ok
        stopnow = false;
        if last >= 5
            stopnow = (info(last-2).cost - info(last).cost < params.abstolcost) ...
                   || (abs(info(last-2).cost - info(last).cost)/info(last).cost < params.reltolcost);
        end
    end
    options.tolgradnorm = params.tolgradnorm;
    options.maxiter = params.maxiter;

    % Call appropriate algorithm
    options.solver = params.solver;
    [Yopt, ~, infos] = manoptsolve(problem, Y_initial, options);
    [infos.rank] = deal(r);
end



%% 3d Helix problem instance
function problem_description = get_3d_Helix_instance()

    % Helix curve in 3d
    tvec = (0:2*pi/100:2*pi)'; % column vector
    Yo = [4*cos(3*tvec), 4*sin(3*tvec), 2*tvec];
    n = size(Yo, 1); % Number of points

    % Fraction of unknown distances
    fractionOfUnknown = 0.85;

    % True distances among points in 3d Helix
    trueDists = pdist(Yo)'.^2; % True distances

    % Add noise (set noise_level = 0 for clean measurements)
    noise_level = 0; % 0.01;
    trueDists = trueDists + noise_level * std(trueDists) * randn(size(trueDists));

    % Compute all pairs of indices (strict lower triangle, column-major)
    [I, J] = find(tril(true(n), -1));

    % Train data
    train = false(length(trueDists), 1);
    train(1:floor(length(trueDists)*(1- fractionOfUnknown))) = true;
    train = train(randperm(length(train)));

    data_train.rows = I(train);
    data_train.cols = J(train);
    data_train.entries = trueDists(train);
    data_train.nentries = length(data_train.entries);

    % Test data
    data_test.nentries = 1*data_train.nentries; % Depends how big data that we can handle.
% Draw the test pairs among the pairs that are NOT in the training set,
    % so that the test distances are truly unknown to the algorithm.
    candidates = find(~train);
    candidates = candidates(randperm(length(candidates)));
    data_test.nentries = min(floor(data_test.nentries), length(candidates));
    test = false(length(trueDists),1);
    test(candidates(1 : data_test.nentries)) = true;
    data_test.rows = I(test);
    data_test.cols = J(test);
    data_test.entries = trueDists(test);

    % Rank bounds
    rank_initial = 1; % Starting rank
    rank_max = n;     % Maximum rank

    % Basic parameters used in optimization
    params = struct();
    params = mergeOptions(getlocaldefaults, params);

    % Problem description
    problem_description.data_train = data_train;
    problem_description.data_test = data_test;
    problem_description.n = n;
    problem_description.rank_initial = rank_initial;
    problem_description.rank_max = rank_max;
    problem_description.params = params;
    problem_description.Yo = Yo; % Store original Helix structure
end



%% Problem description check
function checked_problem_description = check_problem_description(problem_description)
    % Validate the user-supplied problem description and fill in defaults.
    % If the training data is missing or incomplete, the whole description
    % is replaced by the default 3d Helix instance.

    checked_problem_description = problem_description;

    % Check train data
    if isempty(problem_description)...
            || ~all(isfield(problem_description,{'data_train'}) == 1)...
            || ~all(isfield(problem_description.data_train,{'cols', 'rows', 'entries'}) == 1)...
            || isempty(problem_description.data_train.cols)...
            || isempty(problem_description.data_train.rows)...
            || isempty(problem_description.data_train.entries)

        warning('low_rank_dist_completion:problem_description', ...
            'The training set is empty or not properly defined. We work with the default 3d Helix example.\n');
        checked_problem_description = get_3d_Helix_instance();
        checked_problem_description.helix_example = true;
        return; % No need for further check
    end

    % The solver reads data_train.nentries, which is not one of the three
    % fields the user is asked to provide: derive it from the entries.
    checked_problem_description.data_train.nentries = ...
        length(problem_description.data_train.entries);

    % Check number of data points
    if ~isfield(problem_description, 'n') || isempty(problem_description.n)
        error('low_rank_dist_completion:problem_description',...
            'Error. The scalar corresponding to field "n" of problem description must be given. \n');
    end

    % Check initial rank
    if ~isfield(problem_description, 'rank_initial')...
            || isempty(problem_description.rank_initial)...
            || ~(floor(problem_description.rank_initial) == problem_description.rank_initial)
        warning('low_rank_dist_completion:problem_description', ...
            'The field "rank_initial" is not properly defined. We work with the default "1".\n');
        rank_initial = 1;
    else
        rank_initial = problem_description.rank_initial;
    end
    checked_problem_description.rank_initial = rank_initial;

    % Check maximum rank
    if ~isfield(problem_description, 'rank_max')...
            || isempty(problem_description.rank_max)...
            || ~(floor(problem_description.rank_max) == problem_description.rank_max)...
            || problem_description.rank_max > problem_description.n
        warning('low_rank_dist_completion:problem_description', ...
            'The field "rank_max" is not properly defined. We work with the default "n".\n');
        rank_max = problem_description.n;
    else
        rank_max = problem_description.rank_max;
    end
    checked_problem_description.rank_max = rank_max;

    % Check testing dataset
    if ~isfield(problem_description,{'data_test'})...
            || ~all(isfield(problem_description.data_test,{'cols', 'rows', 'entries'}) == 1)...
            || isempty(problem_description.data_test.cols)...
            || isempty(problem_description.data_test.rows)...
            || isempty(problem_description.data_test.entries)

        warning('low_rank_dist_completion:problem_description', ...
            'The field "data_test" is not properly defined. We work with the default "[]".\n');
        data_test = [];
    else
        data_test = problem_description.data_test;
    end
    checked_problem_description.data_test = data_test;

    % Check parameters
    if isfield(problem_description, 'params')
        params = problem_description.params;
    else
        params = struct();
    end
    params = mergeOptions(getlocaldefaults, params);
    checked_problem_description.params = params;

end



%% Show plots
function show_plots(problem_description, infos)
    % Plot the training cost, the test error (when available) and, for the
    % default Helix example, the structure recovered at each rank.

    solver = problem_description.params.solver;
    rank_change_stats = infos.rank_change_stats;
    rank_change_stats_rank = [rank_change_stats.rank];
    rank_change_stats_iter = [rank_change_stats.iter];
    rank_change_stats_iter = cumsum(rank_change_stats_iter);
    N = problem_description.data_train.nentries;
    n = problem_description.n;

    % Ticks of the top axis: iteration counts at which the rank changed,
    % labelled with the rank that was started at that point.
    tick_positions = rank_change_stats_iter(1:max(1,end-1));
    tick_labels = rank_change_stats_rank(1) + 1 : rank_change_stats_rank(max(1,end-1)) + 1;

    % Plot: train error
    plot_log_curve([infos.cost], 'Training on the known distances', ...
        'Cost (log scale) on known distances', solver, tick_positions, tick_labels);

    % Plot: test error
    if isfield(infos, 'test_error') && ~isempty(infos.test_error)
        plot_log_curve([infos.test_error], ...
            'Test error on a set of distances different from the training set', ...
            'Cost (log scale) on testing set', solver, tick_positions, tick_labels);
    end

    % Plot: visualize Helix curve
    if isfield(problem_description, 'helix_example')
        % The original structure is only stored for the Helix example, so
        % it is read here rather than in the test-error branch above.
        Yo = problem_description.Yo;

        jj = ceil((length(rank_change_stats_rank) + 1)/2);

        figure('name',['3D structure with ', num2str(N/((n^2 -n)/2)),' fraction known distances'])
        fs = 20;
        ax1 = gca;
        set(ax1,'FontSize',fs);
        subplot(jj,2,1);
        plot3(Yo(:,1), Yo(:,2), Yo(:,3),'*','Color', 'b','LineWidth',1.0);
        title('Original 3D structure');
        for kk = 1 : length(rank_change_stats_rank)
            subplot(jj, 2, kk + 1);
            rank_change_stats_kk = rank_change_stats(kk);
            Ykk = rank_change_stats_kk.Y;
            if size(Ykk, 2) == 1
                % Points on a line: plot them against a column of zeros.
                plot(Ykk(:,1), zeros(size(Ykk, 1), 1),'*','Color', 'r','LineWidth',1.0);
                legend(func2str(solver))
                title(['Recovery at rank ',num2str(size(Ykk, 2))]);

            elseif size(Ykk, 2) == 2
                plot(Ykk(:,1), Ykk(:,2),'*','Color', 'r','LineWidth',1.0);
                title(['Recovery at rank ',num2str(size(Ykk, 2))]);

            else
                % Project onto dominant 3D subspace: the coordinates of
                % the points in that subspace are the rows of U1*S1.
                [U1, S1, V1] = svds(Ykk, 3); %#ok
                Yhat = U1*S1;
                plot3(Yhat(:,1), Yhat(:,2), Yhat(:,3),'*','Color', 'r','LineWidth',1.0);
                title(['Recovery at rank ',num2str(size(Ykk, 2))]);
            end

            axis equal;

        end

        % Trick to add a global title to the whole subplot collection.
        % HitTest is disabled to make it easier to select the individual
        % subplots (for example, to rotate the viewing angle).
        ha = axes('Position',[0 0 1 1],'Xlim',[0 1],'Ylim',[0 1],'Box','off','Visible','off','Units','normalized', 'clipping' , 'off' );
        set(ha, 'HitTest', 'off');
        text(0.5, 1,['Recovery of Helix from ',num2str(N/((n^2 -n)/2)),' fraction known distances'],'HorizontalAlignment','center','VerticalAlignment', 'top');
    end

end



%% Plot a curve in log scale with a top axis marking the rank changes
function plot_log_curve(values, figure_name, y_label, solver, tick_positions, tick_labels)
    % values:         quantity to plot, one value per iteration.
    % figure_name:    name of the figure window.
    % y_label:        label of the vertical axis.
    % solver:         function handle of the solver, used for the legend.
    % tick_positions: positions of the ticks on the top axis.
    % tick_labels:    labels of those ticks.

    fs = 20;
    figure('name', figure_name);

    line(1:length(values),log10(values),'Marker','O','LineStyle','-','Color','blue','LineWidth',1.5);
    ax1 = gca;

    set(ax1,'FontSize',fs);
    xlabel(ax1,'Number of iterations','FontSize',fs);
    ylabel(ax1,y_label,'FontSize',fs);

    % Transparent second axes on top of the first ones.
    ax2 = axes('Position',get(ax1,'Position'),...
        'XAxisLocation','top',...
        'YAxisLocation','right',...
        'Color','none',...
        'XColor','k');

    set(ax2,'FontSize',fs);
    line(1:length(values),log10(values),'Marker','O','LineStyle','-','Color','blue','LineWidth',1.5,'Parent',ax2);
    set(ax2,'XTick',tick_positions,...
        'XTickLabel',tick_labels,...
        'YTick',[]);

    set(ax2,'XGrid','on');
    legend(func2str(solver));
    title('Rank');
    legend 'boxoff';
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/low_rank_matrix_completion.m
================================================
function low_rank_matrix_completion()
% Given partial observation of a low rank matrix, attempts to complete it.
%
% function low_rank_matrix_completion()
%
% This example demonstrates how to use the geometry factory for the
% embedded submanifold of fixed-rank matrices, fixedrankembeddedfactory.
% This geometry is described in the paper
% "Low-rank matrix completion by Riemannian optimization"
% Bart Vandereycken - SIAM Journal on Optimization, 2013.
%
% This can be a starting point for many optimization problems of the form:
%
% minimize f(X) such that rank(X) = k, size(X) = [m, n].
%
% Note that the code is long because it showcases quite a few features of
% Manopt: most of the code is optional.
%
% Input:  None. This example file generates random data.
%
% Output: None.

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Nicolas Boumal, July 15, 2014
% Contributors: Bart Vandereycken
%
% Change log:
%

    % Random data generation. First, choose the size of the problem.
    % We will complete a matrix of size mxn of rank k:
    m = 200;
    n = 500;
    k = 10;
    % Generate a random mxn matrix A of rank k
    L = randn(m, k);
    R = randn(n, k);
    A = L*R';
    % Generate a random mask for observed entries: P(i, j) = 1 if the entry
    % (i, j) of A is observed, and 0 otherwise.
    fraction = 4 * k*(m+n-k)/(m*n);
    P = sparse(rand(m, n) <= fraction);
    % Hence, we know the nonzero entries in PA:
    PA = P.*A;

    % Pick the manifold of matrices of size mxn of fixed rank k.
    problem.M = fixedrankembeddedfactory(m, n, k);

    % Define the problem cost function. The input X is a structure with
    % fields U, S, V representing a rank k matrix as U*S*V'.
    % f(X) = 1/2 * || P.*(X-A) ||^2
    %
    % Note: the nested functions below use local names (Xfull, ...) that do
    % not appear in the parent function, so that evaluating them never
    % overwrites a variable of the parent (such as Xmat further down).
    problem.cost = @cost;
    function f = cost(X)
        % Note that it is very much inefficient to explicitly construct the
        % matrix X in this way. Seen as we only need to know the entries
        % of Xmat corresponding to the mask P, it would be far more
        % efficient to compute those only.
        Xfull = X.U*X.S*X.V';
        f = .5*norm( P.*Xfull - PA , 'fro')^2;
    end

    % Define the Euclidean gradient of the cost function, that is, the
    % gradient of f(X) seen as a standard function of X.
    % nabla f(X) = P.*(X-A)
    problem.egrad = @egrad;
    function G = egrad(X)
        % Same comment here about Xmat.
        Xfull = X.U*X.S*X.V';
        G = P.*Xfull - PA;
    end

    % This is optional, but it's nice if you have it.
    % Define the Euclidean Hessian of the cost at X, along H, where H is
    % represented as a tangent vector: a structure with fields Up, Vp, M.
    % This is the directional derivative of nabla f(X) at X along Xdot:
    % nabla^2 f(X)[Xdot] = P.*Xdot
    problem.ehess = @euclidean_hessian;
    function ehess = euclidean_hessian(X, H)
        % The function tangent2ambient transforms H (a tangent vector) into
        % its equivalent ambient vector representation. The output is a
        % structure with fields U, S, V such that U*S*V' is an mxn matrix
        % corresponding to the tangent vector H. Note that there are no
        % additional guarantees about U, S and V. In particular, U and V
        % are not orthonormal.
        ambient_H = problem.M.tangent2ambient(X, H);
        Xdot = ambient_H.U*ambient_H.S*ambient_H.V';
        % Same comment here about explicitly constructing the ambient
        % vector as an mxn matrix Xdot: we only need its entries
        % corresponding to the mask P, and this could be computed
        % efficiently.
        ehess = P.*Xdot;
    end

    % Check consistency of the gradient and the Hessian. Useful if you
    % adapt this example for a new cost function and you would like to make
    % sure there is no mistake.
    % warning('off', 'manopt:fixedrankembeddedfactory:exp');
    % checkgradient(problem); pause;
    % checkhessian(problem); pause;

    % Compute an initial guess. Points on the manifold are represented as
    % structures with three fields: U, S and V. U and V need to be
    % orthonormal, S needs to be diagonal.
    [U, S, V] = svds(PA, k);
    X0.U = U;
    X0.S = S;
    X0.V = V;

    % Minimize the cost function using Riemannian trust-regions, starting
    % from the initial guess X0.
    X = trustregions(problem, X0);

    % The reconstructed matrix is X, represented as a structure with fields
    % U, S and V.
    Xmat = X.U*X.S*X.V';
    fprintf('||X-A||_F = %g\n', norm(Xmat - A, 'fro'));

    % Alternatively, we could decide to use a solver such as
    % steepestdescent or conjugategradient. These solvers need to solve a
    % line-search problem at each iteration. Standard line searches in
    % Manopt have generic purpose systems to do this. But for the problem
    % at hand, it so happens that we can rather accurately guess how far
    % the line-search should look, and it would be a waste to not use that.
    % Look up the paper referenced above for the mathematical explanation
    % of the code below.
    %
    % To tell Manopt about this special information, we specify the
    % linesearch hint function in the problem structure. Notice that this
    % is not the same thing as specifying a linesearch function in the
    % options structure.
    %
    % Both the SD and the CG solvers will detect that we
    % specify the hint function below, and they will use an appropriate
    % linesearch algorithm by default, as a result. Typically, they will
    % try the step t*H first, then if it does not satisfy an Armijo
    % criterion, they will decrease t geometrically until satisfaction or
    % failure.
    %
    % Just like the cost, egrad and ehess functions, the linesearch
    % function could use a store structure if you like. The present code
    % does not use the store structure, which means quite a bit of the
    % computations are made redundantly, and as a result a better method
    % could appear slower. See the Manopt tutorial about caching when you
    % are ready to switch from a proof-of-concept code to an efficient
    % code.
    %
    % The inputs are X (a point on the manifold) and H, a tangent vector at
    % X that is assumed to be a descent direction. That is, there exists a
    % positive t such that f(Retraction_X(tH)) < f(X). The function below
    % is supposed to output a "t" that it is a good "guess" at such a t.
    problem.linesearch = @linesearch_helper;
    function t = linesearch_helper(X, H)
        % Note that you would not usually need the Hessian for this.
        % Both matrices are restricted to the observed entries through the
        % mask P (rather than through their own nonzero patterns), so that
        % the two vectors stay aligned even if an observed entry of either
        % matrix happens to be exactly zero.
        observed = find(P);
        residual = problem.egrad(X);
        direction = problem.ehess(X, H);
        residual_omega = full(residual(observed));
        dir_omega = full(direction(observed));
        t = - dir_omega \ residual_omega ;
    end

    % Notice that for this solver, the Hessian is not needed.
    [Xcg, xcost, info, options] = conjugategradient(problem, X0); %#ok

    fprintf('Take a look at the options that CG used:\n');
    disp(options);
    fprintf('And see how many trials were made at each line search call:\n');
    info_ls = [info.linesearch];
    disp([info_ls.costevals]);

    fprintf('Try it again without the linesearch helper.\n');

    % Remove the linesearch helper from the problem structure.
    problem = rmfield(problem, 'linesearch');

    [Xcg, xcost, info, options] = conjugategradient(problem, X0); %#ok

    fprintf('Take a look at the options that CG used:\n');
    disp(options);
    fprintf('And see how many trials were made at each line search call:\n');
    info_ls = [info.linesearch];
    disp([info_ls.costevals]);

    % If the problem has a small enough dimension, we may (for analysis
    % purposes) compute the spectrum of the Hessian at a point X. This may
    % help in studying the conditioning of a problem. If you don't provide
    % the Hessian, Manopt will approximate the Hessian with finite
    % differences of the gradient and try to estimate its "spectrum" (it's
    % not a proper linear operator). This can give some intuition, but
    % should not be relied upon.
    if problem.M.dim() < 100
        fprintf('Computing the spectrum of the Hessian...');
        s = hessianspectrum(problem, X);
        hist(s);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/low_rank_tensor_completion.m
================================================
function low_rank_tensor_completion()
% Given partial observation of a low rank tensor, attempts to complete it.
%
% function low_rank_tensor_completion()
%
% This example demonstrates how to use the geometry factory for the
% quotient manifold of fixed-rank tensors,
% fixedrankfactory_tucker_preconditioned.
%
% This geometry is described in the technical report
% "Riemannian preconditioning for tensor completion"
% Hiroyuki Kasai and Bamdev Mishra, arXiv:1506.02159, 2015.
%
% This can be a starting point for many optimization problems of the form:
%
% minimize f(X) such that rank(X) = [r1 r2 r3], size(X) = [n1, n2, n3].
%
% Input: None. This example file generates random data.
%
% Output: None.
%
% Please cite the Manopt paper as well as the research paper:
%     @Techreport{kasai2015,
%       Title   = {{R}iemannian preconditioning for tensor completion},
%       Author  = {Kasai, H. and Mishra, B.},
%       Journal = {Arxiv preprint arXiv:1506.02159},
%       Year    = {2015}
%     }

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main authors: Hiroyuki Kasai and Bamdev Mishra, June 16, 2015.
% Contributors:
%
% Change log:
%

    % Random data generation with pseudo-random numbers from a
    % uniform distribution on [0, 1].
    % First, choose the size of the problem.
    % We will complete a tensor of size n1-by-n2-by-n3 of rank (r1, r2, r3):
    n1 = 70;
    n2 = 60;
    n3 = 50;
    r1 = 3;
    r2 = 4;
    r3 = 5;
    tensor_dims = [n1 n2 n3];
    core_dims = [r1 r2 r3];
    total_entries = n1*n2*n3;

    % Generate a random tensor A of size n1-by-n2-by-n3 of rank (r1, r2, r3).
    % The economy-size QR factorizations give orthonormal factors U1, U2, U3;
    % the triangular factors R1, R2, R3 are folded into the core tensor.
    [U1,R1] = qr(rand(n1, r1), 0);
    [U2,R2] = qr(rand(n2, r2), 0);
    [U3,R3] = qr(rand(n3, r3), 0);

    Z.U1 = R1;
    Z.U2 = R2;
    Z.U3 = R3;
    Z.G = rand( core_dims );
    Core = tucker2multiarray(Z); % Converts tucker format tensor to full tensor.

    Y.U1 = U1;
    Y.U2 = U2;
    Y.U3 = U3;
    Y.G = Core;
    A = tucker2multiarray(Y);

    % Generate a random mask P for observed entries: P(i, j, k) = 1 if the entry
    % (i, j, k) of A is observed, and 0 otherwise.
    % Observation ratio
    fraction = 0.1; % Fraction of known entries.
    nr = round(fraction * total_entries);
    ind = randperm(total_entries);
    ind = ind(1 : nr);
    P = false(tensor_dims);
    P(ind) = true;
    % Hence, we know the nonzero entries in PA:
    PA = P.*A;

    % Pick the manifold of tensors of size n1-by-n2-by-n3 of rank (r1, r2, r3).
    problem.M = fixedrankfactory_tucker_preconditioned(tensor_dims, core_dims);

    % Define the problem cost function. The input X is a structure with
    % fields U1, U2, U3, G representing a rank (r1,r2,r3) tensor.
    % f(X) = 1/2 * || P.*(X - A) ||^2
    problem.cost = @cost;
    function f = cost(X)
        Xmultiarray = tucker2multiarray(X);
        Diffmultiarray = P.*Xmultiarray - PA;
        % Flatten to a matrix so that the Frobenius norm can be used.
        Diffmultiarray_flat = reshape(Diffmultiarray, n1, n2*n3);
        f = .5*norm(Diffmultiarray_flat , 'fro')^2;
    end

    % Define the Euclidean gradient of the cost function, that is, the
    % gradient of f(X) seen as a standard function of X.
    % nabla f(X) = P.*(X-A)
    % We only need to give the Euclidean gradient. Manopt converts it
    % internally to the Riemannian counterpart.
    problem.egrad = @egrad;
    function [g] = egrad(X)
        Xmultiarray = tucker2multiarray(X);
        Smultiarray = P.*Xmultiarray - PA;

        % BM: computation of S, S1, S2, and S3
        % (S1, S2, S3 are the mode-1, mode-2 and mode-3 unfoldings of S.)
        S1multiarray = reshape(Smultiarray, [n1, n2*n3]);
        S2multiarray = reshape(permute(Smultiarray, [2 1 3]),[n2, n1*n3]);
        S3multiarray = reshape(permute(Smultiarray, [3 1 2]),[n3, n1*n2]);

        g.U1 = double(S1multiarray) * kron(X.U3, X.U2) * reshape(X.G, r1, r2*r3)';
        g.U2 = double(S2multiarray) * kron(X.U3, X.U1) * reshape(permute(X.G, [2 1 3]), r2, r1*r3)';
        g.U3 = double(S3multiarray) * kron(X.U2, X.U1) * reshape(permute(X.G, [3 1 2]), r3, r1*r2)';
        g.G = reshape(X.U1' * reshape(double(Smultiarray),n1,n2*n3) * kron(X.U3', X.U2')', r1, r2, r3);
    end

    % Define the Euclidean Hessian of the cost at X, along eta, where eta is
    % represented as a tangent vector: a structure with fields U1, U2, U3, G.
    % This is the directional derivative of nabla f(X) at X along Xdot:
    % nabla^2 f(X)[Xdot] = P.*Xdot
    % We only need to give the Euclidean Hessian. Manopt converts it
    % internally to the Riemannian counterpart.
    problem.ehess = @ehess;
    function [Hess] = ehess(X, eta)
        % Computing S, and its unfolding matrices, S1, S2, and S3.
        Xmultiarray = tucker2multiarray(X);
        S = P.*Xmultiarray - PA;
        S1 = reshape(S, [n1, n2*n3]);
        S2 = reshape(permute(S, [2 1 3]),[n2, n1*n3]);
        S3 = reshape(permute(S, [3 1 2]),[n3, n1*n2]);

        % Computing Sdot, S1dot, S2dot and S3dot.
        % NOTE(review): assuming tucker2multiarray forms the multilinear
        % product of the core with the three factors, the block-diagonal
        % core G1 together with the concatenated factors below makes X1 the
        % sum of four Tucker terms, each with exactly one of (U1, U2, U3, G)
        % replaced by its eta counterpart, i.e. the directional derivative
        % of X along eta -- confirm against tucker2multiarray.
        XG = X.G;
        etaG = eta.G;
        G1 = zeros(4*size(X.G));
        G1(1:r1, 1:r2, 1:r3) = XG;
        G1(r1 + 1 : 2*r1, r2 + 1 : 2*r2, r3 + 1 : 2*r3) = XG;
        G1(2*r1 + 1 : 3*r1, 2*r2 + 1 : 3*r2, 2*r3 + 1 : 3*r3) = XG;
        G1(3*r1 + 1 : 4*r1, 3*r2 + 1 : 4*r2, 3*r3 + 1 : 4*r3) = etaG;
        X1.G = G1;
        X1.U1 = [eta.U1 X.U1 X.U1 X.U1];
        X1.U2 = [X.U2 eta.U2 X.U2 X.U2];
        X1.U3 = [X.U3 X.U3 eta.U3 X.U3];
        X1multiarray = tucker2multiarray(X1);
        Sdot = P.*X1multiarray;
        S1dot = reshape(Sdot, [n1, n2*n3]);
        S2dot = reshape(permute(Sdot, [2 1 3]),[n2, n1*n3]);
        S3dot = reshape(permute(Sdot, [3 1 2]),[n3, n1*n2]);

        % Computing unfolding matrices of X.G and eta.G.
        X_G1 = reshape(double(X.G),r1, r2*r3);
        X_G2 = reshape(permute(double(X.G),[2 1 3]),r2, r1*r3);
        X_G3 = reshape(permute(double(X.G),[3 1 2]),r3, r1*r2);
        eta_G1 = reshape(double(eta.G),r1, r2*r3);
        eta_G2 = reshape(permute(double(eta.G),[2 1 3]),r2, r1*r3);
        eta_G3 = reshape(permute(double(eta.G),[3 1 2]),r3, r1*r2);

        % Computing Hessians for U1, U2 and U3.
        % Each Tk is the product rule applied to the matching egrad term:
        % one part from the variation of S, one from the variation of the
        % Kronecker factors and of the core unfolding.
        T1 = double(S1dot) * (kron(X.U3,X.U2)*X_G1') ...
            + double(S1) * (kron(eta.U3,X.U2)*X_G1' ...
            + kron(X.U3,eta.U2)*X_G1' + kron(X.U3,X.U2)*eta_G1');
        T2 = double(S2dot) * (kron(X.U3,X.U1)*X_G2') ...
            + double(S2) * (kron(eta.U3,X.U1)*X_G2' ...
            + kron(X.U3,eta.U1)*X_G2' + kron(X.U3,X.U1)*eta_G2');
        T3 = double(S3dot) * (kron(X.U2,X.U1)*X_G3') ...
            + double(S3) * (kron(eta.U2,X.U1)*X_G3' ...
            + kron(X.U2,eta.U1)*X_G3' + kron(X.U2,X.U1)*eta_G3');
        Hess.U1 = T1;
        Hess.U2 = T2;
        Hess.U3 = T3;

        % Computing Hessian for G
        N.U1 = X.U1';
        N.U2 = X.U2';
        N.U3 = X.U3';
        N.G = Sdot;
        M0array = tucker2multiarray(N);
        M1.U1 = eta.U1';
        M1.U2 = X.U2';
        M1.U3 = X.U3';
        M1.G = S;
        M1array = tucker2multiarray(M1);
        M2.U1 = X.U1';
        M2.U2 = eta.U2';
        M2.U3 = X.U3';
        M2.G = S;
        M2array = tucker2multiarray(M2);
        M3.U1 = X.U1';
        M3.U2 = X.U2';
        M3.U3 = eta.U3';
        M3.G = S;
        M3array = tucker2multiarray(M3);
        Hess.G = M0array + M1array + M2array + M3array;
    end

    % Check consistency of the gradient and the Hessian. Useful if you
    % adapt this example for a new cost function and you would like to make
    % sure there is no mistake.
    %
    % Notice that the checkhessian test fails: the slope is not right.
    % This is because the retraction is not second-order compatible with
    % the Riemannian exponential on this manifold, making
    % the checkhessian tool unusable. The Hessian is correct though.
    %
    % % warning('off', 'manopt:fixedrankfactory_tucker_preconditioned:exp');
    % % checkgradient(problem);
    % % drawnow;
    % % pause;
    % % checkhessian(problem);
    % % drawnow;
    % % pause;

    % options
    options.maxiter = 200;
    options.maxinner = 30;
    options.maxtime = inf;
    options.tolgradnorm = 1e-5;

    % Minimize the cost function using Riemannian trust-regions
    % (the empty initial point lets the solver choose its own start).
    Xtr = trustregions(problem, [], options);

    % The reconstructed tensor is Xtr, represented as a structure with fields
    % U1, U2, U3 and G.
    Xtrmultiarray = tucker2multiarray(Xtr);
    fprintf('||X-A||_F = %g\n', norm(reshape(Xtrmultiarray - A, [n1 n2*n3]), 'fro'));

    % Alternatively, we could decide to use a solver such as steepestdescent (SD)
    % or conjugategradient (CG). These solvers need to solve a
    % line-search problem at each iteration. Standard line searches in
    % Manopt have generic purpose systems to do this. But for the problem
    % at hand, we could exploit the least-squares structure to compute an
    % approximate stepsize for the line-search problem.
% The approximation
    % is obtained by linearizing the nonlinear manifold locally and further
    % approximating it with a degree 2 polynomial approximation.
    % The specific derivation is in the paper referenced above.
    problem.linesearch = @linesearch_helper;
    function tmin = linesearch_helper(X, eta)
        % Return an initial stepsize guess tmin for a search from the point
        % X along the tangent direction eta (both structures with fields
        % U1, U2, U3, G). tmin minimizes the quadratic model
        % || term0 + t*term1 ||^2 of the masked residual.

        % term0: residual on the observed entries at the current point.
        Xmultiarray = tucker2multiarray(X);
        residual_mat = P.*Xmultiarray - PA;
        residual_vec = residual_mat(:);
        term0 = residual_vec;

        % term1: first-order change of the masked tensor along eta.
        % NOTE(review): assuming tucker2multiarray forms the multilinear
        % product of core and factors, the block-diagonal core G1 with the
        % concatenated factors yields the sum of the four Tucker terms in
        % which exactly one of (U1, U2, U3, G) is replaced by its eta
        % counterpart -- confirm against tucker2multiarray.
        XG = X.G;
        etaG = eta.G;
        G1 = zeros(4*size(X.G));
        G1(1:r1, 1:r2, 1:r3) = XG;
        G1(r1 + 1 : 2*r1, r2 + 1 : 2*r2, r3 + 1 : 2*r3) = XG;
        G1(2*r1 + 1 : 3*r1, 2*r2 + 1 : 3*r2, 2*r3 + 1 : 3*r3) = XG;
        G1(3*r1 + 1 : 4*r1, 3*r2 + 1 : 4*r2, 3*r3 + 1 : 4*r3) = etaG;

        X1.U1 = [eta.U1 X.U1 X.U1 X.U1];
        X1.U2 = [X.U2 eta.U2 X.U2 X.U2];
        X1.U3 = [X.U3 X.U3 eta.U3 X.U3];
        X1.G = G1;

        X1multiarray = tucker2multiarray(X1);
        term1_mat = P.*X1multiarray;
        term1 = term1_mat(:);

        % tmin is the solution to the problem argmin a2*t^2 + a1*t, where
        % the coefficients a1 and a2 are shown below.
        a2 = (term1'*term1);
        a1 = 2*(term1'*term0);
        tmin = - 0.5*(a1 / a2);
    end

    % Notice that for this solver, the Hessian is not needed.
    % The fourth output holds the options actually used by the solver (the
    % user-supplied fields merged with the solver defaults). It is kept in
    % a separate variable so that the input options structure is not
    % overwritten.
    [Xcg, costcg, infocg, optionscg] = conjugategradient(problem, [], options); %#ok

    fprintf('Take a look at the options that CG used:\n');
    disp(optionscg);
    fprintf('And see how many trials were made at each line search call:\n');
    info_ls = [infocg.linesearch];
    disp([info_ls.costevals]);

    fprintf('Try it again without the linesearch helper.\n');

    % Remove the linesearch helper from the problem structure.
problem = rmfield(problem, 'linesearch'); [Xcg, xcost, info, options] = conjugategradient(problem, []); %#ok fprintf('Take a look at the options that CG used:\n'); disp(options); fprintf('And see how many trials were made at each line search call:\n'); info_ls = [info.linesearch]; disp([info_ls.costevals]); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/maxcut.m ================================================ function [x, cutvalue, cutvalue_upperbound, Y] = maxcut(L, r) % Algorithm to (try to) compute a maximum cut of a graph, via SDP approach. % % function x = maxcut(L) % function [x, cutvalue, cutvalue_upperbound, Y] = maxcut(L, r) % % L is the Laplacian matrix describing the graph to cut. The Laplacian of a % graph is L = D - A, where D is the diagonal degree matrix (D(i, i) is the % sum of the weights of the edges adjacent to node i) and A is the % symmetric adjacency matrix of the graph (A(i, j) = A(j, i) is the weight % of the edge joining nodes i and j). If L is sparse, this will be % exploited. % % If the graph has n nodes, then L is nxn and the output x is a vector of % length n such that x(i) is +1 or -1. This partitions the nodes of the % graph in two classes, in an attempt to maximize the sum of the weights of % the edges that go from one class to the other (MAX CUT problem). % % cutvalue is the sum of the weights of the edges 'cut' by the partition x. % % If the algorithm reached the global optimum of the underlying SDP % problem, then it produces an upperbound on the maximum cut value. This % value is returned in cutvalue_upperbound if it is found. Otherwise, that % output is set to NaN. % % If r is specified (by default, r = n), the algorithm will stop at rank r. % This may prevent the algorithm from reaching a globally optimal solution % for the underlying SDP problem (but can greatly help in keeping the % execution time under control). 
% If a global optimum of the SDP is reached
% before rank r, the algorithm will stop of course.
%
% Y is a matrix of size nxp, with p <= r, such that X = Y*Y' is the best
% solution found for the underlying SDP problem. If cutvalue_upperbound is
% not NaN, then Y*Y' is optimal for the SDP and cutvalue_upperbound is its
% cut value.
%
% By Goemans and Williamson 1995, it is known that if the optimal value of
% the SDP is reached, then the returned cut, in expectation, is at least
% a fraction 0.878 of the optimal cut. (This is not exactly valid because
% we do not use random projection here; sign(Y*randn(size(Y, 2), 1)) will
% give a cut that respects this statement -- it's usually worse though).
%
% The algorithm is essentially that of:
% Journee, Bach, Absil and Sepulchre, SIAM 2010
% Low-rank optimization on the cone of positive semidefinite matrices.
%
% It is itself based on the famous SDP relaxation of MAX CUT:
% Goemans and Williamson, 1995
% Improved approximation algorithms for maximum cut and satisfiability
% problems using semidefinite programming.
%
% See also: elliptope_SDP

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 18, 2013
% Contributors:
% Change log:
%
%   April 3, 2015 (NB):
%       L products now counted with the new shared memory system. This is
%       more reliable and more flexible than using a global variable.

    % If no inputs are provided, generate a random graph Laplacian.
    % This is for illustration purposes only.
    if ~exist('L', 'var') || isempty(L)
        n = 20;
        A = triu(randn(n) <= .4, 1);
        A = A+A';
        D = diag(sum(A, 2));
        L = D-A;
    end

    n = size(L, 1);
    assert(size(L, 2) == n, 'L must be square.');

    % A missing, empty or too large r falls back to the full rank n.
    if ~exist('r', 'var') || isempty(r) || r > n
        r = n;
    end

    % We will let the rank increase. Each rank value will generate a cut.
    % We have to go up in the rank to eventually find a certificate of SDP
    % optimality. This in turn will provide an upperbound on the MAX CUT
    % value and ensure that we're doing well, according to Goemans and
    % Williamson's argument. In practice though, the good cuts often come
    % up for low rank values, so we better keep track of the best one.
    best_x = ones(n, 1);
    best_cutvalue = 0;
    cutvalue_upperbound = NaN;

    % Accumulated solver time stamps and cost values over all ranks.
    time = [];
    cost = [];

    for rr = 2 : r

        manifold = elliptopefactory(n, rr);

        if rr == 2

            % At first, for rank 2, generate a random point.
            Y0 = manifold.rand();

        else

            % To increase the rank, we could just add a column of zeros to
            % the Y matrix. Unfortunately, this lands us in a saddle point.
            % To escape from the saddle, we may compute an eigenvector of
            % Sy associated to a negative eigenvalue: that will yield a
            % (second order) descent direction Z. See Journee et al ; Sy is
            % linked to dual certificates for the SDP.
            Y0 = [Y zeros(n, 1)];
            LY0 = L*Y0;
            Dy = spdiags(sum(LY0.*Y0, 2), 0, n, n);
            Sy = (Dy - L)/4;
            % Find the smallest (the "most negative") eigenvalue of Sy.
            eigsopts.issym = true;
            eigsopts.isreal = true;
            [v, s] = eigs(Sy, 1, 'SA', eigsopts);
            % If there is no negative eigenvalue for Sy, then we are not at
            % a saddle point: we're actually done!
            if s >= -1e-8
                % We can stop here: we found the global optimum of the SDP,
                % and hence the reached cost is a valid upper bound on the
                % maximum cut value.
                % (info still holds the statistics of the previous rank.)
                cutvalue_upperbound = max(-[info.cost]);
                break;
            end

            % This is our escape direction.
            Z = manifold.proj(Y0, [zeros(n, rr-1) v]);

            % % These instructions can be uncommented to see what the cost
            % % function looks like at a saddle point. But will require the
            % % problem structure which is not defined here: see the helper
            % % function.
            % plotprofile(problem, Y0, Z, linspace(-1, 1, 101));
            % drawnow; pause;

            % Now make a step in the Z direction to escape from the saddle.
            % It is not obvious that it is ok to do a unit step ... perhaps
            % need to be cautious here with the stepsize. It's not too
            % critical though: the important point is to leave the saddle
            % point. But it's nice to guarantee monotone decrease of the
            % cost, and we can't do that with a constant step (at least,
            % not without a proper argument to back it up).
            stepsize = 1;
            Y0 = manifold.retr(Y0, Z, stepsize);

        end

        % Use the Riemannian optimization based algorithm lower in this
        % file to reach a critical point (typically a local optimizer) of
        % the max cut cost with fixed rank, starting from Y0.
        [Y, info] = maxcut_fixedrank(L, Y0);

        % Some info logging.
        % Times reported by each solver run restart from zero, so they are
        % offset by the last recorded time to obtain one continuous axis.
        thistime = [info.time];
        if ~isempty(time)
            thistime = time(end) + thistime;
        end
        time = [time thistime]; %#ok
        cost = [cost [info.cost]]; %#ok

        % Time to turn the matrix Y into a cut.
        % We can either do the random rounding as follows:
        % x = sign(Y*randn(rr, 1));
        % or extract the "PCA direction" of the points in Y and cut
        % orthogonally to that direction, as follows (seems faster than
        % calling svds):
        [U, ~, ~] = svd(Y, 0);
        u = U(:, 1);
        x = sign(u);

        cutvalue = (x'*L*x)/4;
        if cutvalue > best_cutvalue
            best_x = x;
            best_cutvalue = cutvalue;
        end

    end

    x = best_x;
    cutvalue = best_cutvalue;

    plot(time, -cost, '.-');
    xlabel('Time [s]');
    ylabel('Relaxed cut value');
    title('The relaxed cut value is an upper bound on the optimal cut value.');

end


function [Y, info] = maxcut_fixedrank(L, Y)
% Try to solve the (fixed) rank r relaxed max cut program, based on the
% Laplacian of the graph L and an initial guess Y. L is nxn and Y is nxr.

    [n, r] = size(Y);
    assert(all(size(L) == n));

    % The fixed rank elliptope geometry describes symmetric, positive
    % semidefinite matrices of size n with rank r and all diagonal entries
    % are 1.
    manifold = elliptopefactory(n, r);

    % % If you want to compare the performance of the elliptope geometry
    % % against the (conceptually simpler) oblique manifold geometry,
    % % uncomment this line.
    % manifold = obliquefactory(r, n, true);

    problem.M = manifold;

    % % For rapid prototyping, these lines suffice to describe the cost
    % % function and its gradient and Hessian (here expressed using the
    % % Euclidean gradient and Hessian).
    % problem.cost = @(Y) -trace(Y'*L*Y)/4;
    % problem.egrad = @(Y) -(L*Y)/2;
    % problem.ehess = @(Y, U) -(L*U)/2;

    % Instead of the prototyping version, the functions below describe the
    % cost, gradient and Hessian using the caching system (the store
    % structure). This allows executing exactly the required number of
    % multiplications with the matrix L. These multiplications are counted
    % using the shared memory in the store structure: that memory is
    % shared, so we get access to the same data, regardless of the
    % point Y currently visited.

    % For every visited point Y, we will need L*Y. This function makes sure
    % the quantity L*Y is available, but only computes it if it wasn't
    % already computed.
    function store = prepare(Y, store)
        if ~isfield(store, 'LY')
            % Compute and store the product for the current point Y.
            store.LY = L*Y;
            % Create / increment the shared counter (independent of Y).
            if isfield(store.shared, 'counter')
                store.shared.counter = store.shared.counter + 1;
            else
                store.shared.counter = 1;
            end
        end
    end

    problem.cost = @cost;
    function [f, store] = cost(Y, store)
        store = prepare(Y, store);
        LY = store.LY;
        f = -(Y(:)'*LY(:))/4; % = -trace(Y'*LY)/4; but faster
    end

    problem.egrad = @egrad;
    function [g, store] = egrad(Y, store)
        store = prepare(Y, store);
        LY = store.LY;
        g = -LY/2;
    end

    problem.ehess = @ehess;
    function [h, store] = ehess(Y, U, store)
        store = prepare(Y, store); % this line is not strictly necessary
        LU = L*U;
        % The product L*U above is counted as one more product with L.
        store.shared.counter = store.shared.counter + 1;
        h = -LU/2;
    end

    % statsfun is called exactly once after each iteration (including after
    % the evaluation of the cost at the initial guess). We then register
    % the value of the L-products counter (which counts how many products
    % were needed so far).
% options.statsfun = @statsfun; % function stats = statsfun(problem, Y, stats, store) %#ok % stats.Lproducts = store.shared.counter; % end % Equivalent, but simpler syntax: options.statsfun = statsfunhelper('Lproducts', ... @(problem, Y, stats, store) store.shared.counter ); % % Diagnostics tools: to make sure the gradient and Hessian are % % correct during the prototyping stage. % checkgradient(problem); pause; % checkhessian(problem); pause; % % To investigate the effect of the rotational invariance when using % % the oblique or the elliptope geometry, or to study the saddle point % % issue mentioned above, it is sometimes interesting to look at the % % spectrum of the Hessian. For large dimensions, this is slow! % stairs(sort(hessianspectrum(problem, Y))); % drawnow; pause; % % When facing a saddle point issue as described in the master % % function, and when no sure mechanism exists to find an escape % % direction, it may be helpful to set useRand to true and raise % % miniter to more than 1, when using trustregions. This will tell the % % solver to not stop before at least miniter iterations were % % accomplished (thus disregarding the zero gradient at the saddle % % point) and to use random search directions to kick start the inner % % solve (tCG) step. It is not as efficient as finding a sure escape % % direction, but sometimes it's the best we have. % options.useRand = true; % options.miniter = 5; options.verbosity = 2; [Y, Ycost, info] = trustregions(problem, Y, options); %#ok fprintf('Products with L: %d\n', max([info.Lproducts])); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/nonlinear_eigenspace.m ================================================ function Xsol = nonlinear_eigenspace(L, k, alpha) % Example of nonlinear eigenvalue problem: total energy minimization. 
%
% function Xsol = nonlinear_eigenspace(L, k, alpha)
%
% L is a discrete Laplacian operator,
% alpha is a given constant, and
% k corresponds to the dimension of the least eigenspace sought.
%
% This example demonstrates how to use the Grassmann geometry factory
% to solve the nonlinear eigenvalue problem as the optimization problem:
%
% minimize 0.5*trace(X'*L*X) + (alpha/4)*(rho(X)*L\(rho(X)))
% over X such that X'*X = Identity,
%
% where L is of size n-by-n,
% X is an n-by-k matrix, and
% rho(X) is the diagonal part of X*X'.
%
% All inputs are optional: L defaults to the 100-by-100 tridiagonal
% (-1, 2, -1) matrix, k defaults to min(10, n) (also used when the given
% k exceeds n), and alpha defaults to 1.
%
% This example is motivated in the paper
% "A Riemannian Newton Algorithm for Nonlinear Eigenvalue Problems",
% Zhi Zhao, Zheng-Jian Bai, and Xiao-Qing Jin,
% SIAM Journal on Matrix Analysis and Applications, 36(2), 752-774, 2015.
%

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Bamdev Mishra, June 19, 2015.
% Contributors:
%
% Change log:

    % If no inputs are provided, generate a discrete Laplacian operator.
    % This is for illustration purposes only.
    % The default example corresponds to Case (c) of Example 6.2 of the
    % above referenced paper.
    if ~exist('L', 'var') || isempty(L)
        n = 100;
        L = gallery('tridiag', n, -1, 2, -1);
    end

    n = size(L, 1);
    assert(size(L, 2) == n, 'L must be square.');

    % Fall back to the default subspace dimension when k is missing, empty
    % or larger than n. The default is capped at n: a plain k = 10 would
    % itself exceed n for operators smaller than 10-by-10 and produce
    % n-by-k points with k > n.
    if ~exist('k', 'var') || isempty(k) || k > n
        k = min(10, n);
    end

    if ~exist('alpha', 'var') || isempty(alpha)
        alpha = 1;
    end

    % Grassmann manifold description
    Gr = grassmannfactory(n, k);
    problem.M = Gr;

    % Cost function evaluation
    problem.cost = @cost;
    function val = cost(X)
        rhoX = sum(X.^2, 2); % diag(X*X');
        val = 0.5*trace(X'*(L*X)) + (alpha/4)*(rhoX'*(L\rhoX));
    end

    % Euclidean gradient evaluation
    % Note: Manopt automatically converts it to the Riemannian counterpart.
    problem.egrad = @egrad;
    function g = egrad(X)
        rhoX = sum(X.^2, 2); % diag(X*X');
        g = L*X + alpha*diag(L\rhoX)*X;
    end

    % Euclidean Hessian evaluation
    % Note: Manopt automatically converts it to the Riemannian counterpart.
problem.ehess = @ehess; function h = ehess(X, U) rhoX = sum(X.^2, 2); %diag(X*X'); rhoXdot = 2*sum(X.*U, 2); h = L*U + alpha*diag(L\rhoXdot)*X + alpha*diag(L\rhoX)*U; end % Check whether gradient and Hessian computations are correct. % checkgradient(problem); % pause; % checkhessian(problem); % pause; % Initialization as suggested in above referenced paper. X = randn(n, k); [U, S, V] = svd(X, 0); %#ok X = U*V'; [U0, S0, V0] = eigs(L + alpha*diag(L\(sum(X.^2, 2))), k,'sm'); %#ok X0 = U0; % Call manoptsolve to automatically call an appropriate solver. % Note: it calls the trust regions solver as we have all the required % ingredients, namely, gradient and Hessian, information. Xsol = manoptsolve(problem, X0); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/packing_on_the_sphere.m ================================================ function [X, maxdot] = packing_on_the_sphere(d, n, epsilon, X0) % Return a set of points spread out on the sphere. % % function [X, maxdot] = packing_on_the_sphere(d, n, epsilon, X0) % % Using optimization on the oblique manifold, that is, the product of % spheres, this function returns a set of n points with unit norm in R^d in % the form of a matrix X of size nxd, such that the points are spread out % on the sphere. Ideally, we would minimize the maximum inner product % between any two points X(i, :) and X(j, :), i~=j, but that is a nonsmooth % cost function. Instead, we replace the max function by a classical % log-sum-exp approximation and (attempt to) solve: % % min_{X in OB(d, n)} log( .5*sum_{i~=j} exp( xi'*xj/epsilon ) ), % % with xi = X(:, i) and epsilon is some "diffusion constant". As epsilon % goes to zero, the cost function is a sharper approximation of the max % function (under some assumptions), but the cost function becomes stiffer % and hence harder to optimize. 
%
% The second output, maxdot, is the maximum inner product between any two
% points in the returned X. This number is the one we truly are trying to
% minimize.
%
% Notice that this cost function is invariant under rotation of X:
% f(X) = f(XQ) for all orthogonal Q in O(d).
% This calls for optimization over the set of symmetric positive
% semidefinite matrices of size n and rank d with unit diagonal, which can
% be thought of as the quotient of the oblique manifold OB(d, n) by O(d):
% See elliptopefactory.
%
% This is known as the Thomson or, more specifically, the Tammes problem:
% http://en.wikipedia.org/wiki/Tammes_problem
% An interesting page by Neil Sloane collecting best known packings is
% available here http://neilsloane.com/packings/

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Nicolas Boumal, July 2, 2013
% Contributors:
%
% Change log:
%   Aug. 14, 2013 (NB) : Code now compatible to experiment with both the
%                        obliquefactory and the elliptopefactory.
%
%   Jan.  7, 2014 (NB) : Added reference to Neil Sloane's page and the
%                        maxdot output.
%
%   June 24, 2014 (NB) : Now shifting exponentials to alleviate numerical
%                        trouble when epsilon is too small.
%

    if ~exist('d', 'var') || isempty(d)
        % Dimension of the embedding space: R^d
        d = 3;
    end
    if ~exist('n', 'var') || isempty(n)
        % Number n of points to place on the sphere in R^d.
        % For example, n=12 yields an icosahedron:
        % https://en.wikipedia.org/wiki/Icosahedron
        % Notice though that platonic solids are not always optimal.
        % Try for example n = 8: you don't get a cube.
        n = 24;
    end
    if ~exist('epsilon', 'var') || isempty(epsilon)
        % This value should be as close to 0 as affordable.
        % If it is too close to zero, optimization first becomes much
        % slower, then simply doesn't work anymore because of floating
        % point overflow errors (NaN's and Inf's start to appear).
        % If it is too large, then log-sum-exp is a poor approximation of
        % the max function, and the spread will be less uniform.
        % An okay value seems to be 0.01 or 0.001 for example. Note that a
        % better strategy than using a small epsilon straightaway is to
        % reduce epsilon bit by bit and to warm-start subsequent
        % optimization in that way. Trustregions will be more appropriate
        % for these fine tunings.
        epsilon = 0.0015;
    end

    % Pick your manifold (the elliptope factory quotients out the global
    % rotation invariance of the problem, which is more natural but
    % conceptually a bit more complicated --- for usage with the toolbox it
    % is the same though: just uncomment the appropriate line).
    manifold = obliquefactory(d, n, true);
    % manifold = elliptopefactory(n, d);

    % Generate a random initial guess if none was given.
    if ~exist('X0', 'var') || isempty(X0)
        X0 = manifold.rand();
    end

    % Define the cost function with caching system used: the store
    % structure we receive as input is tied to the input point X. Every time
    % this cost function is called at this point X, we will receive the
    % same store structure back. We may modify the store structure inside
    % the function and return it: the changes will be remembered for next
    % time.
    function [f, store] = cost(X, store)
        if ~isfield(store, 'ready')
            XXt = X*X';
            % Shift the exponentials by the maximum value to reduce
            % numerical trouble due to possible overflows.
            s = max(max(triu(XXt, 1)));
            expXXt = exp((XXt-s)/epsilon);
            % Zero out the diagonal
            expXXt(1:(n+1):end) = 0;
            u = sum(sum(triu(expXXt, 1)));
            store.XXt = XXt;
            store.s = s;
            store.expXXt = expXXt;
            store.u = u;
            store.ready = true;
        end
        u = store.u;
        s = store.s;
        % Adding s back undoes the shift applied inside the exponentials.
        f = s + epsilon*log(u);
    end

    % Define the gradient of the cost. When the gradient is called at a
    % point X for which the cost was already called, the store structure we
    % receive remembers everything that the cost function stored in it, so
    % we can reuse previously computed elements.
    function [g, store] = grad(X, store)
        if ~isfield(store, 'ready')
            % The cache is missing: call the cost to fill the store.
            [~, store] = cost(X, store);
        end
        % Compute the Euclidean gradient
        eg = store.expXXt*X / store.u;
        % Convert to the Riemannian gradient (by projection)
        g = manifold.egrad2rgrad(X, eg);
    end

    % Setup the problem structure with its manifold M and cost+grad
    % functions.
    problem.M = manifold;
    problem.cost = @cost;
    problem.grad = @grad;

    % For debugging, it's always nice to check the gradient a few times.
    % checkgradient(problem);
    % pause;

    % Call a solver on our problem with a few options defined. We did not
    % specify the Hessian but it is still okay to call trustregion: Manopt
    % will approximate the Hessian with finite differences of the gradient.
    opts.tolgradnorm = 1e-8;
    opts.maxtime = 1200;
    opts.maxiter = 1e5;
    % X = trustregions(problem, X0, opts);
    X = conjugategradient(problem, X0, opts);

    % Evaluate the maximum inner product between any two points of X.
    % Only the strictly upper triangular entries of X*X' are kept.
    XXt = X*X';
    dots = XXt(find(triu(ones(n), 1))); %#ok
    maxdot = max(dots);

    % Similarly, even though we did not specify the Hessian, we may still
    % estimate its spectrum at the solution. It should reflect the
    % invariance of the cost function under a global rotation of the
    % sphere, which is an invariance under the group O(d) of dimension
    % d(d-1)/2 : this translates into d(d-1)/2 zero eigenvalues in the
    % spectrum of the Hessian.
    % The approximate Hessian is not a linear operator, and it is a
    % fortiori not symmetric. The result of this computation is thus not
    % reliable. It does display the zero eigenvalues as expected though.
    if manifold.dim() < 300
        evs = real(hessianspectrum(problem, X));
        figure;
        stem(1:length(evs), sort(evs), '.');
        title(['Eigenvalues of the approximate Hessian of the cost ' ...
               'function at the solution']);
    end

    % Show how the pairwise geodesic distances acos(X(i, :)*X(j, :)'),
    % i < j, are distributed.
    figure;
    hist(real(acos(dots)), 20);
    title('Histogram of the geodesic distances');

    % This is the quantity we actually want to minimize.
fprintf('Maximum inner product between two points: %g\n', maxdot); % Give some visualization if the dimension allows if d == 2 % For the circle, the optimal solution consists in spreading the % points with angles uniformly sampled in (0, 2pi). This % corresponds to the following value for the max inner product: fprintf('Optimal value for the max inner product: %g\n', cos(2*pi/n)); figure; t = linspace(-pi, pi, 201); plot(cos(t), sin(t), '-', 'LineWidth', 3, 'Color', [152,186,220]/255); daspect([1 1 1]); box off; axis off; hold on; plot(X(:, 1), X(:, 2), 'r.', 'MarkerSize', 25); hold off; end if d == 3 figure; % Plot the sphere [sphere_x, sphere_y, sphere_z] = sphere(50); handle = surf(sphere_x, sphere_y, sphere_z); set(handle, 'FaceColor', [152,186,220]/255); set(handle, 'FaceAlpha', .5); set(handle, 'EdgeColor', [152,186,220]/255); set(handle, 'EdgeAlpha', .5); daspect([1 1 1]); box off; axis off; hold on; % Add the chosen points Y = 1.02*X'; plot3(Y(1, :), Y(2, :), Y(3, :), 'r.', 'MarkerSize', 25); % And connect the points which are at minimal distance, % within some tolerance. min_distance = real(acos(maxdot)); connected = real(acos(XXt)) <= 1.20*min_distance; [Ic, Jc] = find(triu(connected, 1)); for k = 1 : length(Ic) i = Ic(k); j = Jc(k); plot3(Y(1, [i j]), Y(2, [i j]), Y(3, [i j]), 'k-'); end hold off; end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/positive_definite_karcher_mean.m ================================================ function X = positive_definite_karcher_mean(A) % Computes a Karcher mean of a collection of positive definite matrices. % % function X = positive_definite_karcher_mean(A) % % Input: A 3D matrix A of size nxnxm such that each slice A(:,:,k) is a % positive definite matrix of size nxn. 
%
% Output: A positive definite matrix X of size nxn which is a Karcher mean
%         of the m matrices in A, that is, X minimizes the average of the
%         halved squared Riemannian distances to the matrices in A:
%            f(X) = (1/m) * sum_{k=1}^m .5*dist^2(X, A(:, :, k))
%         The distance is defined by the natural metric on the set of
%         positive definite matrices: dist(X,Y) = norm(logm(X\Y), 'fro').
%
% This simple example is not the best way to compute Karcher means. Its
% purpose is to serve as base code to explore other algorithms. In
% particular, in the presence of large noise, this algorithm seems to not
% be able to reach points with a very small gradient norm. This may be
% caused by insufficient accuracy in the gradient computation.

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Nicolas Boumal, Sept. 3, 2013
% Contributors:
%
% Change log:
%

    % Without input data, build a small synthetic test set: m noisy copies
    % of one random positive diagonal reference matrix.
    if nargin < 1 || isempty(A)
        n = 5;
        m = 50;
        A = zeros(n, n, m);
        reference = diag(max(.1, 1+.1*randn(n, 1)));
        for slice = 1 : m
            perturbation = 0.01*randn(n);
            perturbation = (perturbation + perturbation')/2;
            [eigvecs, eigvals] = eig(reference + perturbation);
            % Eigenvalues are clipped from below at .01.
            A(:, :, slice) = eigvecs*diag(max(.01, diag(eigvals)))*eigvecs';
        end
    end

    % Problem size: m matrices of size nxn are to be averaged.
    n = size(A, 1);
    m = size(A, 3);
    assert(n == size(A, 2), ...
           ['The slices of A must be square, i.e., the ' ...
            'first and second dimensions of A must be equal.']);

    % The search space is the set of positive definite matrices of size n.
    % This is the only place where the manifold is specified: swapping this
    % factory for another geometry yields code to compute Karcher means on
    % that other manifold, provided it is equipped with a dist function and
    % a logarithmic map log.
    M = sympositivedefinitefactory(n);

    % Problem structure: manifold, cost function and its gradient.
    problem.M = M;
    problem.cost = @cost;
    problem.grad = @grad;

    % Explicitly pick an approximate Hessian for the trust-region method.
    problem.approxhess = approxhessianFD(problem, struct('stepsize', 1e-4));

    % The functions below make many redundant computations. This
    % performance hit can be alleviated by using the caching system. We go
    % for a simple implementation here, as a tutorial example.

    % Cost: accumulated squared distances to the slices, divided by 2*m.
    function f = cost(X)
        total = 0;
        for idx = 1 : m
            dist_k = M.dist(X, A(:, :, idx));
            total = total + dist_k^2;
        end
        f = total/(2*m);
    end

    % Riemannian gradient of the cost: minus the average of the logarithms
    % of the slices at X, accumulated one slice at a time.
    function g = grad(X)
        g = M.zerovec(X);
        weight = -1/m;
        for idx = 1 : m
            log_k = M.log(X, A(:, :, idx));
            % g = g - log_k/m, as a linear combination of tangent vectors.
            g = M.lincomb(X, 1, g, weight, log_k);
        end
    end

    % Execute some checks on the derivatives for early debugging.
    % These things can be commented out of course.
    % The slopes should agree on part of the plot at least. In this case,
    % it is sometimes necessary to inspect the plot visually to make the
    % call, but it is indeed correct.
    % checkgradient(problem);
    % pause;

    % Execute this if you want to force using a proper parallel vector
    % transport. This is not necessary. If you omit this, the default
    % vector transport is the identity map, which is (of course) cheaper
    % and seems to perform well in practice.
    % M.transp = M.paralleltransp;

    % Issue a call to a solver. Default options are selected.
    % Our initial guess is the first data point. Most solvers work well
    % with this problem. Limited-memory BFGS is one good example:
    X = rlbfgs(problem, A(:, :, 1));

end

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/radio_interferometric_calibration.m ================================================ function xsol = radio_interferometric_calibration(N, K) % Returns the gain matrices of N stations with K receivers. % % function xsol = radio_interferometric_calibration(N, K) % % N >= K is always assumed.
%
% The example considers calibration of an array of N stations.
% We simulate a system with N stations, each having K receivers.
% For radio astronomy, K = 2.
%
% For a detailed exposition of the problem at hand, refer to the paper:
% "Radio interferometric calibration using a Riemannian manifold",
% Sarod Yatawatta, ICASSP, 2013.
% Available at http://dx.doi.org/10.1109/ICASSP.2013.6638382.
%
% The source of the signal is unpolarized (given by the matrix C).
% The measured data is the cross correlation of the signals at each receiver.
% So there will be N(N-1)/2 possible cross correlations.
% Noise with given SNR is added to the signal.
%
% The objective is to estimate the gains of each receiver (K x K) matrix,
% so the total size of the solutions is N x (K x K), which is written
% as an NK x K matrix.
%
% Note: each station gain matrix (KxK) can have a KxK unitary ambiguity,
% therefore we use the quotient manifold structure. The unitary ambiguity
% is common to all stations, so the solution obtained by the
% optimization routine always has an unknown unitary matrix that makes the
% solution different from the true solution.
%

% This file is part of Manopt: www.manopt.org.
% Original author: Sarod Yatawatta, June 29, 2015.
% Contributors: Bamdev Mishra.
% Change log:
%
%    June 28, 2016 (BM):
%        Modified the egrad and ehess operations according to
%        the modified metric in the symfixedrankYYcomplexfactory file,
%        where a factor of 2 was removed from the metric. Accordingly,
%        a factor of 2 was added to egrad and ehess operations.

    % Default problem size when an input is missing or empty.
    if nargin < 1 || isempty(N)
        N = 10;
    end
    if nargin < 2 || isempty(K)
        K = 2;
    end
    assert(N >= K, 'N must be larger than or equal to K.');

    % Row range of the idx-th K x K block inside a stacked matrix with K
    % columns (stations are stacked in J and x, baselines in V).
    blk = @(idx) K*(idx-1)+1 : K*idx;

    % Baselines (pairs of correlations)
    B = N*(N-1)/2;

    % Source coherence, at phase center
    C = eye(K);

    % Random J (gains) of all stations
    J = 0.2*rand(K*N, K) + 1i*rand(K*N, K);

    % Visibilities (cross correlations): one K x K block per station pair,
    % enumerated in the order (1,2), (1,3), ..., (N-1,N).
    V = zeros(K*B, K);
    baseline = 0;
    for ci = 1 : N-1
        for cj = ci+1 : N
            baseline = baseline + 1;
            V(blk(baseline), :) = J(blk(ci), :)*C*J(blk(cj), :)';
        end
    end

    % Generate noise, scaled so that the signal-to-noise ratio is SNR.
    SNR = 10000; % inf;
    noise = randn(K*B, K) + 1i*randn(K*B, K);
    noise_var = norm(V)^2/(norm(noise)^2*SNR);
    noise = noise*sqrt(noise_var);

    % Add noise to signal
    V = V + noise;

    % Optimization part by creating the problem structure.
    % First, we use the manifold description.
    % Second, we define the problem cost, gradient and Hessian functions.

    % Manifold description
    % Note that the actual dimension is KN x K.
    problem.M = symfixedrankYYcomplexfactory(K*N, K);

    % Cost function: sum over all baselines of the squared norm of the
    % residual between measured and modeled visibilities.
    problem.cost = @cost;
    function fval = cost(x)
        fval = 0.0;
        bl = 0;
        for p = 1 : N-1
            xp = x(blk(p), :);
            for q = p+1 : N
                bl = bl + 1;
                res = V(blk(bl), :) - xp*C*x(blk(q), :)'; % Residual
                r = res(:);
                fval = fval + real(r'*r); % Add norm of the residual.
            end
        end
    end

    % Euclidean gradient of the cost function.
    % Manopt automatically converts it to the Riemannian counterpart.
    % The code involves for-loops for readability, but could be vectorized
    % for improved speed.
    problem.egrad = @egrad;
    function grad = egrad(x)
        grad = zeros(K*N, K);
        bl = 0;
        for p = 1 : N-1
            rows_p = blk(p);
            xp = x(rows_p, :);
            for q = p+1 : N
                bl = bl + 1;
                rows_q = blk(q);
                xq = x(rows_q, :);
                res = 2*(V(blk(bl), :) - xp*C*xq'); % Residual
                grad(rows_p, :) = grad(rows_p, :) - res*xq*C';
                grad(rows_q, :) = grad(rows_q, :) - res'*xp*C;
            end
        end
    end

    % Euclidean Hessian of the cost function along a search direction eta.
    % Manopt automatically converts it to the Riemannian counterpart.
    problem.ehess = @ehess;
    function hess = ehess(x, eta)
        hess = zeros(K*N, K);
        bl = 0;
        for p = 1 : N-1
            rows_p = blk(p);
            xp = x(rows_p, :);
            etap = eta(rows_p, :);
            for q = p+1 : N
                bl = bl + 1;
                rows_q = blk(q);
                xq = x(rows_q, :);
                etaq = eta(rows_q, :);
                res = 2*(V(blk(bl), :) - xp*C*xq'); % Residual
                resdot = 2*(-xp*C*etaq' - etap*C*xq'); % Residual derivative
                hess(rows_p, :) = hess(rows_p, :) - (res*etaq + resdot*xq)*C';
                hess(rows_q, :) = hess(rows_q, :) - (res'*etap + resdot'*xp)*C;
            end
        end
    end

    % Execute some checks on the derivatives for early debugging.
    % checkgradient(problem);
    % pause;
    % checkhessian(problem);
    % pause;

    % Solve.
    [xsol, xcost, info] = trustregions(problem);
    fprintf('Final cost: %g.\n', xcost);

    % Display some statistics.
    fs = 11;
    figure;
    semilogy([info.iter], [info.gradnorm], 'o-.','Color','blue', 'MarkerSize',6, 'LineWidth',1.1);
    ax1 = gca;
    set(ax1, 'FontSize', fs);
    xlabel(ax1, 'Iteration #', 'FontSize', fs);
    ylabel(ax1, 'Gradient norm', 'FontSize', fs);
    title('Convergence of the trust-regions algorithm');

    % Make a plot of estimation error (only for K = 2).
    if K == 2
        % Find unitary ambiguity first by solving min ||J - xsol U||.
        % This has a closed-form solution.
        [u, ~, v] = svd(xsol'*J);

        % Error in position, normalized by the norm of the true gains.
        E = (J - xsol*u*v')/norm(J);

        % One panel per coordinate (column) of the gains.
        xlabels = {'Real E_1', 'Real E_2'};
        ylabels = {'Imag E_1', 'Imag E_2'};
        titles = {'Position error 1st coordinate', ...
                  'Position error 2nd coordinate'};
        figure;
        for c = 1 : 2
            ax = subplot(1, 2, c);
            quiver(real(J(:, c)), imag(J(:, c)), real(E(:, c)), imag(E(:, c)));
            hold all;
            scatter(real(J(:, c)), imag(J(:, c)));
            set(ax, 'FontSize', fs);
            xlabel(xlabels{c});
            ylabel(ylabels{c});
            title(titles{c});
            axis equal;
        end
    end

end

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/robust_pca.m ================================================ function [U, cost] = robust_pca(X, d) % Computes a robust version of PCA (principal component analysis) on data. % % function [U, cost] = robustpca(X, d) % % Given a matrix X of size p by n, such that each column represents a % point in R^p, this computes U: an orthonormal basis of size p by d such % that the column space of U captures the points X as well as possible. % More precisely, the function attempts to compute U as the minimizer % over the Grassmann manifold (the set of linear subspaces) of: % % f(U) = (1/n) Sum_{i = 1:n} dist(X(:, i), the space spanned by U) % = (1/n) Sum_{i = 1:n} || U*U'*X(:, i) - X(:, i) || % % The output cost represents the average distance achieved with the % returned U. Notice that norms are not squared, for robustness. % % In practice, because this function is nonsmooth, it is smoothed with a % pseudo-Huber loss function of parameter epsilon (noted e for short), and % the smoothing parameter is iteratively reduced (with warm starts): % % f_e(U) = (1/n) Sum_{i = 1:n} l_e(|| U*U'*X(:, i) - X(:, i) ||) % % with l_e(x) = sqrt(x^2 + e^2) - e (for e = 0, this is absolute value).
%
% The intermediate optimization of the smooth cost over the Grassmann
% manifold is performed using the Manopt toolbox.
%
% Ideally, the non-outlier data should be centered. If not, this
% pre-processing centers all the data, but bear in mind that outliers will
% shift the center of mass too.
% X = X - repmat(mean(X, 2), [1, size(X, 2)]);
%
% There are no guarantees that this code will return the optimal U.
% This code is distributed to illustrate one possible way of optimizing
% a nonsmooth cost function over a manifold, using Manopt with smoothing.
% For practical use, the constants in the code would need to be tuned.

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Nicolas Boumal and Teng Zhang, May 2, 2014
% Contributors:
%
% Change log:
%
%   March 4, 2015 (NB):
%       Uses a pseudo-Huber loss rather than a Huber loss: this has the
%       nice advantage of being smooth and simpler to code (no if's).
%
%   April 8, 2015 (NB):
%       Built-in test data for quick tests; added comment about centering.

    % If no inputs, generate random data for illustration purposes.
    if nargin == 0
        % Generate some data points aligned on a subspace
        X = rand(2, 1)*(1:30) + .05*randn(2, 30).*[(1:30);(1:30)];
        % And add some random outliers to the mix
        P = randperm(size(X, 2));
        outliers = 10;
        X(:, P(1:outliers)) = 30*randn(2, outliers);
        % Center the data
        % X = X - repmat(mean(X, 2), [1, size(X, 2)]);
        d = 1;
    end

    % Prepare a Manopt problem structure for optimization of the given
    % cost (defined below) over the Grassmann manifold.
    [p, n] = size(X);
    manifold = grassmannfactory(p, d);
    problem.M = manifold;
    problem.cost = @robustpca_cost;
    problem.egrad = @robustpca_gradient;

    % Do classical PCA for the initial guess.
    % This is just one idea: it is not necessarily useful or ideal.
    % Using a random initial guess, and starting over for a few different
    % ones is probably much better. For this example, we keep it simple.
    [U, ~, ~] = svds(X, d);

    % Iteratively reduce the smoothing constant epsilon and optimize
    % the cost function over Grassmann. epsilon is shared with the nested
    % cost and gradient functions below, so updating it here changes the
    % function being optimized at the next pass.
    epsilon = 1;
    n_iterations = 6;
    reduction = .5;
    options.verbosity = 2; % Change this number for more or less output

    % Silence the "approximate Hessian" warning while solving. The previous
    % state of that warning is saved and restored afterwards (also if the
    % solver throws), so that the caller's warning settings are left
    % untouched instead of being forced to 'on'.
    warning_state = warning('off', 'manopt:getHessian:approx');
    try
        for iter = 1 : n_iterations
            U = trustregions(problem, U, options);
            epsilon = epsilon * reduction;
        end
    catch err
        warning(warning_state);
        rethrow(err);
    end
    warning(warning_state);

    % Return the cost as the actual average of distances, not smoothed.
    epsilon = 0;
    cost = robustpca_cost(U);

    % If working with the auto-generated input, plot the results.
    if nargin == 0
        scatter(X(1,:), X(2,:));
        hold on;
        plot(U(1)*[-1, 1]*100, U(2)*[-1 1]*100, 'r');
        hold off;
        % Compare to a standard PCA
        [Upca, ~, ~] = svds(X,1);
        hold on;
        plot(Upca(1)*[-1, 1]*100, Upca(2)*[-1 1]*100, 'k');
        hold off;
        xlim(1.1*[min(X(1,:)), max(X(1,:))]);
        ylim(1.1*[min(X(2,:)), max(X(2,:))]);
        legend('data points', 'Robust PCA fit', 'Standard PCA fit');
    end

    % Smoothed cost: mean over the columns of X of the pseudo-Huber loss
    % (with the current epsilon) of the norm of U*U'*X(:, i) - X(:, i).
    function value = robustpca_cost(U)

        vecs = U*(U'*X) - X;
        sqnrms = sum(vecs.^2, 1);
        vals = sqrt(sqnrms + epsilon^2) - epsilon;
        value = mean(vals);

    end

    % Euclidean gradient of the smoothed cost (it will be transformed into
    % the Riemannian gradient automatically by Manopt).
    function G = robustpca_gradient(U)

        % Note that the computation of vecs and sqnrms is redundant
        % with their computation in the cost function. To speed
        % up the code, it would be wise to use the caching capabilities
        % of Manopt (the store structure). See online documentation.
        % It is not done here to keep the code a bit simpler.
        UtX = U'*X;
        vecs = U*UtX-X;
        sqnrms = sum(vecs.^2, 1);
        % This explicit loop is a bit slow: the code below is equivalent
        % and faster to compute the gradient.
        % G = zeros(p, d);
        % for i=1:n
        %     G = G + (1/sqrt(sqnrms(i) + epsilon^2)) * vecs(:,i) * UtX(:,i)';
        % end
        % G = G/n;
        G = mean(multiscale(1./sqrt(sqnrms + epsilon^2), ...
                 multiprod(reshape(vecs, [p, 1, n]), ...
                 multitransp(reshape(UtX, [d, 1, n])))), 3);
    end

end

================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/shapefit_smoothed.m ================================================ function [T_hub, T_lsq, T_cvx] = shapefit_smoothed(V, J) % ShapeFit formulation for sensor network localization from pair directions % % function [T_hub, T_lsq, T_cvx] = shapefit_smoothed(V, J) % % This example in based on the paper http://arxiv.org/abs/1506.01437: % ShapeFit: Exact location recovery from corrupted pairwise directions, 2015 % by Paul Hand, Choongbum Lee and Vladislav Voroninski. % % The problem is the following: there are n points t_1, ..., t_n in R^d % which need to be estimated (localized). To this end, we are given % measurements of some of the pairwise directions, % v_ij = (t_i - t_j) / norm(t_i - t_j) + noise. % Assume there are m such pairwise measurements, defining a graph with m % edges over n nodes. J is the signed incidence matrix of this graph (see % in code). To build J from lists I, J in R^m of nodes, use: % J = sparse([I ; J], [(1:m)' ; (1:m)'], [ones(m, 1), -ones(m, 1)], n, m, 2*m); % % The measurements are arranged in the matrix V of size d x m. From V, we % attempt to estimate t_1, ..., t_n, arranged in T, a matrix of size d x n. % The estimation can only be done up to translation and scaling. The % returned T's are centered: the columns sum to zero. % % ShapeFit is a formulation of this estimation problem which is robust to % outliers. It is a nonsmooth, convex optimization problem over an affine % space, i.e., a linear manifold. We smooth the cost using the pseudo-Huber % loss cost and solve the problem using Manopt. This requires two % ingredients: (1) a factory to describe the affine space, see % shapefitfactory; (2) defining the cost and its derivative, and minimizing % it while progressively tightening the smooth approximation (with % warm-start).
%
% Simply run the example to see the results on random data. It compares the
% smoothed ShapeFit formulation against a least-squares formulation, when
% the measurements include outliers. See in code to vary the noise
% parameters, dimension d, number of nodes n, number of measurements m, ...
%
% Note: since the problem is convex, this returns the global optimum.
% This example also illustrates the use of Manopt for optimization under
% linear constraints: admittedly a simple subcase of optimization on
% manifolds.
%
%
% See also: shapefitfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 18, 2015.
% Contributors:
% Change log:

    % Generic useful functions
    center_cols = @(A) bsxfun(@minus, A, mean(A, 2));
    normalize_cols = @(A) bsxfun(@times, A, 1./sqrt(sum(A.^2, 1)));
    sqnorm_cols = @(A) sum(A.^2, 1);

    % DATA GENERATION
    %
    % If no inputs are specified, generate some random data for
    % illustration purposes.
    if nargin == 0

        % We estimate n points in R^d
        d = 2;
        n = 500;

        % Those points are the columns of T : they are what we need to
        % estimate, up to scaling and translation. We center T for
        % convenience.
        T_tru = center_cols(rand(d, n));

        % We get a measurement of some pairs of relative directions.
        % Which pairs is encoded in this graph, with J being the (signed,
        % transposed) incidence matrix. J is n x m, sparse.
        % There are roughly edge_fraction * n * (n-1) / 2 measurements.
        edge_fraction = 0.10;
        % [ii, jj] = erdosrenyi(n, edge_fraction);
        [ii, jj] = randomgraph(n, edge_fraction*nchoosek(n, 2));
        m = length(ii);
        J = sparse([ii ; jj], [(1:m)' ; (1:m)'], ...
                   [ones(m, 1), -ones(m, 1)], n, m, 2*m);

        % The measurements give the directions from one point to another.
        % That is: we get the position difference, normalized. Here, with
        % Gaussian noise. Least-squares will be well-suited for this.
        sigma = .0;
        V = normalize_cols(T_tru*J + sigma*randn(d, m)); % d x m

        % Outliers: we replace some of the direction measurements by
        % uniformly random unit-norm vectors.
        outlier_fraction = .3;
        outliers = rand(1, m) < outlier_fraction;
        V(:, outliers) = normalize_cols(randn(d, sum(outliers)));

    end % done generating random data

    [d, m] = size(V);
    n = size(J, 1);
    assert(size(J, 2) == m, 'J must be n x m, with V of size d x m.');

    VJt = full(V*J');

    % This "manifold" describes the Euclidean space of matrices T of size
    % d x n such that VJt(:).'*T(:) = 1 (the inner product of VJt and T
    % equals 1) and T has centered columns: T1 = 0.
    problem.M = shapefitfactory(VJt);

    % This linear operator computes the orthogonal projection of each
    % difference ti - tj on the orthogonal space to v_ij.
    % If the alignment is compatible with the data, then this is zero.
    % A(T) is a d x m matrix.
    function AT = A(T)
        TJ = T*J;
        AT = TJ - bsxfun(@times, V, sum(V .* TJ, 1));
    end

    % Need the adjoint of A, too. Input is d x m, output is d x n.
    Astar = @(W) (W - bsxfun(@times, V, sum(V.*W, 1)))*J';

    % LEAST-SQUARES
    %
    % First, work with a least-squares formulation of the problem.
    % That is, we minimize a (very nice) convex cost over an affine space.
    % Since the smooth solvers in Manopt converge to critical points, this
    % means they converge to global optimizers.
    problem.cost = @(T) 0.5*norm(A(T), 'fro')^2;
    problem.egrad = @(T) Astar(A(T));
    problem.ehess = @(T, Tdot) Astar(A(Tdot));

    T_lsq = trustregions(problem);

    % PSEUDO-HUBER SMOOTHED SHAPEFIT
    %
    % Now solve the same, but with a pseudo-Huber loss instead of
    % least-squares.
    % We iteratively sharpen the Huber function, i.e., reduce delta.
    % It is important to warm start in such a fashion: trying to optimize
    % with a random initial guess and a very small delta is typically slow.
    % How fast one should decrease delta, and how accurately one should
    % optimize at each intermediate stage, is open for research.
    delta = 1;
    T_hub = []; % We could use T_lsq as initial guess, too.
    problem = rmfield(problem, 'ehess');

    % No Hessian is supplied from here on, so silence the "approximate
    % Hessian" warning while solving. The previous state of that warning is
    % saved and restored afterwards (also if the solver throws): otherwise
    % the warning would stay disabled after this function returns.
    warning_state = warning('off', 'manopt:getHessian:approx');
    try
        for iter = 1 : 12

            delta = delta / 2;
            h = @(x2) sqrt(x2 + delta^2) - delta; % pseudo-Huber loss

            problem.cost = @(T) sum(h(sqnorm_cols(A(T))));
            problem.egrad = @(T) Astar(bsxfun(@times, A(T), ...
                                        1./sqrt(sqnorm_cols(A(T)) + delta^2)));

            % Solve, using the previous solution as initial guess.
            T_hub = trustregions(problem, T_hub);

        end
    catch err
        warning(warning_state);
        rethrow(err);
    end
    warning(warning_state);

    % CVX SHAPEFIT
    %
    % Actual ShapeFit cost (nonsmooth), with CVX.
    % You can get CVX from http://cvxr.com/.
    use_cvx_if_available = false;
    if use_cvx_if_available && exist('cvx_version', 'file')
        T_cvx = shapefit_cvx(V, J);
    else
        T_cvx = NaN(d, n);
    end

    % VISUALIZATION
    %
    % If T_tru is available, for display, we scale the estimators to match
    % the norm of the target. The scaling factor is obtained by minimizing
    % the norm of the discrepancy : norm(T_tru - scale*T_xxx, 'fro').
    % A plot is produced if d is 2 or 3.
    if exist('T_tru', 'var') && (d == 2 || d == 3)

        T_lsq = T_lsq * trace(T_tru'*T_lsq) / norm(T_lsq, 'fro')^2;
        T_hub = T_hub * trace(T_tru'*T_hub) / norm(T_hub, 'fro')^2;
        T_cvx = T_cvx * trace(T_tru'*T_cvx) / norm(T_cvx, 'fro')^2;

        switch d
            case 2
                plot(T_tru(1, :), T_tru(2, :), 'bo', ...
                     T_lsq(1, :), T_lsq(2, :), 'rx', ...
                     T_hub(1, :), T_hub(2, :), 'k.', ...
                     T_cvx(1, :), T_cvx(2, :), 'g.');
            case 3
                plot3(T_tru(1, :), T_tru(2, :), T_tru(3, :), 'bo', ...
                      T_lsq(1, :), T_lsq(2, :), T_lsq(3, :), 'rx', ...
                      T_hub(1, :), T_hub(2, :), T_hub(3, :), 'k.', ...
                      T_cvx(1, :), T_cvx(2, :), T_cvx(3, :), 'g.');
        end

        legend('ground truth', 'least squares', ...
               sprintf('pseudo-huber, \\delta = %.1e', delta), ...
               'CVX ShapeFit');

        title(sprintf(['ShapeFit problem : d = %d, n = %d, edge ' ...
                       'fraction = %.2g, sigma = %.2g, outlier ' ...
                       'fraction = %.2g'], d, n, edge_fraction, sigma, ...
                       outlier_fraction));
        axis equal;

    end

end


% If CVX is available, it can be used to solve the nonsmooth problem
% directly, very elegantly.
function T_cvx = shapefit_cvx(V, J) d = size(V, 1); n = size(J, 1); %#ok VJt = full(V*J'); cvx_begin variable T_cvx(d, n) % We want to minimize this: % minimize sum( norms( A(T_cvx), 2, 1 ) ) % But unfortunately, CVX doesn't handle bsxfun. Instead, we use % repmat, which is slower, and hence hurts the comparison in % disfavor of CVX. minimize sum( norms( T_cvx*J - V .* repmat(sum(V .* (T_cvx*J), 1), [d, 1]) , 2, 1 ) ) sum(T_cvx, 2) == zeros(d, 1); %#ok VJt(:).' * T_cvx(:) == 1; %#ok cvx_end end function [I, J, A] = erdosrenyi(n, p) %#ok % Generate a random Erdos-Renyi graph with n nodes and edge probability p. % % [I, J, A] = erdosrenyi(n, p) % % Returns a list of edges (I(k), J(k)) for a random, undirected Erdos-Renyi % graph with n nodes and edge probability p. A is the adjacency matrix. % % I(k) < J(k) for all k, i.e., all(I % The optimization takes place over a Stiefel manifold whose dimension % is independent of n. This is especially useful when there are many % more variables than samples. St = stiefelfactory(p, m); problem.M = St; % In this helper function, given a point 'X' on the manifold we check % whether the caching structure 'store' has been populated with % quantities that are useful to compute at X or not. If they were not, % then we compute and store them now. function store = prepare(X, store) if ~isfield(store, 'ready') || ~store.ready store.AtX = A'*X; store.absAtX = abs(store.AtX); store.pos = max(0, store.absAtX - gamma); store.ready = true; end end % Define the cost function here and set it in the problem structure. problem.cost = @cost; function [f, store] = cost(X, store) store = prepare(X, store); pos = store.pos; f = -.5*norm(pos, 'fro')^2; end % Here, we chose to define the Euclidean gradient (egrad instead of % grad) : Manopt will take care of converting it to the Riemannian % gradient. 
problem.egrad = @egrad; function [G, store] = egrad(X, store) if ~isfield(store, 'G') store = prepare(X, store); pos = store.pos; AtX = store.AtX; sgAtX = sign(AtX); factor = pos.*sgAtX; store.G = -A*factor; end G = store.G; end % checkgradient(problem); % pause; % The optimization happens here. To improve the method, it may be % interesting to investigate better-than-random initial iterates and, % possibly, to fine tune the parameters of the solver. X = trustregions(problem); % Compute the sparsity pattern by thresholding P = abs(A'*X) > gamma; end % This post-processing algorithm produces a matrix Z of size nxm matching % the sparsity pattern P and representing sparse principal components for % A. This is to be called with the output of the main algorithm. This % algorithm is described in the reference paper by Journee et al. function Z = postprocess(A, P, X) fprintf('Post-processing... '); counter = 0; maxiter = 1000; tolerance = 1e-8; while counter < maxiter Z = A'*X; Z(~P) = 0; Z = Z*diag(1./sqrt(diag(Z'*Z))); X = ufactor(A*Z); counter = counter + 1; if counter > 1 && norm(Z0-Z, 'fro') < tolerance*norm(Z0, 'fro') break; end Z0 = Z; end fprintf('done, in %d iterations (max = %d).\n', counter, maxiter); end % Returns the U-factor of the polar decomposition of X function U = ufactor(X) [W, S, V] = svd(X, 0); %#ok U = W*V'; end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/thomson_problem.m ================================================ function X = thomson_problem(n, d) % Simple attempt at computing n well distributed points on a sphere in R^d. % % This is an example of how Manopt can approximate the gradient and even % the Hessian of a cost function based on finite differences, even if only % the cost function is specified without its derivatives. 
% % This functionality is provided only as a help for prototyping, and should % not be used to compare algorithms in terms of computation time or % accuracy, because the underlying gradient approximation scheme is slow. % % See also the derivative free solvers for an alternative: % pso and neldermead. % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Nov. 1, 2016 % Contributors: % Change log: if ~exist('n', 'var') || isempty(n) n = 50; end if ~exist('d', 'var') || isempty(d) d = 3; end % Define the Thomson problem with 1/r^2 potential. That is: find n points % x_i on a sphere in R^d such that the sum over all pairs (i, j) of the % potentials 1/||x_i - x_j||^2 is minimized. Since the points are on a % sphere, each potential is .5/(1-x_i'*x_j). problem.M = obliquefactory(d, n); problem.cost = @(X) sum(sum(triu(1./(1-X'*X), 1))) / n^2; % Attempt to minimize the cost. Since the gradient is not provided, Manopt % approximates it with finite differences. This is /slow/, since for each % gradient approximation, problem.M.dim()+1 calls to the cost function are % necessary, on top of generating an orthonormal basis of the tangent space % at each iterate. % % Note that it is difficult to reach high accuracy critical points with an % approximate gradient, hence it may be required to set a less ambitious % value for the gradient norm tolerance. opts.tolgradnorm = 1e-4; % Pick a solver. Both work fairly well on this problem. % X = conjugategradient(problem, [], opts); X = rlbfgs(problem, [], opts); % Plot the points on a translucid sphere if nargout == 0 && d == 3 [x, y, z] = sphere(50); surf(x, y, z, 'FaceAlpha', .5); hold all; plot3(X(1, :), X(2, :), X(3, :), '.', 'MarkerSize', 20); axis equal; box off; axis off; end % For much better performance, after an early prototyping phase, the % gradient of the cost function should be specified, typically in % problem.grad or problem.egrad. 
See the online document at % % http://www.manopt.org % % for more information. end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/truncated_svd.m ================================================ function [U, S, V, info] = truncated_svd(A, p) % Returns an SVD decomposition of A truncated to rank p. % % function [U, S, V, info] = truncated_svd(A, p) % % Input: A real matrix A of size mxn and an integer p <= min(m, n). % Output: An orthonormal matrix U of size mxp, an orthonormal matrix V of % size nxp and a diagonal matrix S of size pxp with nonnegative and % decreasing diagonal entries such that USV.' is the best rank p % approximation of A according to the Frobenius norm. All real. % This function produces an output akin to svds. % % The decomposition is obtained by maximizing % f(U, V) = .5*norm(U'*A*V, 'fro')^2 % where U, V are orthonormal. Notice that f(U*Q, V*R) = f(U, V) for all % Q, R orthogonal pxp matrices. Hence, only the column spaces of U and V % matter and we may perform the optimization over a product of two % Grassmannian manifolds. % % It is easy to show that maximizing f is equivalent to minimizing g with % g(U, V) = min_S norm(U*S*V' - A, 'fro')^2, % which confirms that we are going for a best low-rank approximation of A. % % The inner workings of the Grassmann manifold use the built-in svd % function of Matlab but only for matrices of size mxp and nxp to % re-orthonormalize them. % % Notice that we are actually chasing a best fixed-rank approximation of a % matrix, which is best obtained by working directly over a manifold of % fixed-rank matrices. This is simply an example script to demonstrate some % functionalities of the toolbox. % % The code can be modified to accept a function handle for A(x) = A*x % instead of a matrix A, which is often useful. This would further require % a function handle At for the transpose of A, such that At(x) = A.'*x.
% This file is part of Manopt and is copyrighted. See the license file. % % Main author: Nicolas Boumal, July 5, 2013 % Contributors: % % Change log: % % Generate some random data to test the function if none is given. if ~exist('A', 'var') || isempty(A) A = randn(42, 60); end if ~exist('p', 'var') || isempty(p) p = 5; end % Retrieve the size of the problem and make sure the requested % approximation rank is at most the maximum possible rank. [m, n] = size(A); assert(p <= min(m, n), 'p must be smaller than the smallest dimension of A.'); % Define the cost and its derivatives on the Grassmann manifold tuple.U = grassmannfactory(m, p); tuple.V = grassmannfactory(n, p); % All of the code will work just as well if we ignore the invariance % property of the cost function indicated above and thus place U and V % on the Stiefel manifold (orthonormal matrices) instead of the % Grassmann manifold. Working on Stiefel is expected to be slower % though, partly because the search space is higher dimensional and % partly because the optimizers are not isolated. % tuple.U = stiefelfactory(m, p); % tuple.V = stiefelfactory(n, p); M = productmanifold(tuple); % Define a problem structure, specifying the manifold M, the cost % function and its derivatives. Here, to demonstrate the rapid % prototyping capabilities of Manopt, we directly define the Euclidean % gradient and the Euclidean Hessian egrad and ehess instead of the % Riemannian gradient and Hessian grad and hess. Manopt will take care % of the conversion. This automatic conversion is usually not % computationally optimal though, because much of the computations % involved in obtaining the gradient could be reused to obtain the % Hessian. After the prototyping stage, when efficiency becomes % important, it makes sense to define grad and hess rather than egrad % and ehess, and to use the caching system (the store structure).
problem.M = M; problem.cost = @cost; problem.egrad = @egrad; problem.ehess = @ehess; % The functions below make many redundant computations. This % performance hit can be alleviated by using the caching system. % Cost function function f = cost(X) U = X.U; V = X.V; f = -.5*norm(U'*A*V, 'fro')^2; end % Euclidean gradient of the cost function function g = egrad(X) U = X.U; V = X.V; AV = A*V; AtU = A'*U; g.U = -AV*(AV'*U); g.V = -AtU*(AtU'*V); end % Euclidean Hessian of the cost function function h = ehess(X, H) U = X.U; V = X.V; Udot = H.U; Vdot = H.V; AV = A*V; AtU = A'*U; AVdot = A*Vdot; AtUdot = A'*Udot; h.U = -(AVdot*AV'*U + AV*AVdot'*U + AV*AV'*Udot); h.V = -(AtUdot*AtU'*V + AtU*AtUdot'*V + AtU*AtU'*Vdot); end % Execute some checks on the derivatives for early debugging. % These things can be commented out of course. % checkgradient(problem); % pause; % checkhessian(problem); % pause; % Issue a call to a solver. A random initial guess will be chosen and % default options are selected. Here, we specify a maximum trust % region radius (which in turn induces an initial trust region radius). % Note that this is not required: default values are used if we omit % this. The diameter of the manifold scales like sqrt(2*p), hence the % form of our (empirical) choice. options.Delta_bar = 4*sqrt(2*p); [X, Xcost, info] = trustregions(problem, [], options); %#ok U = X.U; V = X.V; % Finish the job by rotating U and V such that the middle matrix S can % be diagonal with nonnegative, decreasing entries. This requires a % small svd of size pxp. Spp = U'*A*V; [Upp, Spp, Vpp] = svd(Spp); U = U*Upp; S = Spp; V = V*Vpp; % For our information, Manopt can also compute the spectrum of the % Riemannian Hessian on the tangent space at (any) X. Computing the % spectrum at the solution gives us some idea of the conditioning of % the problem. If we were to implement a preconditioner for the % Hessian, this would also inform us on its performance. 
%
    % Notice that if the optimization is performed on a product of Stiefel
    % manifolds instead of a product of Grassmannians, the double
    % invariance under the orthogonal group O(p) will appear as twice
    % p*(p-1)/2, thus p*(p-1) zero eigenvalues in the spectrum of the
    % Hessian. This means that the minimizers are not isolated, which
    % typically hinders convergence of second order algorithms.
    if M.dim() < 512
        evs = hessianspectrum(problem, X);
        stairs(sort(evs));
        title(['Eigenvalues of the Hessian of the cost function ' ...
               'at the solution']);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/importmanopt.m
================================================
% Add Manopt to the path to make all manopt components available.
%
% The toolbox is located from the folder that contains this script, so the
% script can be run from any current folder and never changes it.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Jan. 3, 2013.
% Contributors:
% Change log:
%   Aug.  7, 2013 (NB): Changed to work without the import command
%                       (new structure of the toolbox).
%   Aug.  8, 2013 (NB): Changed to use addpath_recursive, home brewed.
%   Aug. 22, 2013 (NB): Using genpath instead of home cooked
%                       addpath_recursive.

% Folder holding this script. mfilename returns an empty string when the
% code is not executed from a file (e.g., pasted at the prompt): in that
% case, fall back to the historical assumption that the current folder is
% the Manopt root.
manopt_root__ = fileparts(mfilename('fullpath'));
if isempty(manopt_root__)
    manopt_root__ = pwd;
end

% Fail before touching the path if the toolbox folder is not where it is
% expected, rather than leaving a half-modified path behind.
if ~exist(fullfile(manopt_root__, 'manopt'), 'dir')
    error('manopt:importmanopt:notfound', ...
          'Could not find the ''manopt'' folder in %s.', manopt_root__);
end

addpath(manopt_root__);

% Recursively add Manopt directories to the Matlab path.
addpath(genpath(fullfile(manopt_root__, 'manopt')));

% This is a script: do not leave the helper variable in the caller's
% workspace.
clear manopt_root__;

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/StoreDB.m
================================================
classdef StoreDB < handle_light
% The StoreDB class is a handle class to manage caching in Manopt.
%
% To create an object, call: storedb = StoreDB();
% Alternatively, call: storedb = StoreDB(storedepth); to instruct
% the database to keep at most storedepth store's in its history.
% (Note that clean up only happens when purge() is called).
%
% The storedb object is passed by reference: when it is passed to a
% function as an input, and that function modifies it, the original
% object is modified.
% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 3, 2015.
% Contributors:
% Change log:

% TODO : protect get/setWithShared calls: limit to one, and forbid access
%        to shared memory while it has not been returned.
%        Do think of the applyStatsFun case : calls a getWithShared, does
%        not need a setWithShared. I think for statsfun there should be a
%        method "forfeitWithShared".

    properties(Access = public)

        % This memory is meant to be shared at all times. Users can modify
        % this at will. It is the same for all points x.
        shared = struct();

        % This memory is used by the toolbox for, e.g., automatic caching
        % and book keeping. Users should not overwrite this. It is the
        % same for all points x.
        internal = struct();

        % When calling purge(), only a certain number of stores will be
        % kept in 'history'. This parameter fixes that number. The most
        % recently modified stores are kept. Set to inf to keep all stores.
        % A value <= 0 makes purge() discard the whole history.
        storedepth = inf;

    end

    properties(Access = private)

        % This structure holds separate memories for individual points.
        % Use get and set to interact with this. The field name 'shared' is
        % reserved, for use with get/setWithShared.
        % Each field of this structure is named after the key of a point,
        % and holds the store of that point.
        history = struct();

        % This internal counter is used to obtain unique key's for points.
        % It is incremented each time getNewKey is called.
        counter = uint32(0);

        % This internal counter is used to time calls to 'set', and hence
        % keep track of which stores in 'history' were last updated.
        timer = uint32(0);

    end

    methods(Access = public)

        % Constructor
        % storedepth is optional: if omitted, the default (inf) is kept.
        function storedb = StoreDB(storedepth)
            if nargin >= 1
                storedb.storedepth = storedepth;
            end
        end

        % Return the store associated to a given key.
        % If the key is unknown, returns an empty structure.
        % The key is used as a field name of the history structure.
        function store = get(storedb, key)
            if isfield(storedb.history, key)
                store = storedb.history.(key);
            else
                store = struct();
            end
        end

        % Same as get, but adds the shared memory in store.shared.
function store = getWithShared(storedb, key)
    store = storedb.get(key);
    store.shared = storedb.shared;
end

% Save the given store at the given key. If no key is provided, a
% new key is generated for this store (i.e., it is assumed this
% store pertains to a new point). The key is returned in all cases.
% A field 'lastset__' is added/updated in the store structure,
% keeping track of the last time that store was modified.
function key = set(storedb, store, key)
    if nargin < 3
        key = getNewKey(storedb);
    end
    % Stamp the store with the current value of the timer, then advance
    % the timer so that each call to set gets a distinct stamp.
    store.lastset__ = storedb.timer;
    storedb.timer = storedb.timer + 1;
    storedb.history.(key) = store;
end

% Same as set, but extracts the shared memory and saves it.
% The stored store will still have a 'shared' field, but it will be
% empty. As for set, the key is optional: if it is omitted, a new key
% is generated and returned.
function key = setWithShared(storedb, store, key)
    storedb.shared = store.shared;
    store.shared = [];
    if nargin < 3
        % No key given: let set generate a fresh one, as documented
        % for set itself.
        key = storedb.set(store);
    else
        key = storedb.set(store, key);
    end
end

% Generate a unique key and return it. This should be called
% everytime a new point is generated / stored. Keys are valid field
% names for structures.
function key = getNewKey(storedb)
    key = sprintf('z%d', storedb.counter);
    storedb.counter = storedb.counter + 1;
end

% Clear entries in storedb.history to limit memory usage.
function purge(storedb)

    % Nothing to purge if an unlimited history is allowed.
    if isinf(storedb.storedepth)
        return;
    end

    % A nonpositive depth means no store is kept at all.
    if storedb.storedepth <= 0
        storedb.history = struct();
        return;
    end

    % Get list of field names (keys).
    keys = fieldnames(storedb.history);
    nkeys = length(keys);

    % If we need to remove some of the elements in the database,
    if nkeys > storedb.storedepth

        % Get the last-set counter of each element:
        % a higher number means it was modified more recently.
        lastset = zeros(nkeys, 1, 'uint32');
        for i = 1 : nkeys
            lastset(i) = storedb.history.(keys{i}).lastset__;
        end

        % Sort the counters and determine the threshold above which
        % the field needs to be removed.
        sortlastset = sort(lastset, 1, 'descend');
        minlastset = sortlastset(storedb.storedepth);

        % Remove all fields that are too old.
storedb.history = rmfield(storedb.history, ...
                          keys(lastset < minlastset));

            end

        end % end of purge()

    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/applyStatsfun.m
================================================
function stats = applyStatsfun(problem, x, storedb, key, options, stats)
% Apply the statsfun function to a stats structure (for solvers).
%
% function stats = applyStatsfun(problem, x, storedb, key, options, stats)
%
% If options.statsfun exists, it is called on the stats structure and the
% structure it returns is the output. If it does not exist, stats is
% returned as is.
%
% storedb is a StoreDB object, key is the StoreDB key to point x.
%
% statsfun may accept 3 inputs (problem, x, stats) or 4 inputs
% (problem, x, stats, store). In the latter case, the store structure can
% be read but not modified (modifications will be lost) ; the store
% structure will contain the store.shared field. With any other number of
% inputs, statsfun is not called and a warning is issued.
%
% See also:

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 3, 2013.
% Contributors:
% Change log:
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % Without a user-supplied statsfun, stats goes through untouched.
    if ~isfield(options, 'statsfun')
        return;
    end

    statsfun = options.statsfun;
    ninputs = nargin(statsfun);

    if ninputs == 3

        stats = statsfun(problem, x, stats);

    elseif ninputs == 4

        % Obtain, pass along, and save the store for x.
        % get/setWithShared must come in pairs.
        store = storedb.getWithShared(key);
        stats = statsfun(problem, x, stats, store);
        storedb.setWithShared(store, key);

    else

        warning('manopt:statsfun', ...
                'statsfun unused: wrong number of inputs');

    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetApproxGradient.m
================================================
function candoit = canGetApproxGradient(problem)
% Checks whether an approximate gradient can be computed for this problem.
%
% function candoit = canGetApproxGradient(problem)
%
% Returns true if an approximate gradient of the cost function is provided
% in the given problem description, false otherwise.
% If a gradient is defined but no approximate gradient is defined
% explicitly, returns false.
%
% Even if this returns false, calls to getApproxGradient may succeed, as
% they will be redirected to getGradientFD. The latter simply requires
% availability of the cost in problem.
%
% See also: canGetGradient getGradientFD

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Nov. 1, 2016.
% Contributors:
% Change log:

    % Only an explicitly supplied problem.approxgrad counts: no other field
    % of the problem structure is consulted.
    candoit = isfield(problem, 'approxgrad');

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetApproxHessian.m
================================================
function candoit = canGetApproxHessian(problem)
% Checks whether an approximate Hessian can be computed for this problem.
%
% function candoit = canGetApproxHessian(problem)
%
% Returns true if an approximate Hessian of the cost function is provided
% in the given problem description, false otherwise.
% If a Hessian is defined but no approximate Hessian is defined explicitly,
% returns false.
%
% Even if this returns false, calls to getApproxHessian may succeed, as
% they will be redirected to getHessianFD. The latter simply requires
% availability of gradients in problem, and vector transports in problem.M.
%
% See also: canGetHessian getHessianFD

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 8, 2015.
% Contributors:
% Change log:

    candoit = isfield(problem, 'approxhess');

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetCost.m
================================================
function candoit = canGetCost(problem)
% Checks whether the cost function can be computed for a problem structure.
%
% function candoit = canGetCost(problem)
%
% Returns true if the problem structure has a 'cost' field or a 'costgrad'
% field (either one gives access to the cost), false otherwise.
%
% See also: getCost canGetDirectionalDerivative canGetGradient canGetHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:

    if isfield(problem, 'cost')
        candoit = true;
    else
        candoit = isfield(problem, 'costgrad');
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetDirectionalDerivative.m
================================================
function candoit = canGetDirectionalDerivative(problem)
% Checks whether dir. derivatives can be computed for a problem structure.
%
% function candoit = canGetDirectionalDerivative(problem)
%
% Returns true if the problem structure has a 'diff' field or if the
% gradient can be computed (see canGetGradient), false otherwise.
%
% See also: canGetCost canGetGradient canGetHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:

    % canGetGradient is only consulted when 'diff' is absent.
    if isfield(problem, 'diff')
        candoit = true;
    else
        candoit = canGetGradient(problem);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetEuclideanGradient.m
================================================
function candoit = canGetEuclideanGradient(problem)
% Checks whether the Euclidean gradient can be computed for a problem.
%
% function candoit = canGetEuclideanGradient(problem)
%
% Returns true if the Euclidean gradient can be computed given the problem
% description, false otherwise.
%
% See also: canGetGradient getEuclideanGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   June 28, 2016 (NB):
%       Added support for getPartialEuclideanGradient

    % Either an explicit egrad field is present, or the Euclidean gradient
    % is available through partial Euclidean gradients. The latter check is
    % only evaluated when egrad is absent.
    candoit = isfield(problem, 'egrad') || canGetPartialEuclideanGradient(problem);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetGradient.m
================================================
function candoit = canGetGradient(problem)
% Checks whether the gradient can be computed for a problem structure.
%
% function candoit = canGetGradient(problem)
%
% Returns true if the gradient of the cost function can be computed given
% the problem description, false otherwise.
%
% See also: canGetCost canGetDirectionalDerivative canGetHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   June 28, 2016 (NB):
%       Added support for getPartialGradient
%
%   Nov. 1, 2016 (NB):
%       Added support for gradient from directional derivatives

    % The checks are tried in order and stop at the first one that succeeds.
    candoit = isfield(problem, 'grad') || isfield(problem, 'costgrad') || ...
              canGetEuclideanGradient(problem) || ...
              canGetPartialGradient(problem) || ...
              ... % Check if directional derivatives can be obtained, since
              ... % it is possible to compute the gradient from directional
              ... % derivatives (expensively). Here, it is not possible to
              ... % call canGetDirectionalDerivative, because that function
              ... % would then potentially call canGetGradient, thus
              ... % starting an infinite loop. As a result, we have some
              ... % code redundancy: the check below needs to be kept
              ... % equivalent to the check in canGetDirectionalDerivative.
isfield(problem, 'diff');

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetHessian.m
================================================
function candoit = canGetHessian(problem)
% Checks whether the Hessian can be computed for a problem structure.
%
% function candoit = canGetHessian(problem)
%
% Returns true if the Hessian of the cost function can be computed given
% the problem description, false otherwise. This is the case if the
% problem structure has a 'hess' field, or if it has an 'ehess' field and
% the Euclidean gradient can be computed (see canGetEuclideanGradient).
%
% If 'ehess' is supplied without a way to get the Euclidean gradient (and
% 'hess' is absent), a warning with identifier manopt:canGetHessian is
% issued and false is returned.
%
% See also: canGetCost canGetDirectionalDerivative canGetGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:

    % A Riemannian Hessian settles the question on its own.
    if isfield(problem, 'hess')
        candoit = true;
        return;
    end

    % Without hess, the only other route is a Euclidean Hessian.
    if ~isfield(problem, 'ehess')
        candoit = false;
        return;
    end

    % ehess is only usable together with the Euclidean gradient. This check
    % is evaluated exactly once, so that any warning it may issue itself is
    % not displayed twice.
    candoit = canGetEuclideanGradient(problem);

    % Display an extra warning message to the user in anticipation of
    % common mistakes.
    if ~candoit
        warning('manopt:canGetHessian', ...
         ['If the Hessian is supplied as a Euclidean Hessian (ehess),\n' ...
          'then the Euclidean gradient must also be supplied (egrad).']);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetLinesearch.m
================================================
function candoit = canGetLinesearch(problem)
% Checks whether the problem structure can give a line-search a hint.
%
% function candoit = canGetLinesearch(problem)
%
% Returns true if the problem description includes a mechanism to give
% line-search algorithms a hint as to "how far to look", false otherwise.
%
% See also: getLinesearch

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 17, 2014.
% Contributors:
% Change log:

    candoit = isfield(problem, 'linesearch');

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetPartialEuclideanGradient.m
================================================
function candoit = canGetPartialEuclideanGradient(problem)
% Checks whether the partial Euclidean gradient can be computed for a problem.
%
% function candoit = canGetPartialEuclideanGradient(problem)
%
% Returns true if the problem structure has both a 'partialegrad' field and
% an 'ncostterms' field, false otherwise.
%
% If 'partialegrad' is present but 'ncostterms' is missing, a warning with
% identifier manopt:partialegrad is issued (and false is returned).
%
% See also: getPartialEuclideanGradient canGetPartialGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 28, 2016.
% Contributors:
% Change log:

    has_partialegrad = isfield(problem, 'partialegrad');
    has_ncostterms = isfield(problem, 'ncostterms');

    candoit = has_partialegrad && has_ncostterms;

    % Point the user to the missing piece of information.
    if has_partialegrad && ~has_ncostterms
        warning('manopt:partialegrad', ...
         ['If problem.partialegrad is specified, indicate the number n\n' ...
          'of terms in the cost function with problem.ncostterms = n.']);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetPartialGradient.m
================================================
function candoit = canGetPartialGradient(problem)
% Checks whether the partial gradient can be computed for a given problem.
%
% function candoit = canGetPartialGradient(problem)
%
% Returns true if the partial gradient of the cost function can be computed
% given the problem description, false otherwise.
%
% See also: getPartialGradient canGetPartialEuclideanGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 28, 2016.
% Contributors:
% Change log:

    candoit = (isfield(problem, 'partialgrad') && isfield(problem, 'ncostterms')) || ...
canGetPartialEuclideanGradient(problem);

    % A partialgrad field without ncostterms is unusable: tell the user
    % which field is missing.
    if isfield(problem, 'partialgrad') && ~isfield(problem, 'ncostterms')
        warning('manopt:partialgrad', ...
         ['If problem.partialgrad is specified, indicate the number n\n' ...
          'of terms in the cost function with problem.ncostterms = n.']);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetPrecon.m
================================================
function candoit = canGetPrecon(problem)
% Checks whether a preconditioner was specified in the problem description.
%
% function candoit = canGetPrecon(problem)
%
% Returns true if a preconditioner was specified, false otherwise. Notice
% that even if this function returns false, it is still possible to call
% getPrecon, as the default preconditioner is simply the identity operator.
% This check function is mostly useful to tell whether that default
% preconditioner will be in use or not.
%
% See also: getPrecon getSqrtPrecon canGetSqrtPrecon getHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 3, 2013.
% Contributors:
% Change log:

    % A preconditioner counts as specified if there is a 'precon' field or
    % if a square root of the preconditioner is available.
    candoit = isfield(problem, 'precon') || canGetSqrtPrecon(problem);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetSqrtPrecon.m
================================================
function candoit = canGetSqrtPrecon(problem)
% Checks whether a square root of preconditioner was specified in problem.
%
% function candoit = canGetSqrtPrecon(problem)
%
% Returns true if the problem structure allows for applying the square root
% of a preconditioner to tangent vectors at a given point. The square root
% of the preconditioner at x must be a symmetric, positive definite
% operator Q such that applying Q twice (Q o Q) amounts to applying the
% preconditioner once.
% If both a preconditioner and a square root of
% preconditioner are provided, it is the user's responsibility to ensure
% their compatibility.
%
% Similarly to getPrecon, if the present function returns false, calls to
% getSqrtPrecon will still work: they will act as the identity. Note that
% this may be incompatible with the preconditioner if it is given. Thus,
% always check by calling canGetSqrtPrecon first.
%
% See also: canGetPrecon getSqrtPrecon getPrecon

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 3, 2015.
% Contributors:
% Change log:

    % Only the presence of the 'sqrtprecon' field is checked.
    candoit = isfield(problem, 'sqrtprecon');

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetSubgradient.m
================================================
function candoit = canGetSubgradient(problem)
% Checks whether a subgradient can be computed for a problem structure.
%
% function candoit = canGetSubgradient(problem)
%
% Returns true if a subgradient of the cost function can be computed given
% the problem description, false otherwise.
%
% See also: canGetGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 20, 2017.
% Contributors:
% Change log:

    % True if there is a 'subgrad' field; otherwise, true whenever
    % canGetGradient returns true.
    candoit = isfield(problem, 'subgrad') || canGetGradient(problem);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getApproxGradient.m
================================================
function approxgrad = getApproxGradient(problem, x, storedb, key)
% Computes an approximation of the gradient of the cost function at x.
%
% function approxgrad = getApproxGradient(problem, x)
% function approxgrad = getApproxGradient(problem, x, storedb)
% function approxgrad = getApproxGradient(problem, x, storedb, key)
%
% Returns an approximation of the gradient at x for the cost function
% described in the problem structure.
% % storedb is a StoreDB object, key is the StoreDB key to point x. % % If no approximate gradient was provided, this call is redirected to % getGradientFD. % % See also: getGradientFD canGetApproxGradient % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Nov. 1, 2016. % Contributors: % Change log: % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end if isfield(problem, 'approxgrad') %% Compute the approximate gradient using approxgrad. % Check whether this function wants to deal with storedb or not. switch nargin(problem.approxgrad); case 1 approxgrad = problem.approxgrad(x); case 2 % Obtain, pass along, and save the store for x. store = storedb.getWithShared(key); [approxgrad, store] = problem.approxgrad(x, store); storedb.setWithShared(store, key); case 3 % Pass along the whole storedb (by reference), with key. approxgrad = problem.approxgrad(x, storedb, key); otherwise up = MException('manopt:getApproxGradient:badapproxgrad', ... 'approxgrad should accept 1, 2 or 3 inputs.'); throw(up); end else %% Try to fall back to a standard FD approximation. approxgrad = getGradientFD(problem, x, storedb, key); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getApproxHessian.m ================================================ function approxhess = getApproxHessian(problem, x, d, storedb, key) % Computes an approximation of the Hessian of the cost fun. at x along d. % % function approxhess = getApproxHessian(problem, x, d) % function approxhess = getApproxHessian(problem, x, d, storedb) % function approxhess = getApproxHessian(problem, x, d, storedb, key) % % Returns an approximation of the Hessian at x along d of the cost function % described in the problem structure. % % storedb is a StoreDB object, key is the StoreDB key to point x. 
% % If no approximate Hessian was provided, this call is redirected to % getHessianFD. % % See also: getHessianFD canGetApproxHessian % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Dec. 30, 2012. % Contributors: % Change log: % % April 3, 2015 (NB): % Works with the new StoreDB class system. % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end if isfield(problem, 'approxhess') %% Compute the approximate Hessian using approxhess. % Check whether this function wants to deal with storedb or not. switch nargin(problem.approxhess); case 2 approxhess = problem.approxhess(x, d); case 3 % Obtain, pass along, and save the store for x. store = storedb.getWithShared(key); [approxhess, store] = problem.approxhess(x, d, store); storedb.setWithShared(store, key); case 4 % Pass along the whole storedb (by reference), with key. approxhess = problem.approxhess(x, d, storedb, key); otherwise up = MException('manopt:getApproxHessian:badapproxhess', ... 'approxhess should accept 2, 3 or 4 inputs.'); throw(up); end else %% Try to fall back to a standard FD approximation. approxhess = getHessianFD(problem, x, d, storedb, key); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getCost.m ================================================ function cost = getCost(problem, x, storedb, key) % Computes the cost function at x. % % function cost = getCost(problem, x) % function cost = getCost(problem, x, storedb) % function cost = getCost(problem, x, storedb, key) % % Returns the value at x of the cost function described in the problem % structure. % % storedb is a StoreDB object, key is the StoreDB key to point x. % % See also: canGetCost % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Dec. 30, 2012. 
% Contributors: % Change log: % % April 3, 2015 (NB): % Works with the new StoreDB class system. % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end if isfield(problem, 'cost') %% Compute the cost function using cost. % Check whether this function wants to deal with storedb or not. switch nargin(problem.cost) case 1 cost = problem.cost(x); case 2 % Obtain, pass along, and save the store for x. store = storedb.getWithShared(key); [cost, store] = problem.cost(x, store); storedb.setWithShared(store, key); case 3 % Pass along the whole storedb (by reference), with key. cost = problem.cost(x, storedb, key); otherwise up = MException('manopt:getCost:badcost', ... 'cost should accept 1, 2 or 3 inputs.'); throw(up); end elseif isfield(problem, 'costgrad') %% Compute the cost function using costgrad. % Check whether this function wants to deal with storedb or not. switch nargin(problem.costgrad) case 1 cost = problem.costgrad(x); case 2 % Obtain, pass along, and save the store for x. store = storedb.getWithShared(key); [cost, grad, store] = problem.costgrad(x, store); %#ok storedb.setWithShared(store, key); case 3 % Pass along the whole storedb (by reference), with key. cost = problem.costgrad(x, storedb, key); otherwise up = MException('manopt:getCost:badcostgrad', ... 'costgrad should accept 1, 2 or 3 inputs.'); throw(up); end else %% Abandon computing the cost function. up = MException('manopt:getCost:fail', ... ['The problem description is not explicit enough to ' ... 'compute the cost.']); throw(up); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getCostGrad.m ================================================ function [cost, grad] = getCostGrad(problem, x, storedb, key) % Computes the cost function and the gradient at x in one call if possible. 
% % function [cost, grad] = getCostGrad(problem, x) % function [cost, grad] = getCostGrad(problem, x, storedb) % function [cost, grad] = getCostGrad(problem, x, storedb, key) % % Returns the value at x of the cost function described in the problem % structure, as well as the gradient at x. % % storedb is a StoreDB object, key is the StoreDB key to point x. % % See also: canGetCost canGetGradient getCost getGradient % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Dec. 30, 2012. % Contributors: % Change log: % % April 3, 2015 (NB): % Works with the new StoreDB class system. % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end if isfield(problem, 'costgrad') %% Compute the cost/grad pair using costgrad. % Check whether this function wants to deal with storedb or not. switch nargin(problem.costgrad) case 1 [cost, grad] = problem.costgrad(x); case 2 % Obtain, pass along, and save the store for x. store = storedb.getWithShared(key); [cost, grad, store] = problem.costgrad(x, store); storedb.setWithShared(store, key); case 3 % Pass along the whole storedb (by reference), with key. [cost, grad] = problem.costgrad(x, storedb, key); otherwise up = MException('manopt:getCostGrad:badcostgrad', ... 'costgrad should accept 1, 2 or 3 inputs.'); throw(up); end else %% Revert to calling getCost and getGradient separately cost = getCost(problem, x, storedb, key); grad = getGradient(problem, x, storedb, key); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getDirectionalDerivative.m ================================================ function diff = getDirectionalDerivative(problem, x, d, storedb, key) % Computes the directional derivative of the cost function at x along d. 
% % function diff = getDirectionalDerivative(problem, x, d) % function diff = getDirectionalDerivative(problem, x, d, storedb) % function diff = getDirectionalDerivative(problem, x, d, storedb, key) % % Returns the derivative at x along d of the cost function described in the % problem structure. % % storedb is a StoreDB object, key is the StoreDB key to point x. % % See also: getGradient canGetDirectionalDerivative % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Dec. 30, 2012. % Contributors: % Change log: % % April 3, 2015 (NB): % Works with the new StoreDB class system. % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end if isfield(problem, 'diff') %% Compute the directional derivative using diff. % Check whether this function wants to deal with storedb or not. switch nargin(problem.diff) case 2 diff = problem.diff(x, d); case 3 % Obtain, pass along, and save the store for x. store = storedb.getWithShared(key); [diff, store] = problem.diff(x, d, store); storedb.setWithShared(store, key); case 4 % Pass along the whole storedb (by reference), with key. diff = problem.diff(x, d, storedb, key); otherwise up = MException('manopt:getDirectionalDerivative:baddiff', ... 'diff should accept 2, 3 or 4 inputs.'); throw(up); end elseif canGetGradient(problem) %% Compute the directional derivative using the gradient. % Compute the gradient at x, then compute its inner product with d. grad = getGradient(problem, x, storedb, key); diff = problem.M.inner(x, grad, d); else %% Abandon computing the directional derivative. up = MException('manopt:getDirectionalDerivative:fail', ... ['The problem description is not explicit enough to ' ... 
'compute the directional derivatives of f.']); throw(up); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getEuclideanGradient.m ================================================ function egrad = getEuclideanGradient(problem, x, storedb, key) % Computes the Euclidean gradient of the cost function at x. % % function egrad = getEuclideanGradient(problem, x) % function egrad = getEuclideanGradient(problem, x, storedb) % function egrad = getEuclideanGradient(problem, x, storedb, key) % % Returns the Euclidean gradient at x of the cost function described in the % problem structure. % % storedb is a StoreDB object, key is the StoreDB key to point x. % % Because computing the Hessian based on the Euclidean Hessian will require % the Euclidean gradient every time, to avoid overly redundant % computations, if the egrad function does not use the store caching % capabilites, this implements an automatic caching functionality. Writing % egrad to accept the optional store or storedb parameter will disable % automatic caching, but allow user controlled caching. % % See also: getGradient canGetGradient canGetEuclideanGradient % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, July 9, 2013. % Contributors: % Change log: % % April 3, 2015 (NB): % Works with the new StoreDB class system. % % June 28, 2016 (NB): % Added support for getPartialEuclideanGradient % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end if isfield(problem, 'egrad') %% Compute the Euclidean gradient using egrad. % Check whether this function wants to deal with storedb or not. switch nargin(problem.egrad) case 1 % If it does not want to deal with the store structure, % then we do some caching of our own. 
There is a small % performance hit for this is some cases, but we expect % that this is most often the preferred choice. store = storedb.get(key); if ~isfield(store, 'egrad__') store.egrad__ = problem.egrad(x); storedb.set(store, key); end egrad = store.egrad__; case 2 % Obtain, pass along, and save the store for x. % If the user deals with the store structure, then we don't % do any automatic caching: the user is in control. store = storedb.getWithShared(key); [egrad, store] = problem.egrad(x, store); storedb.setWithShared(store, key); case 3 % Pass along the whole storedb (by reference), with key. % Same here: no automatic caching. egrad = problem.egrad(x, storedb, key); otherwise up = MException('manopt:getEuclideanGradient:badegrad', ... 'egrad should accept 1, 2 or 3 inputs.'); throw(up); end elseif canGetPartialEuclideanGradient(problem) %% Compute the Euclidean gradient using a full partial Euclidean gradient. d = problem.ncostterms; egrad = getPartialEuclideanGradient(problem, x, 1:d, storedb, key); else %% Abandon computing the Euclidean gradient up = MException('manopt:getEuclideanGradient:fail', ... ['The problem description is not explicit enough to ' ... 'compute the Euclidean gradient of the cost.']); throw(up); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getGlobalDefaults.m ================================================ function opts = getGlobalDefaults() % Returns a structure with default option values for Manopt. % % function opts = getGlobalDefaults() % % Returns a structure opts containing the global default options such as % verbosity level etc. Typically, global defaults are overwritten by solver % defaults, which are in turn overwritten by user-specified options. % See the online Manopt documentation for details on options. % % See also: mergeOptions % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Dec. 30, 2012. 
% Contributors: % Change log: % There should be no reason to modify this file. % For better compatibility with future Manopt versions, % use the options structure of solvers. % % Really: don't modify it. % Verbosity level: 0 is no output at all. The higher the verbosity, the % more info is printed / displayed during solver execution. opts.verbosity = 3; % If debug is set to true, additional computations may be performed and % debugging information is outputed during solver execution. opts.debug = false; % Maximum number of store structures to store. If set to 0, caching % capabilities are not disabled, but the cache will be emptied at each % iteration of iterative solvers (more specifically: every time the % solver calls to purge the storedb). opts.storedepth = 20; % Maximum amount of time a solver may execute, in seconds. opts.maxtime = inf; end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getGradient.m ================================================ function grad = getGradient(problem, x, storedb, key) % Computes the gradient of the cost function at x. % % function grad = getGradient(problem, x) % function grad = getGradient(problem, x, storedb) % function grad = getGradient(problem, x, storedb, key) % % Returns the gradient at x of the cost function described in the problem % structure. % % storedb is a StoreDB object, key is the StoreDB key to point x. % % See also: getDirectionalDerivative canGetGradient % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Dec. 30, 2012. % Contributors: % Change log: % % April 3, 2015 (NB): % Works with the new StoreDB class system. % % June 28, 2016 (NB): % Works with getPartialGradient. % % Nov. 1, 2016 (NB): % Added support for gradient from directional derivatives. % Last resort is call to getApproxGradient instead of an exception. % Allow omission of the key, and even of storedb. 
if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end if isfield(problem, 'grad') %% Compute the gradient using grad. % Check whether this function wants to deal with storedb or not. switch nargin(problem.grad) case 1 grad = problem.grad(x); case 2 % Obtain, pass along, and save the store for x. store = storedb.getWithShared(key); [grad, store] = problem.grad(x, store); storedb.setWithShared(store, key); case 3 % Pass along the whole storedb (by reference), with key. grad = problem.grad(x, storedb, key); otherwise up = MException('manopt:getGradient:badgrad', ... 'grad should accept 1, 2 or 3 inputs.'); throw(up); end elseif isfield(problem, 'costgrad') %% Compute the gradient using costgrad. % Check whether this function wants to deal with storedb or not. switch nargin(problem.costgrad) case 1 [unused, grad] = problem.costgrad(x); %#ok case 2 % Obtain, pass along, and save the store for x. store = storedb.getWithShared(key); [unused, grad, store] = problem.costgrad(x, store); %#ok storedb.setWithShared(store, key); case 3 % Pass along the whole storedb (by reference), with key. [unused, grad] = problem.costgrad(x, storedb, key); %#ok otherwise up = MException('manopt:getGradient:badcostgrad', ... 'costgrad should accept 1, 2 or 3 inputs.'); throw(up); end elseif canGetEuclideanGradient(problem) %% Compute the gradient using the Euclidean gradient. egrad = getEuclideanGradient(problem, x, storedb, key); grad = problem.M.egrad2rgrad(x, egrad); elseif canGetPartialGradient(problem) %% Compute the gradient using a full partial gradient. d = problem.ncostterms; grad = getPartialGradient(problem, x, 1:d, storedb, key); elseif canGetDirectionalDerivative(problem) %% Compute gradient based on directional derivatives; expensive! 
B = tangentorthobasis(problem.M, x); df = zeros(size(B)); for k = 1 : numel(B) df(k) = getDirectionalDerivative(problem, x, B{k}, storedb, key); end grad = lincomb(problem.M, x, B, df); else %% Attempt the computation of an approximation of the gradient. grad = getApproxGradient(problem, x, storedb, key); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getGradientFD.m ================================================ function gradfd = getGradientFD(problem, x, storedb, key) % Computes an approx. of the gradient w/ finite differences of the cost. % % function gradfd = getGradientFD(problem, x) % function gradfd = getGradientFD(problem, x, storedb) % function gradfd = getGradientFD(problem, x, storedb, key) % % Returns a finite difference approximation of the gradient at x for % the cost function described in the problem structure. The finite % difference is based on M.dim()+1 computations of the cost. % % storedb is a StoreDB object, key is the StoreDB key to point x. % % If the cost cannot be computed, an exception is thrown. % % See also: approxgradientFD % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Nov. 1, 2016. % Contributors: % Change log: % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end % This gradient approximation is based on the cost: % check availability. if ~canGetCost(problem) up = MException('manopt:getGradientFD:nocost', ... 'getGradientFD requires the cost to be computable.'); throw(up); end % Default parameters. See approxgradientFD for explicit user access to % these parameters. stepsize = 2^-23; subspacedim = []; % Evaluate the cost at the root point fx = getCost(problem, x, storedb, key); % Pick an orthonormal basis for the tangent space at x, or a subspace % thereof. The default is a full subspace. 
If a strict subspace is % picked, the returned vector approximates the orthogonal projection of % the gradient to that subspace. B = tangentorthobasis(problem.M, x, subspacedim); % Use finite differences to approximate the directional derivative % along each direction in the basis B. df = zeros(size(B)); for k = 1 : numel(B) % Move in the B{k} direction xk = problem.M.retr(x, B{k}, stepsize); % Evaluate the cost there fxk = getCost(problem, xk, storedb); % Finite difference df(k) = (fxk - fx)/stepsize; end % Build the gradient approximation. gradfd = lincomb(problem.M, x, B, df); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getHessian.m ================================================ function hess = getHessian(problem, x, d, storedb, key) % Computes the Hessian of the cost function at x along d. % % function hess = getHessian(problem, x, d) % function hess = getHessian(problem, x, d, storedb) % function hess = getHessian(problem, x, d, storedb, key) % % Returns the Hessian at x along d of the cost function described in the % problem structure. % % storedb is a StoreDB object, key is the StoreDB key to point x. % % If an exact Hessian is not provided, an approximate Hessian is returned % if possible, without warning. If not possible, an exception will be % thrown. To check whether an exact Hessian is available or not (typically % to issue a warning if not), use canGetHessian. % % See also: getPrecon getApproxHessian canGetHessian % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Dec. 30, 2012. % Contributors: % Change log: % % April 3, 2015 (NB): % Works with the new StoreDB class system. % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end if isfield(problem, 'hess') %% Compute the Hessian using hess. 
% Check whether this function wants to deal with storedb or not. switch nargin(problem.hess) case 2 hess = problem.hess(x, d); case 3 % Obtain, pass along, and save the store for x. store = storedb.getWithShared(key); [hess, store] = problem.hess(x, d, store); storedb.setWithShared(store, key); case 4 % Pass along the whole storedb (by reference), with key. hess = problem.hess(x, d, storedb, key); otherwise up = MException('manopt:getHessian:badhess', ... 'hess should accept 2, 3 or 4 inputs.'); throw(up); end elseif isfield(problem, 'ehess') && canGetEuclideanGradient(problem) %% Compute the Hessian using ehess. % We will need the Euclidean gradient for the conversion from the % Euclidean Hessian to the Riemannian Hessian. egrad = getEuclideanGradient(problem, x, storedb, key); % Check whether this function wants to deal with storedb or not. switch nargin(problem.ehess) case 2 ehess = problem.ehess(x, d); case 3 % Obtain, pass along, and save the store for x. store = storedb.getWithShared(key); [ehess, store] = problem.ehess(x, d, store); storedb.setWithShared(store, key); case 4 % Pass along the whole storedb (by reference), with key. ehess = problem.ehess(x, d, storedb, key); otherwise up = MException('manopt:getHessian:badehess', ... 'ehess should accept 2, 3 or 4 inputs.'); throw(up); end % Convert to the Riemannian Hessian hess = problem.M.ehess2rhess(x, egrad, ehess, d); else %% Attempt the computation of an approximation of the Hessian. hess = getApproxHessian(problem, x, d, storedb, key); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getHessianFD.m ================================================ function hessfd = getHessianFD(problem, x, d, storedb, key) % Computes an approx. of the Hessian w/ finite differences of the gradient. 
% % function hessfd = getHessianFD(problem, x, d) % function hessfd = getHessianFD(problem, x, d, storedb) % function hessfd = getHessianFD(problem, x, d, storedb, key) % % Returns a finite difference approximation of the Hessian at x along d of % the cost function described in the problem structure. The finite % difference is based on computations of the gradient. % % storedb is a StoreDB object, key is the StoreDB key to point x. % % If the gradient cannot be computed, an exception is thrown. % % See also: approxhessianFD % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Dec. 30, 2012. % Contributors: % Change log: % % Feb. 19, 2015 (NB): % It is sufficient to ensure positive radial linearity to guarantee % (together with other assumptions) that this approximation of the % Hessian will confer global convergence to the trust-regions method. % Formerly, in-code comments referred to the necessity of having % complete radial linearity, and that this was harder to achieve. % This appears not to be necessary after all, which simplifies the % code. % % April 3, 2015 (NB): % Works with the new StoreDB class system. % % Nov. 1, 2016 (NB): % Removed exception in case of unavailable gradient, as getGradient % now knows to fall back to an approximate gradient if need be. % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end % Step size norm_d = problem.M.norm(x, d); % First, check whether the step d is not too small if norm_d < eps hessfd = problem.M.zerovec(x); return; end % Parameter: how far do we look? % If you need to change this parameter, use approxhessianFD explicitly. % A power of 2 is chosen so that scaling by epsilon does not incur any % round-off error in IEEE arithmetic. epsilon = 2^-14; c = epsilon/norm_d; % Compute the gradient at the current point. 
grad = getGradient(problem, x, storedb, key); % Compute a point a little further along d and the gradient there. % Since this is a new point, we need a new key for it, for the storedb. x1 = problem.M.retr(x, d, c); key1 = storedb.getNewKey(); grad1 = getGradient(problem, x1, storedb, key1); % Transport grad1 back from x1 to x. grad1 = problem.M.transp(x1, x, grad1); % Return the finite difference of them. hessfd = problem.M.lincomb(x, 1/c, grad1, -1/c, grad); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getLinesearch.m ================================================ function t = getLinesearch(problem, x, d, storedb, key) % Returns a hint for line-search algorithms. % % function t = getLinesearch(problem, x, d) % function t = getLinesearch(problem, x, d, storedb) % function t = getLinesearch(problem, x, d, storedb, key) % % For a line-search problem at x along the tangent direction d, computes % and returns t such that retracting t*d at x yields a good point around % where to look for a line-search solution. That is: t is a hint as to % "how far to look" along the line. % % storedb is a StoreDB object, key is the StoreDB key to point x. % % See also: canGetLinesearch % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, July 17, 2014. % Contributors: % Change log: % % April 3, 2015 (NB): % Works with the new StoreDB class system. % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end if isfield(problem, 'linesearch') %% Compute the line-search hint function using linesearch. % Check whether this function wants to deal with storedb or not. switch nargin(problem.linesearch) case 2 t = problem.linesearch(x, d); case 3 % Obtain, pass along, and save the store for x. 
store = storedb.getWithShared(key); [t, store] = problem.linesearch(x, d, store); storedb.setWithShared(store, key); case 4 % Pass along the whole storedb (by reference), with key. t = problem.linesearch(x, d, storedb, key); otherwise up = MException('manopt:getLinesearch:badfun', ... 'linesearch should accept 2, 3 or 4 inputs.'); throw(up); end else %% Abandon computing the line-search function. up = MException('manopt:getLinesearch:fail', ... ['The problem description is not explicit enough to ' ... 'compute a line-search hint.']); throw(up); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getPartialEuclideanGradient.m ================================================ function egrad = getPartialEuclideanGradient(problem, x, I, storedb, key) % Computes the Euclidean gradient of a subset of terms in cost function. % % function egrad = getPartialEuclideanGradient(problem, x, I) % function egrad = getPartialEuclideanGradient(problem, x, I, storedb) % function egrad = getPartialEuclideanGradient(problem, x, I, storedb, key) % % Assume the cost function described in the problem structure is a sum of % many terms, as % % f(x) = sum_i f_i(x) for i = 1:d, % where d is specified as d = problem.ncostterms. % % For a subset I of 1:d, getPartialEuclideanGradient obtains the Euclidean % gradient of the partial cost function % % f_I(x) = sum_i f_i(x) for i = I. % % storedb is a StoreDB object, key is the StoreDB key to point x. % % See also: getGradient canGetPartialEuclidean Gradient getPartialGradient % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, June 28, 2016 % Contributors: % Change log: % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end % Make sure I is a row vector, so that it is natural to loop over it % with " for i = I ". 
I = (I(:)).'; if isfield(problem, 'partialegrad') %% Compute the partial Euclidean gradient using partialegrad. % Check whether this function wants to deal with storedb or not. switch nargin(problem.partialegrad) case 2 egrad = problem.partialegrad(x, I); case 3 % Obtain, pass along, and save the store for x. store = storedb.getWithShared(key); [egrad, store] = problem.partialegrad(x, I, store); storedb.setWithShared(store, key); case 4 % Pass along the whole storedb (by reference), with key. egrad = problem.partialegrad(x, I, storedb, key); otherwise up = MException('manopt:getPartialEuclideanGradient:badpartialegrad', ... 'partialegrad should accept 2, 3 or 4 inputs.'); throw(up); end else %% Abandon computing the partial Euclidean gradient. up = MException('manopt:getPartialEuclideanGradient:fail', ... ['The problem description is not explicit enough to ' ... 'compute the partial Euclidean gradient of the cost.']); throw(up); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getPartialGradient.m ================================================ function grad = getPartialGradient(problem, x, I, storedb, key) % Computes the gradient of a subset of terms in the cost function at x. % % function grad = getPartialGradient(problem, x, I) % function grad = getPartialGradient(problem, x, I, storedb) % function grad = getPartialGradient(problem, x, I, storedb, key) % % Assume the cost function described in the problem structure is a sum of % many terms, as % % f(x) = sum_i f_i(x) for i = 1:d, % where d is specified as d = problem.ncostterms. % % For a subset I of 1:d, getPartialGradient obtains the gradient of the % partial cost function % % f_I(x) = sum_i f_i(x) for i = I. % % storedb is a StoreDB object, key is the StoreDB key to point x. % % See also: getGradient canGetPartialGradient getPartialEuclideanGradient % This file is part of Manopt: www.manopt.org. 
% Original author: Nicolas Boumal, June 28, 2016 % Contributors: % Change log: % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end % Make sure I is a row vector, so that it is natural to loop over it % with " for i = I ". I = (I(:)).'; if isfield(problem, 'partialgrad') %% Compute the partial gradient using partialgrad. % Check whether this function wants to deal with storedb or not. switch nargin(problem.partialgrad) case 2 grad = problem.partialgrad(x, I); case 3 % Obtain, pass along, and save the store for x. store = storedb.getWithShared(key); [grad, store] = problem.partialgrad(x, I, store); storedb.setWithShared(store, key); case 4 % Pass along the whole storedb (by reference), with key. grad = problem.partialgrad(x, I, storedb, key); otherwise up = MException('manopt:getPartialGradient:badpartialgrad', ... 'partialgrad should accept 2, 3 or 4 inputs.'); throw(up); end elseif canGetPartialEuclideanGradient(problem) %% Compute the partial gradient using the Euclidean partial gradient. egrad = getPartialEuclideanGradient(problem, x, I, storedb, key); grad = problem.M.egrad2rgrad(x, egrad); else %% Abandon computing the partial gradient. up = MException('manopt:getPartialGradient:fail', ... ['The problem description is not explicit enough to ' ... 'compute the partial gradient of the cost.']); throw(up); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getPrecon.m ================================================ function Pd = getPrecon(problem, x, d, storedb, key) % Applies the preconditioner for the Hessian of the cost at x along d. 
%
% function Pd = getPrecon(problem, x, d)
% function Pd = getPrecon(problem, x, d, storedb)
% function Pd = getPrecon(problem, x, d, storedb, key)
%
% Returns as Pd the result of applying the Hessian preconditioner to the
% tangent vector d at point x. The preconditioner is supposed to be a
% symmetric, positive definite approximation of the inverse of the Hessian.
%
% The preconditioner is taken from problem.precon if that field exists.
% Otherwise, if canGetSqrtPrecon(problem) holds, the square root of the
% preconditioner is applied twice.
%
% If no preconditioner is available, Pd = d (identity).
%
% storedb is a StoreDB object, key is the StoreDB key to point x.
%
% See also: getHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % Allow omission of the key, and even of storedb.
    if ~exist('key', 'var')
        if ~exist('storedb', 'var')
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    
    if isfield(problem, 'precon')
    %% Precondition using precon.

        % Check whether this function wants to deal with storedb or not.
        % The number of inputs declared by precon selects the call form.
        switch nargin(problem.precon)
            case 2
                Pd = problem.precon(x, d);
            case 3
                % Obtain, pass along, and save the store for x.
                store = storedb.getWithShared(key);
                [Pd, store] = problem.precon(x, d, store);
                storedb.setWithShared(store, key);
            case 4
                % Pass along the whole storedb (by reference), with key.
                Pd = problem.precon(x, d, storedb, key);
            otherwise
                up = MException('manopt:getPrecon:badprecon', ...
                    'precon should accept 2, 3 or 4 inputs.');
                throw(up);
        end
        
    elseif canGetSqrtPrecon(problem)
    %% Precondition by applying the square root of the preconditioner twice.
        
        % The output of the first application is the input of the second.
        sqrtPd = getSqrtPrecon(problem, x, d, storedb, key);
        Pd = getSqrtPrecon(problem, x, sqrtPd, storedb, key);
        
    else
    %% No preconditioner provided, so just use the identity.
    
        Pd = d;
        
    end
    
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getSqrtPrecon.m
================================================
function sqrtPd = getSqrtPrecon(problem, x, d, storedb, key)
% Applies the square root of the Hessian preconditioner at x along d.
%
% function sqrtPd = getSqrtPrecon(problem, x, d)
% function sqrtPd = getSqrtPrecon(problem, x, d, storedb)
% function sqrtPd = getSqrtPrecon(problem, x, d, storedb, key)
%
% Returns as sqrtPd the result of applying the square root of the Hessian
% preconditioner to the tangent vector d at point x. The preconditioner is
% supposed to be a symmetric, positive definite approximation of the
% inverse of the Hessian. Its square root must thus be symmetric and
% positive definite itself.
%
% If no square root of preconditioner is available, sqrtPd = d (identity).
% Note that this may be incompatible with the preconditioner, if that one
% is supplied in the problem description. Always check with canGetPrecon
% and canGetSqrtPrecon.
%
% storedb is a StoreDB object, key is the StoreDB key to point x.
%
% See also: getPrecon canGetPrecon canGetSqrtPrecon getHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 3, 2015.
% Contributors: 
% Change log: 

    % Allow omission of the key, and even of storedb.
    if ~exist('key', 'var')
        if ~exist('storedb', 'var')
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    
    if isfield(problem, 'sqrtprecon')
    %% Apply sqrtprecon for the square root of the preconditioner

        % Check whether this function wants to deal with storedb or not.
        % The number of inputs declared by sqrtprecon selects the call form.
        switch nargin(problem.sqrtprecon)
            case 2
                sqrtPd = problem.sqrtprecon(x, d);
            case 3
                % Obtain, pass along, and save the store for x.
                store = storedb.getWithShared(key);
                [sqrtPd, store] = problem.sqrtprecon(x, d, store);
                storedb.setWithShared(store, key);
            case 4
                % Pass along the whole storedb (by reference), with key.
sqrtPd = problem.sqrtprecon(x, d, storedb, key);
            otherwise
                up = MException('manopt:getSqrtPrecon:badsqrtprecon', ...
                    'sqrtprecon should accept 2, 3 or 4 inputs.');
                throw(up);
        end
        
    else
    %% No preconditioner square root provided, so just use the identity.
    
        sqrtPd = d;
        
    end
    
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getStore.m
================================================
function store = getStore(problem, x, storedb) %#ok
    error('This file was removed from Manopt. Please use the StoreDB class.');
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getSubgradient.m
================================================
function subgrad = getSubgradient(problem, x, tol, storedb, key)
% Subgradient of the cost function at x, computed up to a tolerance.
%
% function subgrad = getSubgradient(problem, x)
% function subgrad = getSubgradient(problem, x, tol)
% function subgrad = getSubgradient(problem, x, tol, storedb)
% function subgrad = getSubgradient(problem, x, tol, storedb, key)
%
% Returns a subgradient at x of the cost function described in the problem
% structure. The tolerance tol ( >= 0 ) is optional; if it is omitted or
% empty, tol = 0 is used.
%
% problem.subgrad is used if that field exists; it is called with 1, 2, 3
% or 4 inputs, depending on how many it declares (a warning is issued for
% 1 input, since tol cannot be passed then). Otherwise, if
% canGetGradient(problem) holds, the gradient is returned. If neither
% applies, an exception with identifier 'manopt:getSubgradient:fail' is
% thrown.
%
% storedb is a StoreDB object, key is the StoreDB key to point x.
% Both are optional.
%
% See also: getDirectionalDerivative canGetGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 20, 2017.
% Contributors: 
% Change log: 

    % storedb and key are optional: create whichever one was left out.
    if nargin < 4
        storedb = StoreDB();
    end
    if nargin < 5
        key = storedb.getNewKey();
    end
    
    % A missing or empty tolerance means tol = 0.
    if nargin < 3 || isempty(tol)
        tol = 0;
    end

    % First choice: a user-supplied subgradient.
    if isfield(problem, 'subgrad')
        
        % The number of declared inputs tells whether subgrad wants the
        % tolerance, the store for x, or the whole storedb with key.
        nin = nargin(problem.subgrad);
        
        if nin == 1
            warning('manopt:subgradient', ...
                   ['problem.subgrad normally admits a second\n' ...
                    'parameter, tol >= 0, as a tolerance.\n']);
            % The tolerance cannot be passed to a one-input subgrad.
            subgrad = problem.subgrad(x);
        elseif nin == 2
            subgrad = problem.subgrad(x, tol);
        elseif nin == 3
            % Fetch the store for x, pass it along, save it back.
            store = storedb.getWithShared(key);
            [subgrad, store] = problem.subgrad(x, tol, store);
            storedb.setWithShared(store, key);
        elseif nin == 4
            % storedb is passed by reference, together with the key.
            subgrad = problem.subgrad(x, tol, storedb, key);
        else
            throw(MException('manopt:getSubgradient:badsubgrad', ...
                'subgrad should accept 1, 2, 3 or 4 inputs.'));
        end
        
        return;
        
    end
    
    % Second choice: the gradient is a subgradient.
    if canGetGradient(problem)
        subgrad = getGradient(problem, x, storedb, key);
        return;
    end
    
    % Nothing in the problem structure lets us compute a subgradient.
    throw(MException('manopt:getSubgradient:fail', ...
        ['The problem description is not explicit enough to ' ...
         'compute a subgradient.']));
    
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/handle_light.m
================================================
classdef handle_light < handle
% Trick class to hide methods inherited from the handle class
% when calling methods(myclass).
%
% Source:
% http://stackoverflow.com/questions/6621850/is-it-possible-to-hide-the-methods-inherited-from-the-handle-class-in-matlab
% Posted by sclarke81 on StackOverflow on Oct. 24, 2012.

% This file is part of Manopt: www.manopt.org.
% Original author: sclarke81, added April 3, 2013.
% Contributors: 
% Change log: 

    % Every method below only forwards its arguments to the implementation
    % inherited from handle. They are declared in a Hidden methods block.
    methods(Hidden)
        function lh = addlistener(varargin)
            lh = addlistener@handle(varargin{:});
        end
        function notify(varargin)
            notify@handle(varargin{:});
        end
        function delete(varargin)
            delete@handle(varargin{:});
        end
        function Hmatch = findobj(varargin)
            Hmatch = findobj@handle(varargin{:});
        end
        function p = findprop(varargin)
            p = findprop@handle(varargin{:});
        end
        function TF = eq(varargin)
            TF = eq@handle(varargin{:});
        end
        function TF = ne(varargin)
            TF = ne@handle(varargin{:});
        end
        function TF = lt(varargin)
            TF = lt@handle(varargin{:});
        end
        function TF = le(varargin)
            TF = le@handle(varargin{:});
        end
        function TF = gt(varargin)
            TF = gt@handle(varargin{:});
        end
        function TF = ge(varargin)
            TF = ge@handle(varargin{:});
        end
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/mergeOptions.m
================================================
function opts = mergeOptions(opts1, opts2)
% Merges two options structures with one having precedence over the other.
%
% function opts = mergeOptions(opts1, opts2)
%
% input: opts1 and opts2 are two structures.
% output: opts is a structure containing all fields of opts1 and opts2.
% Whenever a field is present in both opts1 and opts2, it is the value in
% opts2 that is kept.
%
% An empty opts1 or opts2 is treated as a structure without fields.
%
% The typical usage is to have opts1 contain default options and opts2
% contain user-specified options that overwrite the defaults.
%
% See also: getGlobalDefaults

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 

    if isempty(opts1)
        opts1 = struct();
    end
    if isempty(opts2)
        opts2 = struct();
    end

    opts = opts1;
    fields = fieldnames(opts2);
    for i = 1 : length(fields)
        opts.(fields{i}) = opts2.(fields{i});
    end
    
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/purgeStoredb.m
================================================
function storedb = purgeStoredb(storedb, storedepth) %#ok
    error('This file was removed from Manopt. Please use the StoreDB class.');
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/setStore.m
================================================
function storedb = setStore(problem, x, storedb, store) %#ok
    error('This file was removed from Manopt. Please use the StoreDB class.');
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/stoppingcriterion.m
================================================
function [stop, reason] = stoppingcriterion(problem, x, options, info, last)
% Checks for standard stopping criteria, as a helper to solvers.
%
% function [stop, reason] = stoppingcriterion(problem, x, options, info, last)
%
% Executes standard stopping criterion checks, based on what is defined in
% the info(last) stats structure and in the options structure. A check is
% skipped if the field it needs is missing from info(last) or from options.
%
% The returned number 'stop' is 0 if none of the stopping criteria
% triggered, and a (strictly) positive integer otherwise. The integer
% identifies which criterion triggered:
%  0 : Nothing triggered;
%  1 : Cost tolerance reached;
%  2 : Gradient norm tolerance reached;
%  3 : Max time exceeded;
%  4 : Max iteration count reached;
%  5 : Maximum number of cost evaluations reached;
%  6 : User defined stopfun criterion triggered.
%
% The criteria are checked in the order listed, and the first one that
% triggers determines both outputs. In particular, options.stopfun is only
% called if none of the criteria 1 to 5 triggered.
%
% The output 'reason' is a string describing the triggered event.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   April 2, 2015 (NB):
%       'reason' now contains the option (name and value) that triggered.


    stop = 0;
    reason = '';
    
    stats = info(last);

    % Target cost attained
    if isfield(stats, 'cost') && isfield(options, 'tolcost') && ...
       stats.cost <= options.tolcost
        reason = sprintf('Cost tolerance reached; options.tolcost = %g.', options.tolcost);
        stop = 1;
        return;
    end

    % Target gradient norm attained
    if isfield(stats, 'gradnorm') && isfield(options, 'tolgradnorm') && ...
       stats.gradnorm < options.tolgradnorm
        reason = sprintf('Gradient norm tolerance reached; options.tolgradnorm = %g.', options.tolgradnorm);
        stop = 2;
        return;
    end

    % Allotted time exceeded
    if isfield(stats, 'time') && isfield(options, 'maxtime') && ...
       stats.time >= options.maxtime
        reason = sprintf('Max time exceeded; options.maxtime = %g.', options.maxtime);
        stop = 3;
        return;
    end

    % Allotted iteration count exceeded
    if isfield(stats, 'iter') && isfield(options, 'maxiter') && ...
       stats.iter >= options.maxiter
        reason = sprintf('Max iteration count reached; options.maxiter = %g.', options.maxiter);
        stop = 4;
        return;
    end

    % Allotted function evaluation count exceeded
    if isfield(stats, 'costevals') && isfield(options, 'maxcostevals') && ...
       stats.costevals >= options.maxcostevals
        reason = sprintf('Maximum number of cost evaluations reached; options.maxcostevals = %g.', options.maxcostevals);
        stop = 5;
        % Return right away, like every other criterion: without this,
        % the stopfun check below could overwrite stop = 5 and its reason.
        return;
    end

    % Check whether the possibly user defined stopping criterion
    % triggers or not.
    if isfield(options, 'stopfun')
        userstop = options.stopfun(problem, x, info, last);
        if userstop
            reason = 'User defined stopfun criterion triggered; see options.stopfun.';
            stop = 6;
            return;
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/complexcircle/complexcirclefactory.m
================================================
function M = complexcirclefactory(n)
% Returns a manifold struct to optimize over unit-modulus complex numbers.
%
% function M = complexcirclefactory()
% function M = complexcirclefactory(n)
%
% Description of vectors z in C^n (complex) such that each component z(i)
% has unit modulus. The manifold structure is the Riemannian submanifold
% structure from the embedding space R^2 x ... x R^2, i.e., the complex
% circle is identified with the unit circle in the real plane.
%
% By default, n = 1.
%
% See also spherecomplexfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   July 7, 2014 (NB): Added ehess2rhess function.
%
%   Sep. 3, 2014 (NB): Correction to the dist function (extract real part).
%
%   April 13, 2015 (NB): Fixed logarithm.
%
%   Oct. 8, 2016 (NB)
%       Code for exponential was simplified to only treat the zero vector
%       as a particular case.
%
%   July 20, 2017 (NB)
%       The distance function is now even more accurate. Improved logarithm
%       accordingly.
    
    if ~exist('n', 'var')
        n = 1;
    end

    M.name = @() sprintf('Complex circle (S^1)^%d', n);
    
    M.dim = @() n;
    
    M.inner = @(z, v, w) real(v'*w);
    
    M.norm = @(x, v) norm(v);
    
    M.dist = @(x, y) norm(real(2*asin(.5*abs(x - y))));
    
    M.typicaldist = @() pi*sqrt(n);
    
    M.proj = @(z, u) u - real( conj(u) .* z ) .* z;
    
    M.tangent = M.proj;
    
    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
M.egrad2rgrad = M.proj;
    
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(z, egrad, ehess, zdot)
        rhess = M.proj(z, ehess - real(z.*conj(egrad)).*zdot);
    end
    
    M.exp = @exponential;
    function y = exponential(z, v, t)
        
        if nargin == 2
            % t = 1;
            tv = v;
        else
            tv = t*v;
        end

        y = zeros(n, 1);

        % Entrywise modulus of the step.
        nrm_tv = abs(tv);
        
        % We need to be careful for zero steps.
        % Entries with a zero step are copied from z; this avoids the
        % division by nrm_tv in the formula for the other entries.
        mask = (nrm_tv > 0);
        y(mask) = z(mask).*cos(nrm_tv(mask)) + ...
                  tv(mask).*(sin(nrm_tv(mask))./nrm_tv(mask));
        y(~mask) = z(~mask);
        
    end
    
    M.retr = @retraction;
    function y = retraction(z, v, t)
        if nargin == 2
            % t = 1;
            tv = v;
        else
            tv = t*v;
        end
        y = sign(z+tv);
    end

    M.log = @logarithm;
    function v = logarithm(x1, x2)
        v = M.proj(x1, x2 - x1);
        % Same entrywise expression as the one used inside M.dist.
        di = real(2*asin(.5*abs(x1 - x2)));
        nv = abs(v);
        factors = di ./ nv;
        % Where di <= 1e-10, the factor is set to 1 rather than di ./ nv.
        factors(di <= 1e-10) = 1;
        v = v .* factors;
    end
    
    M.hash = @(z) ['z' hashmd5( [real(z(:)) ; imag(z(:))] ) ];
    
    M.rand = @random;
    function z = random()
        z = sign(randn(n, 1) + 1i*randn(n, 1));
    end
    
    M.randvec = @randomvec;
    function v = randomvec(z)
        % i*z(k) is a basis vector of the tangent vector to the k-th circle
        v = randn(n, 1) .* (1i*z);
        v = v / norm(v);
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(n, 1);
    
    M.transp = @(x1, x2, d) M.proj(x2, d);
    
    M.pairmean = @pairmean;
    function z = pairmean(z1, z2)
        z = sign(z1+z2);
    end

    % vec stacks real parts on top of imaginary parts; mat undoes this.
    M.vec = @(x, u_mat) [real(u_mat) ; imag(u_mat)];
    M.mat = @(x, u_vec) u_vec(1:n) + 1i*u_vec((n+1):end);
    M.vecmatareisometries = @() true;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/complexcircle/realphasefactory.m
================================================
function M = realphasefactory(n, z0, zmax)
% Returns a manifold struct to optimize over phases of fft's of real signals
%
% function M = realphasefactory(n)
% function M = realphasefactory(n, z0)
% function M = realphasefactory(n, z0, zmax)
%
% If x is a real vector of length n, then y = fft(x) is a complex vector
% which obeys certain symmetries. Specifically, for any integer k,
%
%     y(1+mod(k, n)) = conj(y(1+mod(n-k, n)))
%
% The same holds for the phases of the Fourier transform z = sign(y).
%
% This factory returns a Manopt manifold structure which represents the set
% of complex vectors z of length n which could be the phases of the Fourier
% transform of a real signal of length n:
%
%     abs(z) = 1 and z(1+mod(k, n)) = conj(z(1+mod(n-k, n))) for each k.
%
% For k = 0, this readily implies that z(1) is +1 or -1, so that the set of
% possible z's is disconnected. To choose which connected component to work
% with, set the second input z0 to +1 or -1 (this is the sign of the mean
% of x). By default, z0 = 1.
%
% Furthermore, if n is even, then k = n/2 implies z(1+n/2) is +1 or -1 as
% well, thus further disconnecting the set of acceptable z's. To choose
% which component to work with, set the third input zmax to +1 or -1. By
% default, it is +1.
%
% The Riemannian manifold structure is the Riemannian submanifold
% structure from the embedding space R^2 x ... x R^2, i.e., the complex
% circles are identified with the unit circle in the real plane.
% Concretely, this means the inner product is <u, v>_z = real(u'*v).
% Tangent vectors at z are complex vectors u of length n which notably
% satisfy u(1+0) = 0 and, if n is even, u(1+n/2) = 0.
%
% n must be integer and n >= 3 (for n = 1:2 the manifold has dimension 0).
%
% Extra functions available in M include M.up, M.down and M.downup. They
% allow to capture the symmetries concisely, as:
%
%     M.up(z) == conj(M.down(z)).
%
% See in code for more details.
%
% See also complexcirclefactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Feb. 2, 2017.
% Contributors: joint work with Tamir Bendory, Zhizhen Zhao and Amit Singer
% Change log: 
%
%   July 20, 2017 (NB)
%       The distance function is now more accurate. Improved logarithm
%       accordingly.

    assert(n == round(n) && n >= 3, 'n must be an integer >= 3.');
    even_n = (round(n/2) == n/2);
    
    % z0 and zmax default to +1 when omitted or empty.
    if ~exist('z0', 'var') || isempty(z0)
        z0 = 1;
    end
    if ~exist('zmax', 'var') || isempty(zmax)
        zmax = 1;
    end
    
    assert(z0 == 1 || z0 == -1, 'z0 must be +1 or -1.');
    assert(zmax == 1 || zmax == -1, 'zmax must be +1 or -1.');

    % zmax only appears in the name when n is even.
    if even_n
        M.name = @() sprintf('Phases of fft''s of real signals of length %d (z0 = %d, zmax = %d)', n, z0, zmax);
    else
        M.name = @() sprintf('Phases of fft''s of real signals of length %d (z0 = %d)', n, z0);
    end
    
    M.dim = @() floor((n-1)/2);
    
    M.inner = @(z, v, w) real(v'*w);
    
    M.norm = @(z, u) norm(u);
    
    M.dist = @(z1, z2) norm(real(2*asin(.5*abs(z1 - z2))));
    
    M.typicaldist = @() pi*sqrt(n/2);
    
    % Special functions to ease working with the symmetries.
    % down is the identity; up keeps entry 1 and reverses entries 2 to n;
    % downup averages down(u) with the conjugate of up(u).
    down = @(u) u;
    up = @(u) u([1 ; (n:-1:2)']);
    downup = @(u) (down(u) + conj(up(u)))/2;
    M.down = down;
    M.up = up;
    M.downup = downup;
    
    M.proj = @proj;
    function pu = proj(z, u)
        duu = downup(u);
        pu = duu - real(duu .* conj(z)).*z;
        % Note that there is no need to enforce pu(1) = 0 or (if n is even)
        % pu(1+n/2) = 0 manually, since the IEEE standard ensures that the
        % above operation will be exact for those entries provided z(1)
        % (and possibly z(1+n/2)) is +1 or -1, as should be the case.
    end
    
    M.tangent = M.proj;
    
    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
    M.egrad2rgrad = M.proj;
    
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(z, egrad, ehess, zdot)
        rhess = M.proj(z, ehess - real(downup(egrad) .* conj(z)).*zdot);
    end
    
    M.exp = @exponential;
    function y = exponential(z, v, t)
        
        if nargin == 2
            % t = 1;
            tv = v;
        else
            tv = t*v;
        end

        y = zeros(n, 1);

        % Entrywise modulus of the step.
        nrm_tv = abs(tv);
        
        % We need to be careful for zero steps.
        % Entries with a zero step are copied from z; this avoids the
        % division by nrm_tv in the formula for the other entries.
        mask = (nrm_tv > 0);
        y(mask) = z(mask).*cos(nrm_tv(mask)) + ...
                  tv(mask).*(sin(nrm_tv(mask))./nrm_tv(mask));
        y(~mask) = z(~mask);
        
    end
    
    M.retr = @retraction;
    function y = retraction(z, v, t)
        if nargin == 2
            % t = 1;
            tv = v;
        else
            tv = t*v;
        end
        y = sign(z+tv);
    end

    M.log = @logarithm;
    function v = logarithm(x1, x2)
        v = M.proj(x1, x2 - x1);
        % Same entrywise expression as the one used inside M.dist.
        di = real(2*asin(.5*abs(x1 - x2)));
        nv = abs(v);
        factors = di ./ nv;
        % Where di <= 1e-6, the factor is set to 1 rather than di ./ nv.
        factors(di <= 1e-6) = 1;
        v = v .* factors;
    end
    
    M.hash = @(z) ['z' hashmd5( [real(z(:)) ; imag(z(:))] ) ];
    
    M.rand = @random;
    function z = random()
        z = sign(downup(randn(n, 1) + 1i*randn(n, 1)));
        % Entries 1 and (for even n) 1+n/2 are set to the chosen signs.
        z(1) = z0;
        if even_n
            z(1 + n/2) = zmax;
        end
    end
    
    M.randvec = @randomvec;
    function v = randomvec(z)
        v = M.proj(z, randn(n, 1) + 1i*randn(n, 1));
        v = v / norm(v);
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(z) zeros(n, 1);
    
    M.transp = @(z1, z2, u) M.proj(z2, u);
    
    M.pairmean = @pairmean;
    function z = pairmean(z1, z2)
        z = sign(z1+z2);
    end

    % This vec/mat pair is an isometry which allows to switch between the
    % classical representation of tangent vectors---as complex vectors of
    % length n---to real vectors of length M.dim() whose entries are the
    % coordinates of the tangent vector in the basis 1i*z, for the first
    % half. A scaling of sqrt(2) is applied to ensure isometry, since
    % tangent vectors are represented with only half of their entries.
    I = 2 : floor((n+1)/2);
    % For even n, mat inserts one extra zero entry between the two halves.
    if even_n
        middle = 0;
    else
        middle = [];
    end
    M.vec = @(z, u_mat) sqrt(2)*real(u_mat(I) .* conj(1i*z(I)));
    M.mat = @(z, u_vec) [0 ; u_vec.*(1i*z(I)) ; middle ; ...
                         flipud(conj(u_vec.*(1i*z(I))))]/sqrt(2);
    M.vecmatareisometries = @() true;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/README_Essential.txt
================================================
# A Riemannian quotient representation for the essential manifold

Contributed by Roberto Tron.
The essential matrix is a 3x3 matrix that encodes the epipolar constraint between the homogeneous coordinates of the projection of a common 3-D point in two cameras. Not all 3x3 matrices are essential matrices, as these need to encode the relative pose of the two cameras (up to a global scaling). The space of valid essential matrices can be endowed with a Riemannian structure by following the derivations presented in: R. Tron, K. Daniilidis, "The Space of Essential Matrices as a Riemannian Quotient Manifold - Geometric Interpretation and Optimization Algorithms" International Journal of Computer Vision, (submitted). This work shows that the essential manifold can be seen as a quotient manifold of $SO(3) \times SO(3)$, where $SO(3)$ is the manifold of 3-D rotations. In Matlab, we represent k points on the essential manifold as an array of dimension $[3 \times 6 \times k]$, where each $[3 \times 3]$ sub matrix is a 3-D rotation. The implementation provides both the "signed" and "unsigned" versions of the manifold presented in the paper. The only difference between the two is in how the logarithm, and hence the distance, are computed. In the signed version, the points related by the twisted pair ambiguity are considered as distinct; in practice, this is the case when the cheirality constraint is used to remove the ambiguity. In the unsigned version, points related by the twisted pair ambiguity belong to the same class; in practice, it means that they all produce equivalent epipolar constraints. See the paper for details. Factory call: M=essentialfactory(k,signature). By default, k equals 1. The string signature should be set to "signed" (resp. "unsigned") to use the signed (resp. unsigned) version of the manifold. By default, signature equals "signed". See the paper for the definition of the set and tangent spaces.
Note: following the representation of tangent vectors for SO(3) in MANOPT, tangent vectors for the essential manifold are represented as $[3 \times 6 \times k]$ matrices, where each $[3 \times 3]$ sub matrix is skew-symmetric. The real tangent vector in the ambient space is obtained by multiplying on the left each $[3 \times 3]$ skew-symmetric matrix with the corresponding rotation from the base point. ## Toolset The following list contains some of the nontrivial available functions in the structure M. - Dimension M.dim() $\dim M=5k$ - Metric M.inner(X,S,T) $\langle S, T \rangle = \sum_{i=1}^k \mathrm{trace}(S_i^T T_i)$, where S and T are representations of two tangent vectors at X. - Norm M.norm(X,S) $\|S\|=\sqrt{\langle S, S \rangle}$ - Distance M.dist(X,Y) $\mathrm{dist}(X,Y)=\sqrt{\sum_{i=1}^k \|\log(X_i,Y_i)\|^2}$, see M.log(X,Y) below - Typical distance M.typicaldist() $\pi\sqrt{k}$ - Vertical tangent space projector M.vertproj(X,H) Projects a point in the ambient space onto the vertical space at X. See the paper for details. Note that this operation returns an array containing skew-symmetric matrices. - Tangent space projector M.proj(X,H) Projects a point in the ambient space onto the horizontal space at X. See the paper for details. Note that this operation returns an array containing skew-symmetric matrices. - Tangent space to ambient space M.tangent2ambient(X,S) Computes a matrix H where H(1:3,:,i)=X(1:3,:,i)*S(1:3,:,i) and H(4:6,:,i)=X(4:6,:,i)*S(4:6,:,i). This function is necessary because the proj operator takes as input an ambient vector and returns a tangent vector. To apply the proj again to the result (which should change nothing), it is necessary to first represent the tangent vector obtained as an ambient vector. This function is here because of formal peculiarities and is likely to disappear at some point. - Essential matrix M.E(X) Returns the $3 \times 3$ essential matrix corresponding to the point on the manifold X.
- Tangent of the essential matrix M.dE(X,S) Returns the matrix $\dot{E}$ obtained from a point X moving on a curve with tangent S. Mathematically, this is the push-forward of S through the mapping M.E(X). - Double tangent of the essential matrix M.ddE(X,S) Returns the matrix $\ddot{E}$ obtained from a point X moving on a *geodesic* curve (i.e., with zero acceleration) with tangent S. Mathematically, this is the push-forward of S through the mapping M.dE(X,S). - Euclidean to Riemannian function M.ef2rf(X,ef) Returns the value of ef evaluated at M.E(X). ef must be a function handle. - Euclidean gradient of a function of E to Euclidean gradient of a function of X M.egradE2egrad(X,egradE) Returns the Euclidean gradient (matrix of partial derivatives) in the entries of X (taken as a $3 \times 6$ matrix) given the Euclidean gradient of a function of E (which is a $3 \times 3$ matrix). egradE must be a function handle for which egradE(E) returns the $3 \times 3$ Euclidean gradient of a function evaluated at the essential matrix E=M.E(X). Note: this function uses a different convention than egrad2rgrad for other manifolds. In this case egradE is a function handle, while in the other cases egrad is a matrix. - Euclidean to Riemannian gradient M.egrad2rgrad(X,egrad) Returns the Riemannian gradient (a tangent vector at X) corresponding to the Euclidean gradient of a function of X taken as a matrix. egrad must be a function handle for which egrad(X) returns the $3 \times 6$ Euclidean gradient of a function evaluated at the point X. Note: this function uses a different convention than egrad2rgrad for other manifolds. In this case egrad is a function handle, while in the other cases egrad is a matrix. - Euclidean gradient of a function of E to Riemannian gradient M.egradE2rgrad(X,egradE) This function is the combination of M.egradE2egrad and M.egrad2rgrad. See the respective comments for more information.
- Euclidean Hessian of a function of E to Euclidean Hessian of a function of X M.ehessE2ehess(X,egradE, ehessE, V) Returns the Euclidean Hessian (operator given by second order partial derivatives) in the entries of X (taken as a $3 \times 6$ matrix) evaluated in the direction V (which represents a direction in the ambient space) given the Euclidean Hessian operator of a function of E (which is a $3 \times 3$ matrix). ehessE must be a function handle for which ehessE(E,dE) returns the $3 \times 3$ Euclidean Hessian of a function evaluated at the essential matrix E for the tangent vector dE. See also M.egradE2egrad. - Euclidean to Riemannian Hessian M.ehessE2rhess(X,egrad, ehess, V) This function is the combination of M.ehessE2ehess and M.ehess2rhess. See the respective comments for more information. - Exponential map M.exp(X,S,t) Returns the point obtained by following the normal geodesic starting from X with tangent S for a length t. This function does not check that S is horizontal: it simply applies the exponential map on each copy of SO(3). - Logarithm map M.log(X,Y) The inverse of the exponential map. It is guaranteed to correspond to the horizontal vector pointing in the direction of the shortest geodesic from X to Y. - Transport M.transp(X1,X2,S1) Transports a vector from the tangent space of X1 to the tangent space of X2, using left translations in SO(3)^2. This transport preserves the length of the vectors. - Distance M.dist(X,Y) $\mathrm{dist}(X,Y)=\|\log(X,Y)\|$ Computes the shortest geodesic distance between X and Y. - Pair mean M.pairmean(X,Y) Mid-point of the shortest geodesic between X and Y. ## Example The file essential_svd.m contains an example of the use of the essential manifold in MANOPT. It first builds random essential matrices A_i, i=1,...,k. It then tries to find matrices E_i, i=1,...,k which minimize $\sum_{i=1}^k \frac{1}{2}\|E_i-A_i\|^2$. The i-th component of the Euclidean gradient is simply E_i-A_i and the Hessian operator is the identity.
This problem is trivial, as the cost function is separable in each i and the solution is simply E_i=A_i. However, this example shows how to define the gradient and hessian of the cost function with k>1 and shows that indeed the optimization procedure converges to the expected minimizer.

## Files

With respect to a vanilla installation of MANOPT, the implementation of the essential manifold adds the following files and directories

manopt/manifolds/essential
examples/essential_svd.m

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essential_costE2cost.m
================================================
function val = essential_costE2cost(X, costE)
% Evaluates at X a cost given as a function of the essential matrix E.
%
% function val = essential_costE2cost(X, costE)
%
% costE is the function handle for the cost function in E.
%
% E is formed from the two column blocks of X, RA = X(:,1:3,:) and
% RB = X(:,4:6,:), using multiprod and multitransp; val is costE(E).
%
% See also: essential_egradE2egrad essential_ehessE2ehess

% This file is part of Manopt: www.manopt.org.
% Original author: Roberto Tron, Aug. 8, 2014
% Contributors: Bamdev Mishra, May 22, 2015.

    e3hat = [0 -1 0; 1 0 0; 0 0 0];

    % Left and right 3-column blocks of X.
    blockA = X(:, 1:3, :);
    blockB = X(:, 4:6, :);

    leftFactor = multiprod(multitransp(blockA), e3hat);
    val = costE(multiprod(leftFactor, blockB));
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essential_egradE2egrad.m
================================================
function egrad = essential_egradE2egrad(X, egradE)
% Turns a gradient with respect to the essential matrix E into one in X.
%
% function egrad = essential_egradE2egrad(X, egradE)
%
% egradE is the function handle for the gradient in E.
%
% The output is a matrix in the space of X.
%
% See also: essential_costE2cost essential_ehessE2ehess

% This file is part of Manopt: www.manopt.org.
% Original author: Roberto Tron, Aug. 8, 2014
% Contributors: Bamdev Mishra, May 22, 2015.

    e3hat = [0 -1 0; 1 0 0; 0 0 0];

    % Left and right 3-column blocks of X.
    blockA = X(:, 1:3, :);
    blockB = X(:, 4:6, :);

    leftFactor = multiprod(multitransp(blockA), e3hat);
    gradE = egradE(multiprod(leftFactor, blockB));

    % Vectorized version of egrad = e3hat*[RB*G' -RA*G], where RA and RB
    % are the two blocks of X and G is the gradient in E.
    firstHalf = multiprod(blockB, multitransp(gradE));
    secondHalf = -multiprod(blockA, gradE);
    egrad = multiprod(e3hat, cat(2, firstHalf, secondHalf));
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essential_ehessE2ehess.m
================================================
function ehess = essential_ehessE2ehess(X, egradE, ehessE, S)
% Turns a Hessian with respect to the essential matrix E into one in X.
%
% function ehess = essential_ehessE2ehess(X, egradE, ehessE, S)
%
% egradE is the function handle for the gradient in E.
% ehessE is the function handle for the Hessian in E.
% S is the search direction in the space of X.
%
% The output is a matrix in the space of X.
%
% See also: essential_costE2cost essential_egradE2egrad

% This file is part of Manopt: www.manopt.org.
% Original author: Roberto Tron, Aug. 8, 2014
% Contributors: Bamdev Mishra, May 22, 2015.

    e3hat = [0 -1 0; 1 0 0; 0 0 0];

    % Left and right 3-column blocks of X.
    blockA = X(:, 1:3, :);
    blockB = X(:, 4:6, :);

    % Essential matrix and the gradient in E evaluated there.
    leftFactor = multiprod(multitransp(blockA), e3hat);
    essMat = multiprod(leftFactor, blockB);
    gradE = egradE(essMat);

    % Direction in the space of X: blockwise product of X with S.
    dirX = essential_sharp(multiprod(essential_flat(X), essential_flat(S)));
    dirA = dirX(:, 1:3, :);
    dirB = dirX(:, 4:6, :);

    % Corresponding direction for E, and the Hessian in E along it.
    dirE = multiprod(leftFactor, dirB) ...
         + multiprod(multiprod(multitransp(dirA), e3hat), blockB);
    hessE = ehessE(essMat, dirE);

    % Vectorized version of ehess = e3hat*[(VB*G'+RB*dG') -(VA*G+RA*dG)],
    % where VA and VB are the two blocks of the direction in X, G is the
    % gradient in E and dG is the Hessian in E.
    firstHalf = multiprod(dirB, multitransp(gradE)) + multiprod(blockB, multitransp(hessE));
    secondHalf = -multiprod(dirA, gradE) - multiprod(blockA, hessE);
    ehess = multiprod(e3hat, cat(2, firstHalf, secondHalf));

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essential_flat.m
================================================
function Hp = essential_flat(H)
% Views a [3x6xk] array as a [3x3x2k] array (same entries, same order).
    Hp = reshape(H, 3, 3, []);
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essential_hat3.m
================================================
% Matrix representation of the cross product.
% function [V, vShift] = essential_hat3(v)
% v has 3 rows and N columns. V is a [3x3xN] array of skew-symmetric
% matrices: the k-th [3x3] block is the matrix representation of the cross
% product with the k-th column of v.
% vShift is equal to permute(v,[1 3 2]).
function [V, vShift] = essential_hat3(v)
    % Move the column index of v to the third dimension.
    vShift = permute(v, [1 3 2]);
    c1 = vShift(1, :, :);
    c2 = vShift(2, :, :);
    c3 = vShift(3, :, :);

    % Fill the six off-diagonal entries; the diagonal stays zero.
    V = zeros(3, 3, size(v, 2));
    V(3, 2, :) = c1;
    V(2, 3, :) = -c1;
    V(1, 3, :) = c2;
    V(3, 1, :) = -c2;
    V(2, 1, :) = c3;
    V(1, 2, :) = -c3;
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essential_sharp.m
================================================
function H = essential_sharp(Hp)
% Views a [3x3x2k] array as a [3x6xk] array (same entries, same order).
    H = reshape(Hp, 3, 6, []);
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essentialfactory.m
================================================
function M = essentialfactory(k, strSigned)
% Manifold structure to optimize over the space of essential matrices.
% % function M = essentialfactory(k) % function M = essentialfactory(k, 'signed') % function M = essentialfactory(k, 'unsigned') % % % Quotient representation of the essential manifold: deals with the % representation of the space of essential matrices M_rE. These are used in % computer vision to represent the epipolar constraint between projected % points in two perspective views. % % The space is represented as the quotient (SO(3)^2/SO(2)). % See the following references for details: % % R. Tron, K. Daniilidis, % "On the quotient representation of the essential manifold" % IEEE Conference on Computer Vision and Pattern Recognition, 2014 % % For computational purposes, each essential matrix is represented as a % [3x6] matrix where each [3x3] block is a rotation. % % The metric used is the one induced by the submersion of M_rE in SO(3)^2. % % Tangent vectors are represented in the Lie algebra of SO(3)^2, i.e., as % [3x6] matrices where each [3x3] block is a skew-symmetric matrix. % Use the function tangent2ambient(X, H) to switch from the Lie algebra % representation to the embedding space representation in R^(3x6). % % By default, k = 1, and the geometry is 'signed'. % % Optional arguments: % "signed" selects the signed version of the manifold % "unsigned" selects the unsigned version of the manifold % % See also rotationsfactory % Please cite the Manopt paper as well as the research paper: % @InProceedings{tron2014essential, % Title = {On the quotient representation of the essential manifold}, % Author = {Tron, R. and Daniilidis, K.}, % Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition}, % Year = {2014}, % Organization = {{IEEE CVPR}} % } % This file is part of Manopt: www.manopt.org. % Original author: Roberto Tron, Aug. 8, 2014 % Contributors: Bamdev Mishra, May 15, 2015. 
% % % RT: General implementation note: to streamline component-wise % computations, in tangentProjection and exponential, % we flatten out the arguments into [3 x 3 x 2K] arrays, compute the % components all together, and then sharp the result again into [3 x 6 x K] % arrays. % Optional parameters to switch between the signed and unsigned % versions of the manifold. if ~exist('strSigned', 'var') || isempty(strSigned) strSigned = 'signed'; end switch(strSigned) case 'signed' flagSigned = true; case 'unsigned' flagSigned = false; otherwise error('Second argument can be either empty, ''signed'', or ''unsigned''.'); end if ~exist('k', 'var') || isempty(k) k = 1; end if k == 1 M.name = @() sprintf('Quotient representation of the essential manifold, %s', strSigned); elseif k > 1 && k == round(k) M.name = @() sprintf('Product of %d quotient representations of the essential manifold, %s', k, strSigned); else error('k must be an integer no less than 1.'); end M.dim = @() k*5; M.inner = @(x, d1, d2) d1(:).'*d2(:); M.norm = @(x, d) norm(d(:)); M.typicaldist = @() pi*sqrt(2*k); M.proj = @tangentProjection; function HProjHoriz=tangentProjection(X,H) % Project H on the tangent space of SO(3)^2 HProj = essential_sharp(multiskew(multiprod(multitransp(essential_flat(X)), essential_flat(H)))); % Compute projection on vertical component p = vertproj(X, HProj); HProjHoriz = HProj - multiprod(p/2,[essential_hat3(permute(X(3,1:3,:),[2 3 1])) essential_hat3(permute(X(3,4:6,:),[2 3 1]))]);% BM: okay end M.tangent = @(X, H) essential_sharp(multiskew(essential_flat(H))); M.egrad2rgrad=@egrad2rgrad; function rgrad = egrad2rgrad(X, egrad) rgrad = M.proj(X, egrad); end M.ehess2rhess = @ehess2rhess; function rhess = ehess2rhess(X, egrad, ehess, S) % Reminder: S contains skew-symmeric matrices. The actual % direction that the point X is moved along is X*S. RA = p1(X); RB = p2(X); SA = p1(S); SB = p2(S); G = egrad; GA = p1(G); GB = p2(G); H = ehess; % RT: We now compute the connection, i.e. 
the part of the derivative % given by the curvature of the space (as opposed to a simple % Euclidean derivative). % The following is the vectorized version of connection=-[multisym(GA'*RA)*SA multisym(GB'*RB)*SB]; connection = tangent2ambient(X,-cat(2,... multiprod(multisym(multiprod(multitransp(GA), RA)), SA),... multiprod(multisym(multiprod(multitransp(GB), RB)), SB))); rhess = M.proj(X,H + connection); end M.exp = @exponential; function Y = exponential(X, U, t) if nargin == 3 U = t*U; end UFlat = essential_flat(U); exptUFlat = rot3_exp(UFlat); Y = essential_sharp(multiprod(essential_flat(X), exptUFlat)); end M.retr = @exponential; M.log = @logarithm; function U = logarithm(X, Y) QX = [X(:,1:3,:);X(:,4:6,:)]; QY = [Y(:,1:3,:);Y(:,4:6,:)]; QYr = essential_closestRepresentative(QX,QY,'flagSigned',flagSigned); Yr = [QYr(1:3,:,:) QYr(4:6,:,:)]; U = zeros(size(X)); U(:,1:3,:) = rot3_log(multiprod(multitransp(X(:,1:3,:)),Yr(:,1:3,:))); U(:,4:6,:) = rot3_log(multiprod(multitransp(X(:,4:6,:)),Yr(:,4:6,:))); end M.hash = @(X) ['z' hashmd5(X(:))]; M.rand = @() randessential(k); function Q = randessential(N) % Generates random essential matrices. % % function Q = randessential(N) % % Q is a [3x6] matrix where each [3x3] block is a uniformly distributed % matrix. % This file is part of Manopt: www.manopt.org. % Original author: Roberto Tron, Aug. 8, 2014 % Contributors: % Change log: if nargin < 1 N = 1; end Q = [randrot(3,N) randrot(3,N)]; end M.randvec = @randomvec; function U = randomvec(X) U = tangentProjection(X, essential_sharp(randskew(3, 2*k))); U = U / sqrt(M.inner([],U,U)); end M.lincomb = @matrixlincomb; M.zerovec = @(x) zeros(3, 6, k); M.transp = @transport; function S2 = transport(X1, X2, S1) % Transport a vector from the tangent space at X1 to the tangent % space at X2. This transport uses the left translation of the % ambient group and preserves the norm of S1. The left translation % aligns the vertical spaces at the two elements. 
% Group operation in the ambient group, X12=X2'*X1 X12 = essential_sharp(multiprod(multitransp(essential_flat(X2)),essential_flat(X1))); X12Flat = essential_flat(X12); % Left translation, S2=X12*S*X12' S2 = essential_sharp(multiprod(X12Flat,multiprod(essential_flat(S1),multitransp(X12Flat)))); end M.pairmean = @pairmean; function Y = pairmean(X1, X2) V = M.log(X1, X2); Y = M.exp(X1, .5*V); end M.dist = @(x, y) M.norm(x, M.log(x, y)); M.vec = @(x, u_mat) u_mat(:); M.mat = @(x, u_vec) reshape(u_vec, [3, 6, k]); M.vecmatareisometries = @() true; p1 = @(X) X(:,1:3,:); p2 = @(X) X(:,4:6,:); vertproj = @(X,H) multiprod(X(3,1:3,:),permute(vee3(H(:,1:3,:)),[1 3 2]))+multiprod(X(3,4:6,:),permute(vee3(H(:,4:6,:)),[1 3 2])); tangent2ambient = @(X, H) essential_sharp(multiprod(essential_flat(X), essential_flat(H))); end %% Some functions used by the essential factory function v = vee3(V) v = squeeze([V(3,2,:)-V(2,3,:); V(1,3,:)-V(3,1,:); V(2,1,:)-V(1,2,:)])/2; end % Compute the exponential map in SO(3) using Rodrigues' formula % function R = rot3_exp(V) % V must be a [3x3xN] array of [3x3] skew-symmetric matrices. 
function R = rot3_exp(V) v = vee3(V); nv = cnorm(v); idxZero = nv < 1e-15; nvMod = nv; nvMod(idxZero) = 1; vNorm = v./([1;1;1]*nvMod); % Matrix exponential using Rodrigues' formula nv = shiftdim(nv,-1); c = cos(nv); s = sin(nv); [VNorm,vNormShift] = essential_hat3(vNorm); vNormvNormT = multiprod(vNormShift,multitransp(vNormShift)); R=multiprod(eye(3),c)+multiprod(VNorm,s)+multiprod(vNormvNormT,1-c); end % Compute the logarithm map in SO(3) % function V = rot3_log(R) % V is a [3x3xN] array of [3x3] skew-symmetric matrices function V = rot3_log(R) skewR = multiskew(R); ctheta = (multitrace(R)'-1)/2; stheta = cnorm(vee3(skewR)); theta = atan2(stheta,ctheta); V=skewR; for ik=1:size(R,3) V(:,:,ik)=V(:,:,ik)/sincN(theta(ik)); end end function sx = sincN(x) sx = sin(x)./x; sx(x==0) = 1; end function nv = cnorm(v) nv = sqrt(sum(v.^2)); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_closestRepresentative.m ================================================ function Q2r=essential_closestRepresentative(Q1,Q2,varargin) [tMin,~,Q2]=essential_distMinAngle(Q1,Q2,varargin{:}); NQ1=size(Q1,3); NQ2=size(Q2,3); if NQ1>1 && NQ2==1 Q2=repmat(Q2,[1 1 NQ1]); end NQ=max(NQ1,NQ2); Q2r=zeros(size(Q2)); for iQ=1:NQ t=tMin(iQ); Rz=[cos(t) -sin(t) 0; sin(t) cos(t) 0; 0 0 1]; Q2r(1:3,1:3,iQ)=Rz*Q2(1:3,1:3,iQ); Q2r(4:6,1:3,iQ)=Rz*Q2(4:6,1:3,iQ); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_distMinAngle.m ================================================ function [tMin,fMin,Q2Flip,output]=essential_distMinAngle(Q1,Q2,varargin) NQ1=size(Q1,3); NQ2=size(Q2,3); if NQ1==1 && NQ2>1 Q1=repmat(Q1,[1 1 NQ2]); NQ1=NQ2; end if NQ1>1 && NQ2==1 Q2=repmat(Q2,[1 1 NQ1]); end if NQ1>1 tMin=zeros(NQ1,1); fMin=zeros(NQ1,1); Q2Flip=zeros(6,3,NQ1); if nargout>3 
output=repmat(struct('tMin',[],'fMin',[],'tBreak1',[],'tBreak2',[]),NQ1,1); end for iQ=1:NQ1 if nargout>3 [tMin(iQ),fMin(iQ),Q2Flip(:,:,iQ),output(iQ)]=... essential_distMinAngle(Q1(:,:,iQ),Q2(:,:,iQ),varargin{:}); else [tMin(iQ),fMin(iQ),Q2Flip(:,:,iQ)]=... essential_distMinAngle(Q1(:,:,iQ),Q2(:,:,iQ),varargin{:}); end end else flagModTMin=false; flagSigned=false; %optional parameters ivarargin=1; while(ivarargin<=length(varargin)) switch(lower(varargin{ivarargin})) case 'flagmodtmin' ivarargin=ivarargin+1; flagModTMin=varargin{ivarargin}; case 'signed' flagSigned=true; case 'flagsigned' ivarargin=ivarargin+1; flagSigned=varargin{ivarargin}; otherwise error(['Argument ' varargin{ivarargin} ' not valid!']) end ivarargin=ivarargin+1; end tMin=zeros(4,1); fMin=zeros(4,1); tBreak1=zeros(4,1); tBreak2=zeros(4,1); Q2Flip=zeros(6,3,4); if ~flagSigned for k=1:4 [tMin(k),fMin(k),tBreak1(k),tBreak2(k),Q2Flip(:,:,k)]=... essential_distMinAnglePair(Q1,Q2,k); end else [tMin,fMin,tBreak1,tBreak2,Q2Flip]=... 
essential_distMinAnglePair(Q1,Q2,1); end if flagModTMin tMin=modAngle(tMin); end if nargout>3 output.tMin=tMin; output.fMin=fMin; output.tBreak1=tBreak1; output.tBreak2=tBreak2; end if ~flagSigned [fMin,idxMin]=min(fMin); fMin=max(fMin,0); tMin=tMin(idxMin); Q2Flip=Q2Flip(:,:,idxMin); if nargout>3 output.idxMin=idxMin; end end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_distMinAnglePair.m ================================================ function [tMin,fMin,tBreak1,tBreak2,Q2,tMinAll]=essential_distMinAnglePair(Q1,Q2,kFlip) switch kFlip case 1 %nothing to do case 2 Q2([2 3 4 6],:)=-Q2([2 3 4 6],:); case 3 Q2([4 5],:)=-Q2([4 5],:); case 4 Q2([2 3 5 6],:)=-Q2([2 3 5 6],:); otherwise error('Value of kFlip invalid') end Q11=Q1(1:3,:); Q12=Q1(4:6,:); Q21=Q2(1:3,:); Q22=Q2(4:6,:); Q211=Q21*Q11'; Q212=Q22*Q12'; [tMin,fMin,tBreak1,tBreak2,tMinAll]=essential_distMinAnglePair_base(Q211,Q212); ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_distMinAnglePair_base.m ================================================ function [tMin,fMin,tBreak1,tBreak2,tMinAll]=essential_distMinAnglePair_base(Q211,Q212) flagCheckFirstDer=true; flagUseNewton=true; %Note: requires flagCheckFirstDer=true tolMZero=1e-15; tMinAll=[]; [tBreak1,~,~,c1,m1,p1]=essential_distMinAnglePair_discontinuityDistance(Q211); [tBreak2,~,~,c2,m2,p2]=essential_distMinAnglePair_discontinuityDistance(Q212); %check for the degenerate case where the cost is constant if abs(m1)tSearch2 tSearch1=tSearch1-2*pi; end if flagCheckFirstDer %compute derivatives of each term at discontinuity points df1Break1=essential_distMinAnglePair_computeDfBreak(tBreak1,Q211); df2Break2=essential_distMinAnglePair_computeDfBreak(tBreak2,Q212); % disp('[df1Break1 df2Break2]') % disp([df1Break1 df2Break2]) 
%compute derivative of each term at other's discontinuity %(unroll two calls to dfi) theta1Break2=acos(clip((m1*sin(tBreak2+p1)+c1-1)/2)); df1Break2=-theta1Break2*(m1*cos(tBreak2+p1))/(2*sin(theta1Break2)); theta2Break1=acos(clip((m2*sin(tBreak1+p2)+c2-1)/2)); df2Break1=-theta2Break1*(m2*cos(tBreak1+p2))/(2*sin(theta2Break1)); %compute left and right derivatives of sum of the two terms dfBreak1n=+df1Break1+df2Break1; dfBreak1p=-df1Break1+df2Break1; dfBreak2n=+df2Break2+df1Break2; dfBreak2p=-df2Break2+df1Break2; flagSearch1=false; % plot([tBreak1 tBreak2],[dfBreak1p dfBreak2p],'cx','MarkerSize',10) % plot([tBreak1 tBreak2],[dfBreak1n dfBreak2n],'mx','MarkerSize',10) if sign(dfBreak1p)~=sign(dfBreak2n) if flagUseNewton %parabolic prediction of min tMin0=tSearch1-dfBreak1p*(tSearch2-tSearch1)/(dfBreak2n-dfBreak1p); %tMin0=(tSearch1+tSearch2)/2; [tMin,fMin]=essential_distMinAnglePair_dfNewton(m1,p1,c1,m2,p2,c2,tMin0,tSearch1,tSearch2); %fMin=essential_distMinAnglePair_ft(m1,p1,c1,m2,p2,c2,tMin); else [tMin,fMin]=fminbnd(essential_distMinAnglePair_ft,tSearch1,tSearch2); end tMinAll=[tMinAll tMin]; flagSearch1=true; end tSearch1=tSearch1+2*pi; if sign(dfBreak2p)~=sign(dfBreak1n) if flagUseNewton %parabolic prediction of min tMin0=tSearch2-dfBreak2p*(tSearch1-tSearch2)/(dfBreak1n-dfBreak2p); %tMin0=(tSearch1+tSearch2)/2; [tMin2,fMin2]=essential_distMinAnglePair_dfNewton(m1,p1,c1,m2,p2,c2,tMin0,tSearch2,tSearch1); %fMin2=essential_distMinAnglePair_ft(m1,p1,c1,m2,p2,c2,tMin2); else [tMin2,fMin2]=fminbnd(essential_distMinAnglePair_ft,tSearch2,tSearch1); end if ~flagSearch1 || (flagSearch1 && fMin2 u = randn(dimensions_vec) + 1i*randn(dimensions_vec); u = u / norm(u(:), 'fro'); end M.lincomb = @matrixlincomb; M.zerovec = @(x) zeros(dimensions_vec); M.transp = @(x1, x2, d) d; M.pairmean = @(x1, x2) .5*(x1+x2); sz = prod(dimensions_vec); M.vec = @(x, u_mat) [real(u_mat(:)) ; imag(u_mat(:))]; M.mat = @(x, u_vec) reshape(u_vec(1:sz), dimensions_vec) ... 
+ 1i*reshape(u_vec((sz+1):end), dimensions_vec);
M.vecmatareisometries = @() true;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/euclidean/euclideanfactory.m
================================================
function M = euclideanfactory(m, n)
% Returns a manifold struct to optimize over real matrices.
%
% function M = euclideanfactory(m)
% function M = euclideanfactory(m, n)
% function M = euclideanfactory([n1, n2, ...])
%
% Returns M, a structure describing the Euclidean space of real matrices,
% equipped with the standard Frobenius distance and associated trace inner
% product, as a manifold for Manopt.
%
% m and n in general can be vectors to handle multidimensional arrays.
% If either of m or n is a vector, they are concatenated as [m, n].
%
% Using this simple linear manifold, Manopt can be used to solve standard
% unconstrained optimization problems, for example in replacement of
% Matlab's fminunc.
%
% See also: euclideancomplexfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: Bamdev Mishra, May 4, 2015.
% Change log:
%
%   July 5, 2013 (NB):
%       Added egrad2rgrad, ehess2rhess, mat, vec, tangent.
%   May 4, 2015 (BM):
%       Added functionality to handle multidimensional arrays.

    % The size can be defined using both m and n, or simply with m.
    % If m is a scalar, then n is implicitly 1.
    % This mimics the use of built-in Matlab functions such as zeros(...).
    if ~exist('n', 'var') || isempty(n)
        if numel(m) == 1
            n = 1;
        else
            n = [];
        end
    end

    dimensions_vec = [m(:)', n(:)']; % We have a row vector.

    M.size = @() dimensions_vec;

    M.name = @() sprintf('Euclidean space R^(%s)', num2str(dimensions_vec));

    M.dim = @() prod(dimensions_vec);

    % Inner product, norm and distance all act on the vectorized arrays.
    M.inner = @(x, d1, d2) d1(:).'*d2(:);

    M.norm = @(x, d) norm(d(:), 'fro');

    M.dist = @(x, y) norm(x(:) - y(:), 'fro');

    M.typicaldist = @() sqrt(prod(dimensions_vec));

    % The space is linear: the projection and the conversions of the
    % Euclidean gradient and Hessian return their input unchanged.
    M.proj = @(x, d) d;

    M.egrad2rgrad = @(x, g) g;

    M.ehess2rhess = @(x, eg, eh, d) eh;

    M.tangent = M.proj;

    M.exp = @exp;
    % Nested function named exp: returns x + t*d when a step t is given as
    % third input, and x + d otherwise.
    function y = exp(x, d, t)
        if nargin == 3
            y = x + t*d;
        else
            y = x + d;
        end
    end

    % The retraction is the same function as the exponential.
    M.retr = M.exp;

    M.log = @(x, y) y-x;

    M.hash = @(x) ['z' hashmd5(x(:))];

    M.rand = @() randn(dimensions_vec);

    M.randvec = @randvec;
    % Gaussian random array scaled to unit Frobenius norm; the input x is
    % not used.
    function u = randvec(x) %#ok
        u = randn(dimensions_vec);
        u = u / norm(u(:), 'fro');
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(x) zeros(dimensions_vec);

    % Transporting a vector from x1 to x2 returns it unchanged.
    M.transp = @(x1, x2, d) d;

    M.pairmean = @(x1, x2) .5*(x1+x2);

    % vec stacks all entries in a column; mat reshapes back to the size
    % stored in dimensions_vec.
    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, dimensions_vec);
    M.vecmatareisometries = @() true;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/euclidean/shapefitfactory.m
================================================
function M = shapefitfactory(VJt)
% Linear manifold structure for optimization over the ShapeFit search space
%
% function M = shapefitfactory(VJt)
%
% Input: VJt is a matrix of size dxn, such that VJt * ones(n, 1) = 0.
%
% Returns M, a structure describing the Euclidean space of d-by-n matrices
% equipped with the standard Frobenius distance and associated trace inner
% product, as a manifold for Manopt. Matrices on M, denoted by T, have size
% dxn and obey T*ones(n, 1) = 0 (centered columns) and <VJt, T> = 1, where
% <A, B> = Trace(A' * B).
%
% See this paper: http://arxiv.org/abs/1506.01437
% ShapeFit: Exact location recovery from corrupted pairwise directions, 2015
% Paul Hand, Choongbum Lee, Vladislav Voroninski
%
% See also: shapefit_smoothed

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 18, 2015.
% Contributors:
% Change log:
%
%   Jan. 25, 2017 (NB):
%       M.tangent = M.proj now, instead of being identity. This is notably
%       necessary so that checkgradient will pick up on gradients that do
%       not lie in the appropriate tangent space.

    [d, n] = size(VJt);

    M.name = @() sprintf('ShapeFit space of size %d x %d', d, n);

    M.dim = @() d*n - d - 1;

    M.inner = @(x, d1, d2) d1(:).'*d2(:);

    M.norm = @(x, d) norm(d, 'fro');

    M.dist = @(x, y) norm(x-y, 'fro');

    M.typicaldist = @() sqrt(d*n);

    M.proj = @(T, U) projection(U);
    VJt_normed = VJt / norm(VJt, 'fro');
    function PU = projection(U)
        % Center the columns
        PU = bsxfun(@minus, U, mean(U, 2));
        % Remove component along VJt
        % Note: these two actions can be executed separately, without
        % interference, owing to VJt having centered columns itself.
        PU = PU - (VJt_normed(:)'*U(:))*VJt_normed;
    end

    M.egrad2rgrad = M.proj;

    M.ehess2rhess = @(x, eg, eh, d) projection(eh);

    M.tangent = M.proj;

    M.exp = @exp;
    function y = exp(x, d, t)
        if nargin == 3
            y = x + t*d;
        else
            y = x + d;
        end
    end

    M.retr = M.exp;

    M.log = @(x, y) y-x;

    M.hash = @(x) ['z' hashmd5(x(:))];

    M.randvec = @(x) randvec();
    function u = randvec()
        u = projection(randn(d, n));
        u = u / norm(u, 'fro');
    end

    % We exploit the fact that VJt_normed belongs to the manifold
    M.rand = @() VJt_normed + randn(1) * randvec();

    M.lincomb = @matrixlincomb;

    M.zerovec = @(x) zeros(d, n);

    M.transp = @(x1, x2, d) d;

    M.pairmean = @(x1, x2) .5*(x1+x2);

    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, [d, n]);
    M.vecmatareisometries = @() true;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/euclidean/skewsymmetricfactory.m
================================================
function M = skewsymmetricfactory(n, k)
% Returns a manifold struct to optimize over k skew-symmetric matrices of size n
%
% function M = skewsymmetricfactory(n)
% function M = skewsymmetricfactory(n, k)
%
% Returns M, a structure describing the Euclidean space of n-by-n
% skew-symmetric matrices equipped with the standard Frobenius distance and
% associated trace inner product, as a manifold for Manopt.
%
% By default, k = 1. If k > 1, points and vectors are stored in 3D matrices
% X of size nxnxk such that each slice X(:, :, i), for i = 1:k, is
% skew-symmetric.
%
% M.vec maps a tangent vector to a column of length k*n*(n-1)/2 (the
% strictly upper-triangular entries scaled by sqrt(2)); M.mat is its
% inverse.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 28, 2016.
% Contributors:
% Change log:
%
%   Jan. 25, 2017 (NB):
%       M.tangent = M.proj now, instead of being identity. This is notably
%       necessary so that checkgradient will pick up on gradients that do
%       not lie in the appropriate tangent space.

    if ~exist('k', 'var') || isempty(k)
        k = 1;
    end

    M.name = @() sprintf('(Skew-symmetric matrices of size %d)^%d', n, k);

    M.dim = @() k*n*(n-1)/2;

    M.inner = @(x, d1, d2) d1(:).'*d2(:);

    M.norm = @(x, d) norm(d(:), 'fro');

    M.dist = @(x, y) norm(x(:)-y(:), 'fro');

    M.typicaldist = @() sqrt(k)*n;

    M.proj = @(x, d) multiskew(d);

    M.egrad2rgrad = M.proj;

    M.ehess2rhess = @(x, eg, eh, d) M.proj(x, eh);

    M.tangent = M.proj;

    M.exp = @exp;
    function y = exp(x, d, t)
        if nargin == 3
            y = x + t*d;
        else
            y = x + d;
        end
    end

    M.retr = M.exp;

    M.log = @(x, y) y-x;

    M.hash = @(x) ['z' hashmd5(x(:))];

    M.rand = @() multiskew(randn(n, n, k));

    M.randvec = @randvec;
    function u = randvec(x) %#ok
        u = multiskew(randn(n, n, k));
        u = u / norm(u(:), 'fro');
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(x) zeros(n, n, k);

    M.transp = @(x1, x2, d) d;

    M.pairmean = @(x1, x2) .5*(x1+x2);

    % Elaborate list of indices of strictly upper-triangular entries.
    single_upper_triangle = find(triu(ones(n), 1));
    all_upper_triangle = bsxfun(@plus, single_upper_triangle, n^2*(0:(k-1)));
    all_upper_triangle = all_upper_triangle(:);

    % To vectorize a matrix, we extract all upper-triangular entries and
    % scale by sqrt(2) to ensure isometry, that is: given two tangent
    % vectors U and V at a point X, M.inner(X, U, V) is equal to u'*v,
    % where u = M.vec(X, U) and likewise for v. This construction has the
    % advantage of providing a vectorized representation of matrices that
    % has the same length as the intrinsic dimension of the space they live
    % in.
    M.vec = @(x, u_mat) sqrt(2)*u_mat(all_upper_triangle);
    M.mat = @matricize;
    % Inverse of M.vec: rebuilds the nxnxk array of skew-symmetric slices
    % from its vectorized form. The first input (the base point) is unused.
    function u_mat = matricize(X, u_vec) %#ok
        u_mat = zeros(n, n, k);
        % u_vec contains exactly the strictly upper-triangular entries, in
        % the order of all_upper_triangle. Unlike in symmetricfactory,
        % there is no leading block of k*n diagonal entries to skip, so
        % the whole vector is used.
        u_mat(all_upper_triangle) = u_vec / sqrt(2);
        % Fill the lower triangles with the negated transposes.
        u_mat = u_mat - multitransp(u_mat);
    end

    M.vecmatareisometries = @() true;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/euclidean/symmetricfactory.m
================================================
function M = symmetricfactory(n, k)
% Returns a manifold struct to optimize over k symmetric matrices of size n
%
% function M = symmetricfactory(n)
% function M = symmetricfactory(n, k)
%
% Returns M, a structure describing the Euclidean space of n-by-n symmetric
% matrices equipped with the standard Frobenius distance and associated
% trace inner product, as a manifold for Manopt.
%
% By default, k = 1. If k > 1, points and vectors are stored in 3D matrices
% X of size nxnxk such that each slice X(:, :, i), for i = 1:k, is
% symmetric.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Jan. 22, 2014.
% Contributors:
% Change log:
%
%   Jan. 25, 2017 (NB):
%       M.tangent = M.proj now, instead of being identity. This is notably
%       necessary so that checkgradient will pick up on gradients that do
%       not lie in the appropriate tangent space.
if ~exist('k', 'var') || isempty(k)
    k = 1;
end

M.name = @() sprintf('(Symmetric matrices of size %d)^%d', n, k);

M.dim = @() k*n*(n+1)/2;

M.inner = @(x, d1, d2) d1(:).'*d2(:);

M.norm = @(x, d) norm(d(:), 'fro');

M.dist = @(x, y) norm(x(:)-y(:), 'fro');

M.typicaldist = @() sqrt(k)*n;

% Projection, gradient conversion and Hessian conversion all apply
% multisym to the ambient array.
M.proj = @(x, d) multisym(d);

M.egrad2rgrad = M.proj;

M.ehess2rhess = @(x, eg, eh, d) M.proj(x, eh);

M.tangent = M.proj;

M.exp = @exp;
% Nested function named exp: returns x + t*d when a step t is given as
% third input, and x + d otherwise.
function y = exp(x, d, t)
    if nargin == 3
        y = x + t*d;
    else
        y = x + d;
    end
end

M.retr = M.exp;

M.log = @(x, y) y-x;

M.hash = @(x) ['z' hashmd5(x(:))];

M.rand = @() multisym(randn(n, n, k));

M.randvec = @randvec;
% Random symmetrized Gaussian array scaled to unit Frobenius norm; the
% input x is not used.
function u = randvec(x) %#ok
    u = multisym(randn(n, n, k));
    u = u / norm(u(:), 'fro');
end

M.lincomb = @matrixlincomb;

M.zerovec = @(x) zeros(n, n, k);

M.transp = @(x1, x2, d) d;

M.pairmean = @(x1, x2) .5*(x1+x2);

% Elaborate list of indices of diagonal entries of an nxnxk matrix.
single_diag_entries = (1:(n+1):n^2)';
all_diag_entries = bsxfun(@plus, single_diag_entries, n^2*(0:(k-1)));
all_diag_entries = all_diag_entries(:);

% Likewise, elaborate list of indices of upper-triangular entries.
single_upper_triangle = find(triu(ones(n), 1));
all_upper_triangle = bsxfun(@plus, single_upper_triangle, n^2*(0:(k-1)));
all_upper_triangle = all_upper_triangle(:);

% To vectorize a matrix, we extract all diagonal entries, then all
% upper-triangular entries, the latter being scaled by sqrt(2) to
% ensure isometry, that is: given two tangent vectors U and V at a
% point X, M.inner(X, U, V) is equal to u'*v, where u = M.vec(X, U) and
% likewise for v. This construction has the advantage of providing a
% vectorized representation of matrices that has the same length as the
% intrinsic dimension of the space they live in.
M.vec = @(x, u_mat) [u_mat(all_diag_entries) ; ...
                     sqrt(2)*u_mat(all_upper_triangle)];
M.mat = @matricize;
% Inverse of M.vec. The first k*n entries of u_vec are the diagonal
% entries; the remaining entries are the strictly upper-triangular ones,
% scaled by sqrt(2). The first input (the base point) is unused.
function u_mat = matricize(X, u_vec) %#ok
    u_mat = zeros(n, n, k);
    % Place the off-diagonal part, then mirror it to the lower triangles
    % (the diagonal is still zero at this point).
    u_mat(all_upper_triangle) = u_vec((k*n+1):end) / sqrt(2);
    u_mat = u_mat + multitransp(u_mat);
    % Finally write the diagonal entries.
    u_mat(all_diag_entries) = u_vec(1:(k*n));
end

M.vecmatareisometries = @() true;

end

% Former, easier versions for vec / mat. They had the disadvantage of
% giving vector representations of length k*n^2, instead of k*n*(n+1)/2.
%     M.vec = @(x, u_mat) u_mat(:);
%     M.mat = @(x, u_vec) reshape(u_vec, [m, n]);
%     M.vecmatareisometries = @() true;

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankMNquotientfactory.m
================================================
function M = fixedrankMNquotientfactory(m, n, k)
% Manifold of m-by-n matrices of rank k with two factor quotient geometry.
%
% function M = fixedrankMNquotientfactory(m, n, k)
%
% This follows the quotient geometry described in the following paper:
% P.-A. Absil, L. Amodei and G. Meyer,
% "Two Newton methods on the manifold of fixed-rank matrices endowed
%  with Riemannian quotient geometries", arXiv, 2012.
%
% Paper link: http://arxiv.org/abs/1209.0068
%
% A point X on the manifold is represented as a structure with two
% fields: M and N. The matrix M (mxk) is orthonormal, while the matrix N
% (nxk) is full-rank such that X = M*N';
%
% Tangent vectors are represented as a structure with two fields (M, N).
%
% Please cite the Manopt paper as well as the research paper:
%     @Article{absil2014fixedrank,
%       Title   = {Two Newton methods on the manifold of fixed-rank matrices endowed with Riemannian quotient geometries},
%       Author  = {Absil, P.-A. and Amodei, L. and Meyer, G.},
%       Journal = {Computational Statistics},
%       Year    = {2014},
%       Number  = {3-4},
%       Pages   = {569--590},
%       Volume  = {29},
%       Doi     = {10.1007/s00180-013-0441-6}
%     }

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec.
30, 2012. % Contributors: % Change log: M.name = @() sprintf('MN'' quotient manifold of %dx%d matrices of rank %d', m, n, k); M.dim = @() (m+n-k)*k; % Choice of the metric is motivated by the symmetry present in the % space. M.inner = @(X, eta, zeta) eta.M(:).'*zeta.M(:) + eta.N(:).'*zeta.N(:); M.norm = @(X, eta) sqrt(M.inner(X, eta, eta)); M.dist = @(x, y) error('fixedrankMNquotientfactory.dist not implemented yet.'); M.typicaldist = @() 10*k; symm = @(X) .5*(X+X'); stiefel_proj = @(M, H) H - M*symm(M'*H); M.egrad2rgrad = @egrad2rgrad; function eta = egrad2rgrad(X, eta) eta.M = stiefel_proj(X.M, eta.M); end M.ehess2rhess = @ehess2rhess; function Hess = ehess2rhess(X, egrad, ehess, eta) % Directional derivative of the Riemannian gradient. Hess.M = ehess.M - eta.M*symm(X.M'*egrad.M); Hess.M = stiefel_proj(X.M, Hess.M); Hess.N = ehess.N; % Projection onto the horizontal space. Hess = M.proj(X, Hess); end M.proj = @projection; function etaproj = projection(X, eta) % Start by projecting the vector from Rmp x Rnp to the tangent % space to the total space, that is, eta.M should be in the % tangent space to Stiefel at X.M and eta.N is arbitrary. eta.M = stiefel_proj(X.M, eta.M); % Now project from the tangent space to the horizontal space, that % is, take care of the quotient. % First solve a Sylvester equation (A symm., B skew-symm.) A = X.N'*X.N + eye(k); B = eta.M'*X.M + eta.N'*X.N; B = B-B'; omega = lyap(A, -B); % And project along the vertical space to the horizontal space. etaproj.M = eta.M + X.M*omega; etaproj.N = eta.N + X.N*omega; end M.exp = @exponential; function Y = exponential(X, eta, t) if nargin < 3 t = 1.0; end A = t*X.M'*eta.M; S = t^2*eta.M'*eta.M; Y.M = [X.M t*eta.M]*expm([A -S ; eye(k) A])*eye(2*k, k)*expm(-A); % re-orthonormalize (seems necessary from time to time). [Q R] = qr(Y.M, 0); Y.M = Q * diag(sign(diag(R))); Y.N = X.N + t*eta.N; end % Factor M lives on the Stiefel manifold, hence we will reuse its % random generator. 
stiefelm = stiefelfactory(m, k); M.retr = @retraction; function Y = retraction(X, eta, t) if nargin < 3 t = 1.0; end Y.M = uf(X.M + t*eta.M); % This is a valid retraction Y.N = X.N + t*eta.N; end M.hash = @(X) ['z' hashmd5([X.M(:) ; X.N(:)])]; M.rand = @random; function X = random() X.M = stiefelm.rand(); X.N = randn(n, k); end M.randvec = @randomvec; function eta = randomvec(X) eta.M = randn(m, k); eta.N = randn(n, k); eta = projection(X, eta); nrm = M.norm(X, eta); eta.M = eta.M / nrm; eta.N = eta.N / nrm; end M.lincomb = @lincomb; M.zerovec = @(X) struct('M', zeros(m, k), 'N', zeros(n, k)); M.transp = @(x1, x2, d) projection(x2, d); end % Linear combination of tangent vectors function d = lincomb(x, a1, d1, a2, d2) %#ok if nargin == 3 d.M = a1*d1.M; d.N = a1*d1.N; elseif nargin == 5 d.M = a1*d1.M + a2*d2.M; d.N = a1*d1.N + a2*d2.N; else error('Bad use of fixedrankMNquotientfactory.lincomb.'); end end function A = uf(A) [L, unused, R] = svd(A, 0); A = L*R'; end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankembeddedfactory.m ================================================ function M = fixedrankembeddedfactory(m, n, k) % Manifold struct to optimize fixed-rank matrices w/ an embedded geometry. % % function M = fixedrankembeddedfactory(m, n, k) % % Manifold of m-by-n real matrices of fixed rank k. This follows the % embedded geometry described in Bart Vandereycken's 2013 paper: % "Low-rank matrix completion by Riemannian optimization". % % Paper link: http://arxiv.org/pdf/1209.3834.pdf % % A point X on the manifold is represented as a structure with three % fields: U, S and V. The matrices U (mxk) and V (nxk) are orthonormal, % while the matrix S (kxk) is any /diagonal/, full rank matrix. % Following the SVD formalism, X = U*S*V'. Note that the diagonal entries % of S are not constrained to be nonnegative. 
%
% Tangent vectors are represented as a structure with three fields: Up, M
% and Vp. The matrices Up (mxk) and Vp (nxk) obey Up'*U = 0 and Vp'*V = 0.
% The matrix M (kxk) is arbitrary. Such a structure corresponds to the
% following tangent vector in the ambient space of mxn matrices:
%   Z = U*M*V' + Up*V' + U*Vp'
% where (U, S, V) is the current point and (Up, M, Vp) is the tangent
% vector at that point.
%
% Vectors in the ambient space are best represented as mxn matrices. If
% these are low-rank, they may also be represented as structures with
% U, S, V fields, such that Z = U*S*V'. There are no restrictions on what
% U, S and V are, as long as their product as indicated yields a real, mxn
% matrix.
%
% The chosen geometry yields a Riemannian submanifold of the embedding
% space R^(mxn) equipped with the usual trace (Frobenius) inner product.
%
%
% Please cite the Manopt paper as well as the research paper:
%     @Article{vandereycken2013lowrank,
%       Title   = {Low-rank matrix completion by {Riemannian} optimization},
%       Author  = {Vandereycken, B.},
%       Journal = {SIAM Journal on Optimization},
%       Year    = {2013},
%       Number  = {2},
%       Pages   = {1214--1236},
%       Volume  = {23},
%       Doi     = {10.1137/110845768}
%     }
%
% See also: fixedrankfactory_2factors fixedrankfactory_3factors

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   Feb. 20, 2014 (NB):
%       Added function tangent to work with checkgradient.
%
%   June 24, 2014 (NB):
%       A couple modifications following
%       Bart Vandereycken's feedback:
%       - The checksum (hash) was replaced with a faster alternative: it's a
%         bit less "safe" in that collisions could arise with higher
%         probability, but they're still very unlikely.
%       - The vector transport was changed.
%       The typical distance was also modified, hopefully giving the
%       trustregions method a better initial guess for the trust region
%       radius, but that should be tested for different cost functions too.
%
%   July 11, 2014 (NB):
%       Added ehess2rhess and tangent2ambient, supplied by Bart.
%
%   July 14, 2014 (NB):
%       Added vec, mat and vecmatareisometries so that hessianspectrum now
%       works with this geometry. Implemented the tangent function.
%       Made it clearer in the code and in the documentation in what format
%       ambient vectors may be supplied, and generalized some functions so
%       that they should now work with both accepted formats.
%       It is now clearly stated that for a point X represented as a
%       triplet (U, S, V), the matrix S needs to be diagonal.

M.name = @() sprintf('Manifold of %dx%d matrices of rank %d', m, n, k);

M.dim = @() (m+n-k)*k;

% Inner product of two tangent vectors given in (Up, M, Vp) format: the
% sum of the entrywise products of the three components.
M.inner = @(x, d1, d2) d1.M(:).'*d2.M(:) + d1.Up(:).'*d2.Up(:) ...
                                         + d1.Vp(:).'*d2.Vp(:);

M.norm = @(x, d) sqrt(M.inner(x, d, d));

M.dist = @(x, y) error('fixedrankembeddedfactory.dist not implemented yet.');

M.typicaldist = @() M.dim();

% Re-projects the Up and Vp components of a vector Z, stored in tangent
% vector format, so that they are orthogonal to X.U and X.V respectively.
% For a genuine tangent vector at X this only removes round-off drift.
% The M component is left untouched.
M.tangent = @tangent;
function Z = tangent(X, Z)
    UtUp = X.U'*Z.Up;
    VtVp = X.V'*Z.Vp;
    Z.Up = Z.Up - X.U*UtUp;
    Z.Vp = Z.Vp - X.V*VtVp;
end

% Computes Z*W for an ambient vector Z. Z is either a plain matrix, in
% which case this is a straightforward product, or a structure with fields
% U, S, V such that Z = U*S*V', in which case the product is evaluated
% right-to-left without forming Z.
function ZW = apply_ambient(Z, W)
    if isstruct(Z)
        ZW = Z.U*(Z.S*(Z.V'*W));
    else
        ZW = Z*W;
    end
end

% Companion of apply_ambient: computes Z'*W for the same two formats of Z.
function ZtW = apply_ambient_transpose(Z, W)
    if isstruct(Z)
        ZtW = Z.V*(Z.S'*(Z.U'*W));
    else
        ZtW = Z'*W;
    end
end

% Orthogonal projection of an ambient vector Z represented as an mxn
% matrix or as a structure with fields U, S, V to the tangent space at
% X, in a tangent vector structure format.
M.proj = @projection;
% Zproj = projection(X, Z)
%   X : point on the manifold, structure with fields U, S, V.
%   Z : ambient vector, either a matrix or a structure with fields U, S, V
%       (both formats are handled by apply_ambient / apply_ambient_transpose).
%   Zproj : structure with fields M, Up, Vp (tangent vector format).
function Zproj = projection(X, Z)

    % Z is only accessed through the products Z*V and Z'*U.
    ZV = apply_ambient(Z, X.V);
    UtZV = X.U'*ZV;
    ZtU = apply_ambient_transpose(Z, X.U);

    % M = U'*Z*V; Up and Vp are Z*V and Z'*U with their components along
    % X.U and X.V removed.
    Zproj.M = UtZV;
    Zproj.Up = ZV  - X.U*UtZV;
    Zproj.Vp = ZtU - X.V*UtZV';

end

% The Riemannian gradient is obtained by applying the same projection to
% the Euclidean gradient.
M.egrad2rgrad = @projection;

% Code supplied by Bart.
% Given the Euclidean gradient at X and the Euclidean Hessian at X
% along H, where egrad and ehess are vectors in the ambient space and H
% is a tangent vector at X, returns the Riemannian Hessian at X along
% H, which is a tangent vector.
M.ehess2rhess = @ehess2rhess;
function rhess = ehess2rhess(X, egrad, ehess, H)

    % Euclidean part
    rhess = projection(X, ehess);

    % Curvature part
    % T is formed from egrad applied to H.Vp (resp. egrad' applied to
    % H.Up), right-divided by X.S; the component of T along X.U (resp.
    % X.V) is removed before it is added to Up (resp. Vp).
    T = apply_ambient(egrad, H.Vp)/X.S;
    rhess.Up = rhess.Up + (T - X.U*(X.U'*T));
    T = apply_ambient_transpose(egrad, H.Up)/X.S;
    rhess.Vp = rhess.Vp + (T - X.V*(X.V'*T));

end

% Transforms a tangent vector Z represented as a structure (Up, M, Vp)
% into a structure with fields (U, S, V) that represents that same
% tangent vector in the ambient space of mxn matrices, as U*S*V'.
% This matrix is equal to X.U*Z.M*X.V' + Z.Up*X.V' + X.U*Z.Vp'. The
% latter is an mxn matrix, which could be too large to build
% explicitly, and this is why we return a low-rank representation
% instead. Note that there are no guarantees on U, S and V other than
% that USV' is the desired matrix. In particular, U and V are not (in
% general) orthonormal and S is not (in general) diagonal.
% (In this implementation, S is identity, but this might change.)
M.tangent2ambient = @tangent2ambient;
function Zambient = tangent2ambient(X, Z)
    % The two horizontal concatenations give
    % U*V' = (X.U*Z.M + Z.Up)*X.V' + X.U*Z.Vp'.
    Zambient.U = [X.U*Z.M + Z.Up, X.U];
    Zambient.S = eye(2*k);
    Zambient.V = [X.V, Z.Vp];
end

% This retraction is second order, following general results from
% Absil, Malick, "Projection-like retractions on matrix manifolds",
% SIAM J. Optim., 22 (2012), pp. 135-158.
M.retr = @retraction;
% Y = retraction(X, Z, t)
%   X : point (fields U, S, V); Z : tangent vector (fields Up, M, Vp);
%   t : optional step size, defaults to 1.
%   Y : new point with fields U, V, S.
function Y = retraction(X, Z, t)
    if nargin < 3
        t = 1.0;
    end

    % See personal notes June 28, 2012 (NB)
    % Economy-size QR factorizations of the Up and Vp components.
    [Qu, Ru] = qr(Z.Up, 0);
    [Qv, Rv] = qr(Z.Vp, 0);

    % Calling svds or svd should yield the same result, but BV
    % advocated svd is more robust, and it doesn't change the
    % asymptotic complexity to call svd then trim rather than call
    % svds. Also, apparently Matlab calls ARPACK in a suboptimal way
    % for svds in this scenario.
    % [Ut St Vt] = svds([X.S+t*Z.M , t*Rv' ; t*Ru , zeros(k)], k);
    % Full SVD of a 2k-by-2k matrix; only the leading k triplets are kept.
    [Ut, St, Vt] = svd([X.S+t*Z.M , t*Rv' ; t*Ru , zeros(k)]);

    Y.U = [X.U Qu]*Ut(:, 1:k);
    Y.V = [X.V Qv]*Vt(:, 1:k);
    % NOTE(review): eps is added to every diagonal entry of S, presumably
    % to keep S away from exact singularity -- confirm.
    Y.S = St(1:k, 1:k) + eps*eye(k);

    % equivalent but very slow code
    % [U S V] = svds(X.U*X.S*X.V' + t*(X.U*Z.M*X.V' + Z.Up*X.V' + X.U*Z.Vp'), k);
    % Y.U = U; Y.V = V; Y.S = S;

end

% Orthographic retraction provided by Teng Zhang. One interest of the
% orthographic retraction is that if matrices are represented in full
% size, it can be computed without any SVDs. If for an application it
% makes sense to represent the matrices in full size, this may be a
% good idea, but it won't shine in the present implementation of the
% manifold.
M.retr_ortho = @retraction_orthographic;
% Same calling convention as retraction: t is optional and defaults to 1.
function Y = retraction_orthographic(X, Z, t)
    if nargin < 3
        t = 1.0;
    end

    % First, write Y (the output) as U1*S0*V1', where U1 and V1 are
    % orthogonal matrices and S0 is of size r by r.
    [U1, ~] = qr(t*(X.U*Z.M  + Z.Up) + X.U*X.S, 0);
    [V1, ~] = qr(t*(X.V*Z.M' + Z.Vp) + X.V*X.S, 0);
    S0 = (U1'*X.U)*(X.S + t*Z.M)*(X.V'*V1) + ...
         t*((U1'*Z.Up)*(X.V'*V1) + (U1'*X.U)*(Z.Vp'*V1));

    % Then, obtain the singular value decomposition of Y.
    [U2, S2, V2] = svd(S0);

    Y.U = U1*U2;
    Y.S = S2;
    Y.V = V1*V2;

end

M.exp = @exponential;
% No true exponential map is implemented: this calls the retraction with
% the same arguments and emits a warning on every call.
function Y = exponential(X, Z, t)
    if nargin < 3
        t = 1.0;
    end
    Y = retraction(X, Z, t);
    warning('manopt:fixedrankembeddedfactory:exp', ...
        ['Exponential for fixed rank ' ...
         'manifold not implemented yet. Used retraction instead.']);
end

% Less safe but much faster checksum, June 24, 2014.
% Older version right below.
M.hash = @(X) ['z' hashmd5([sum(X.U(:)) ; sum(X.S(:)); sum(X.V(:)) ])];
%M.hash = @(X) ['z' hashmd5([X.U(:) ; X.S(:) ; X.V(:)])];

M.rand = @random;
% The factors U and V are points on Stiefel manifolds, so their random
% generators are borrowed from the corresponding Stiefel factories.
stiefelm = stiefelfactory(m, k);
stiefeln = stiefelfactory(n, k);
% Draws a random point: U and V from the Stiefel generators, and S as a
% diagonal matrix of k uniform random numbers sorted in descending order.
function X = random()
    U = stiefelm.rand();
    V = stiefeln.rand();
    s = sort(rand(k, 1), 1, 'descend');
    X = struct('U', U, 'V', V, 'S', diag(s));
end

% Generate a random tangent vector at X.
% TODO: consider a possible imbalance between the three components Up,
% Vp and M, when m, n and k are widely different (which is typical).
M.randvec = @randomvec;
% Gaussian components are drawn, projected with tangent(), and the result
% is scaled to unit norm with respect to M.norm.
function Z = randomvec(X)
    Up = randn(m, k);
    Vp = randn(n, k);
    Mk = randn(k);
    Z = tangent(X, struct('Up', Up, 'Vp', Vp, 'M', Mk));
    len = M.norm(X, Z);
    Z.Up = Z.Up / len;
    Z.Vp = Z.Vp / len;
    Z.M = Z.M / len;
end

M.lincomb = @lincomb;

M.zerovec = @(X) struct('Up', zeros(m, k), 'M', zeros(k, k), ...
                                                    'Vp', zeros(n, k));

% New vector transport on June 24, 2014 (as indicated by Bart)
% Reference: Absil, Mahony, Sepulchre 2008 section 8.1.3:
% For Riemannian submanifolds of a Euclidean space, it is acceptable to
% transport simply by orthogonal projection of the tangent vector
% translated in the ambient space.
M.transp = @project_tangent;
% Transports Z1 (tangent at X1) to X2: Z1 is first expressed as an ambient
% vector, then projected onto the tangent space at X2.
function Z2 = project_tangent(X1, X2, Z1)
    ambient = tangent2ambient(X1, Z1);
    Z2 = projection(X2, ambient);
end

M.vec = @vec;
% Column vector holding all entries of the dense mxn ambient matrix that
% represents the tangent vector Z at X.
function Zvec = vec(X, Z)
    amb = tangent2ambient(X, Z);
    dense = amb.U*amb.S*amb.V';
    Zvec = dense(:);
end

% Inverse of vec: reshape to m-by-n and project to the tangent space at X.
M.mat = @(X, Zvec) projection(X, reshape(Zvec, [m, n]));

M.vecmatareisometries = @() true;

end

% Linear combination of tangent vectors: a1*d1 with three inputs,
% a1*d1 + a2*d2 with five inputs. The first input is unused.
function d = lincomb(x, a1, d1, a2, d2) %#ok

    switch nargin
        case 3
            d.Up = a1*d1.Up;
            d.Vp = a1*d1.Vp;
            d.M  = a1*d1.M;
        case 5
            d.Up = a1*d1.Up + a2*d2.Up;
            d.Vp = a1*d1.Vp + a2*d2.Vp;
            d.M  = a1*d1.M  + a2*d2.M;
        otherwise
            error('fixedrank.lincomb takes either 3 or 5 inputs.');
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankfactory_2factors.m
================================================
function M = fixedrankfactory_2factors(m, n, k)
% Manifold of m-by-n matrices of rank k with balanced quotient geometry.
%
% function M = fixedrankfactory_2factors(m, n, k)
%
% The first-order geometry follows the balanced quotient geometry described
% in the paper,
% "Linear regression under fixed-rank constraints: a Riemannian approach",
% G. Meyer, S. Bonnabel and R. Sepulchre, ICML 2011.
%
% Paper link: http://www.icml-2011.org/papers/350_icmlpaper.pdf.
%
% The second-order geometry follows from the paper
% "Fixed-rank matrix factorizations and Riemannian low-rank optimization",
% B. Mishra, G. Meyer, S. Bonnabel and R. Sepulchre,
% Computational Statistics, 29(3 - 4), pp. 591 - 621, 2014.
%
% A point X on the manifold is represented as a structure with two
% fields: L and R. The matrices L (mxk) and R (nxk) are full column-rank
% matrices such that X = L*R'.
%
% Tangent vectors are represented as a structure with two fields: L, R.
%
% For first-order geometry, please cite the Manopt paper as well as the research paper:
%     @InProceedings{meyer2011linear,
%       Title        = {Linear regression under fixed-rank constraints: a {R}iemannian approach},
%       Author       = {Meyer, G. and Bonnabel, S. and Sepulchre, R.},
%       Booktitle    = {{28th International Conference on Machine Learning}},
%       Year         = {2011},
%       Organization = {{ICML}}
%     }
%
% For second-order geometry, please cite the Manopt paper as well as the research paper:
%     @Article{mishra2014fixedrank,
%       Title   = {Fixed-rank matrix factorizations and {Riemannian} low-rank optimization},
%       Author  = {Mishra, B. and Meyer, G. and Bonnabel, S. and Sepulchre, R.},
%       Journal = {Computational Statistics},
%       Year    = {2014},
%       Number  = {3-4},
%       Pages   = {591--621},
%       Volume  = {29},
%       Doi     = {10.1007/s00180-013-0464-z}
%     }
%
%
% See also fixedrankembeddedfactory fixedrankfactory_3factors fixedrankfactory_2factors_preconditioned

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   July 10, 2013 (NB):
%       Added vec, mat, tangent, tangent2ambient.
%
%   July 03, 2015 (BM):
%       Cosmetic changes including avoiding storing the inverse of a
%       k-by-k matrix.

M.name = @() sprintf('LR'' quotient manifold of %dx%d matrices of rank %d', m, n, k);

M.dim = @() (m+n-k)*k;

% Caches the k-by-k Gram matrices L'*L and R'*R on the point X, as fields
% LtL and RtR. They are used in the inner product and most other
% operations. If both fields are already present, X is returned unchanged.
function X = prepare(X)
    if all(isfield(X, {'LtL', 'RtR'}))
        return;
    end
    X.LtL = X.L'*X.L;
    X.RtR = X.R'*X.R;
end

% Choice of the metric is motivated by the symmetry present in the
% space. The metric is the natural Grassmannian metric on L and R.
M.inner = @iproduct;
% Inner product of the tangent vectors eta and zeta at X: each factor's
% contribution is weighted by the inverse of that factor's Gram matrix.
function ip = iproduct(X, eta, zeta)
    X = prepare(X);
    ip = trace(X.LtL\(eta.L'*zeta.L)) + trace( X.RtR\(eta.R'*zeta.R));
end

M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));

M.dist = @(x, y) error('fixedrankfactory_2factors.dist not implemented yet.');

M.typicaldist = @() 10*k;

% Symmetric part of a square matrix.
symm = @(M) .5*(M+M');

M.egrad2rgrad = @egrad2rgrad;
% Converts the Euclidean gradient (fields L, R) to the Riemannian gradient
% by right-multiplying each factor by the matching Gram matrix.
function rgrad = egrad2rgrad(X, egrad)
    X = prepare(X);
    rgrad.L = egrad.L*X.LtL;
    rgrad.R = egrad.R*X.RtR;
end

M.ehess2rhess = @ehess2rhess;
% Converts the Euclidean gradient egrad and the Euclidean Hessian ehess
% along eta into the Riemannian Hessian along eta (fields L, R).
function Hess = ehess2rhess(X, egrad, ehess, eta)
    X = prepare(X);

    % Riemannian gradient computation.
    rgrad = egrad2rgrad(X, egrad);

    % Directional derivative of the Riemannian gradient.
    Hess.L = ehess.L*X.LtL + 2*egrad.L*symm(eta.L'*X.L);
    Hess.R = ehess.R*X.RtR + 2*egrad.R*symm(eta.R'*X.R);

    % We need a correction term for the non-constant metric.
    Hess.L = Hess.L - rgrad.L*(X.LtL\(symm(X.L'*eta.L))) - eta.L*(X.LtL\(symm(X.L'*rgrad.L))) + X.L*(X.LtL\(symm(eta.L'*rgrad.L)));
    Hess.R = Hess.R - rgrad.R*(X.RtR\(symm(X.R'*eta.R))) - eta.R*(X.RtR\(symm(X.R'*rgrad.R))) + X.R*(X.RtR\(symm(eta.R'*rgrad.R)));

    % Projection onto the horizontal space.
    Hess = M.proj(X, Hess);
end

M.proj = @projection;
% Projection of the vector eta in the ambient space onto the horizontal space.
function etaproj = projection(X, eta)
    X = prepare(X);

    % Omega is obtained from a Sylvester-type equation solved by lyap
    % (three-argument form) and applied with opposite signs to L and R.
    SS = (X.LtL)*(X.RtR);
    AS = (X.LtL)*(X.R'*eta.R) - (eta.L'*X.L)*(X.RtR);
    Omega = lyap(SS, SS,-AS);
    etaproj.L = eta.L + X.L*Omega';
    etaproj.R = eta.R - X.R*Omega;
end

M.tangent = M.proj;
M.tangent2ambient = @(X, eta) eta;

M.retr = @retraction;
% Y = retraction(X, eta, t): moves from X along t*eta (t defaults to 1),
% rescales the factors, and caches the Gram matrices on Y.
function Y = retraction(X, eta, t)
    if nargin < 3
        t = 1.0;
    end

    Y.L = X.L + t*eta.L;
    Y.R = X.R + t*eta.R;

    % Numerical conditioning step: A simpler version.
    % We need to ensure that L and R do not have very relative
    % skewed norms.
    % Dividing L and multiplying R by the same scalar leaves L*R' unchanged.
    % NOTE(review): the scaling is computed from the norms of X.L and X.R
    % (the base point), not from the updated Y.L and Y.R -- confirm this
    % is intended.

    scaling = norm(X.L, 'fro')/norm(X.R, 'fro');
    scaling = sqrt(scaling);
    Y.L = Y.L / scaling;
    Y.R = Y.R * scaling;

    % These are reused in the computation of the gradient and Hessian.
    Y = prepare(Y);
end

M.exp = @exponential;
% No true exponential map is implemented: this calls the retraction with
% the same arguments and emits a warning on every call.
function Y = exponential(X, eta, t)
    if nargin < 3
        t = 1.0;
    end

    Y = retraction(X, eta, t);
    warning('manopt:fixedrankfactory_2factors:exp', ...
        ['Exponential for fixed rank ' ...
        'manifold not implemented yet. Used retraction instead.']);
end

M.hash = @(X) ['z' hashmd5([X.L(:) ; X.R(:)])];

M.rand = @random;
function X = random()
    % A random point on the total space.
    X.L = randn(m, k);
    X.R = randn(n, k);
    X = prepare(X);
end

M.randvec = @randomvec;
function eta = randomvec(X)
    % A random vector in the horizontal space.
    eta.L = randn(m, k);
    eta.R = randn(n, k);
    eta = projection(X, eta);
    % Scale to unit norm with respect to M.norm.
    nrm = M.norm(X, eta);
    eta.L = eta.L / nrm;
    eta.R = eta.R / nrm;
end

M.lincomb = @lincomb;

M.zerovec = @(X) struct('L', zeros(m, k),'R', zeros(n, k));

% Transport by projecting d onto the horizontal space at x2; x1 is unused.
M.transp = @(x1, x2, d) projection(x2, d);

% vec and mat are not isometries, because of the unusual inner metric.
M.vec = @(X, U) [U.L(:) ; U.R(:)];
M.mat = @(X, u) struct('L', reshape(u(1:(m*k)), m, k), ...
    'R', reshape(u((m*k+1):end), n, k));
M.vecmatareisometries = @() false;

end

% Linear combination of tangent vectors.
% Returns a1*d1 with three inputs and a1*d1 + a2*d2 with five inputs; the
% first input is unused.
function d = lincomb(x, a1, d1, a2, d2) %#ok

    if nargin == 3
        d.L = a1*d1.L;
        d.R = a1*d1.R;
    elseif nargin == 5
        d.L = a1*d1.L + a2*d2.L;
        d.R = a1*d1.R + a2*d2.R;
    else
        error('Bad use of fixedrankfactory_2factors.lincomb.');
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankfactory_2factors_preconditioned.m
================================================
function M = fixedrankfactory_2factors_preconditioned(m, n, k)
% Manifold of m-by-n matrices of rank k with two factor quotient geometry.
%
% function M = fixedrankfactory_2factors_preconditioned(m, n, k)
%
% This geometry is tuned to least-squares problems such as low-rank matrix
% completion with ell-2 loss.
%
% A point X on the manifold is represented as a structure with two
% fields: L and R. The matrices L (m-by-k) and R (n-by-k) are
% full column-rank matrices such that X = L*R'.
%
% Tangent vectors are represented as a structure with two fields: L, R.
%
% Please cite the Manopt paper as well as the research paper:
%     @Techreport{mishra2012optimized,
%       Title   = {A {R}iemannian geometry for low-rank matrix completion},
%       Author  = {Mishra, B. and Adithya Apuroop, K. and Sepulchre, R.},
%       Journal = {Arxiv preprint arXiv:1211.1550},
%       Year    = {2012}
%     }
%
%
% See also: fixedrankembeddedfactory fixedrankfactory_2factors fixedrankfactory_3factors_preconditioned

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   April 04, 2015 (BM):
%       Cosmetic changes including avoiding storing the inverse of a
%       k-by-k matrix.

M.name = @() sprintf('LR''(tuned to least square problems) quotient manifold of %dx%d matrices of rank %d', m, n, k);

M.dim = @() (m+n-k)*k;

% Some precomputations at the point X to be used in the inner product
% (and pretty much everywhere else).
% Adds the fields LtL = L'*L and RtR = R'*R unless both already exist.
function X = prepare(X)
    if ~all(isfield(X,{'LtL','RtR'}))
        L = X.L;
        R = X.R;
        X.LtL = L'*L;
        X.RtR = R'*R;
    end
end

% The choice of metric is motivated by symmetry and
% tuned to least-squares cost function.
M.inner = @iproduct;
% The L components are weighted by R'*R and the R components by L'*L.
function ip = iproduct(X, eta, zeta)
    X = prepare(X);
    ip = trace(X.RtR*(eta.L'*zeta.L)) + trace(X.LtL*(eta.R'*zeta.R)); % Scaled metric
end

M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));

M.dist = @(x, y) error('fixedrankfactory_2factors_preconditioned.dist not implemented yet.');

M.typicaldist = @() 10*k;

M.egrad2rgrad = @egrad2rgrad;
% Converts the Euclidean gradient (fields L, R) to the Riemannian gradient
% by right-dividing each factor by the other factor's Gram matrix.
function rgrad = egrad2rgrad(X, egrad)
    X = prepare(X);

    % Riemannian gradient
    rgrad.L = egrad.L/X.RtR;
    rgrad.R = egrad.R/X.LtL;
end

M.ehess2rhess = @ehess2rhess;
% Converts the Euclidean gradient egrad and the Euclidean Hessian ehess
% along eta into the Riemannian Hessian along eta (fields L, R).
% symm is a function handle assigned at the end of the enclosing factory.
function Hess = ehess2rhess(X, egrad, ehess, eta)
    X = prepare(X);

    % Riemannian gradient.
    rgrad = egrad2rgrad(X, egrad);

    % Directional derivative of the Riemannian gradient.
    Hess.L = ehess.L/X.RtR - 2*egrad.L*(X.RtR \ (symm(eta.R'*X.R) / X.RtR));
    Hess.R = ehess.R/X.LtL - 2*egrad.R*(X.LtL \ (symm(eta.L'*X.L) / X.LtL));

    % We still need a correction factor for the non-constant metric.
    Hess.L = Hess.L + rgrad.L*(symm(eta.R'*X.R)/X.RtR) + eta.L*(symm(rgrad.R'*X.R)/X.RtR) - X.L*(symm(eta.R'*rgrad.R)/X.RtR);
    Hess.R = Hess.R + rgrad.R*(symm(eta.L'*X.L)/X.LtL) + eta.R*(symm(rgrad.L'*X.L)/X.LtL) - X.R*(symm(eta.L'*rgrad.L)/X.LtL);

    % Project on the horizontal space.
    Hess = M.proj(X, Hess);
end

M.proj = @projection;
function etaproj = projection(X, eta)
    X = prepare(X);

    % Projection onto the horizontal space.
    % Lambda is available in closed form here; no Sylvester solve is used.
    Lambda = 0.5*((eta.R'*X.R)/X.RtR - X.LtL\(X.L'*eta.L));
    etaproj.L = eta.L + X.L*Lambda;
    etaproj.R = eta.R - X.R*Lambda';
end

M.tangent = M.proj;
M.tangent2ambient = @(X, eta) eta;

M.retr = @retraction;
% Y = retraction(X, eta, t): moves from X along t*eta (t defaults to 1),
% rescales the factors, and caches the Gram matrices on Y.
function Y = retraction(X, eta, t)
    if nargin < 3
        t = 1.0;
    end
    Y.L = X.L + t*eta.L;
    Y.R = X.R + t*eta.R;

    % Numerical conditioning step: a simpler version.
    % We need to ensure that L and R do not have very relative
    % skewed norms.
    % Dividing L and multiplying R by the same scalar leaves L*R' unchanged.
    % NOTE(review): the scaling is computed from the norms of X.L and X.R
    % (the base point), not from the updated Y.L and Y.R -- confirm this
    % is intended.

    scaling = norm(X.L, 'fro')/norm(X.R, 'fro');
    scaling = sqrt(scaling);
    Y.L = Y.L / scaling;
    Y.R = Y.R * scaling;

    % These are reused in the computations of gradient and Hessian.
    Y = prepare(Y);
end

M.exp = @exponential;
% No true exponential map is implemented: this calls the retraction with
% the same arguments and emits a warning on every call.
function Y = exponential(X, eta, t)
    if nargin < 3
        t = 1.0;
    end

    Y = retraction(X, eta, t);
    warning('manopt:fixedrankfactory_2factors_preconditioned:exp', ...
        ['Exponential for fixed rank ' ...
        'manifold not implemented yet. Used retraction instead.']);
end

M.hash = @(X) ['z' hashmd5([X.L(:) ; X.R(:)])];

M.rand = @random;
% Random point with Gaussian factors; the Gram matrices are not cached
% here (prepare is not called).
function X = random()
    X.L = randn(m, k);
    X.R = randn(n, k);
end

M.randvec = @randomvec;
% Random horizontal vector at X, scaled to unit norm w.r.t. M.norm.
function eta = randomvec(X)
    eta.L = randn(m, k);
    eta.R = randn(n, k);
    eta = projection(X, eta);
    nrm = M.norm(X, eta);
    eta.L = eta.L / nrm;
    eta.R = eta.R / nrm;
end

M.lincomb = @lincomb;

M.zerovec = @(X) struct('L', zeros(m, k),'R', zeros(n, k));

% Transport by projecting d onto the horizontal space at x2; x1 is unused.
M.transp = @(x1, x2, d) projection(x2, d);

% vec and mat are not isometries, because of the scaled inner metric.
M.vec = @(X, U) [U.L(:) ; U.R(:)];
M.mat = @(X, u) struct('L', reshape(u(1:(m*k)), m, k), ...
    'R', reshape(u((m*k+1):end), n, k));
M.vecmatareisometries = @() false;

% Auxiliary functions
% Symmetric part of a square matrix; used by the nested ehess2rhess above.
symm = @(M) .5*(M+M');
end

% Linear combination of tangent vectors.
% Returns a1*d1 with three inputs and a1*d1 + a2*d2 with five inputs; the
% first input is unused.
function d = lincomb(x, a1, d1, a2, d2) %#ok

    if nargin == 3
        d.L = a1*d1.L;
        d.R = a1*d1.R;
    elseif nargin == 5
        d.L = a1*d1.L + a2*d2.L;
        d.R = a1*d1.R + a2*d2.R;
    else
        error('Bad use of fixedrankfactory_2factors_preconditioned.lincomb.');
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankfactory_2factors_subspace_projection.m
================================================
function M = fixedrankfactory_2factors_subspace_projection(m, n, k)
% Manifold of m-by-n matrices of rank k with two factor quotient geometry.
%
% function M = fixedrankfactory_2factors_subspace_projection(m, n, k)
%
% A point X on the manifold is represented as a structure with two
% fields: L and R. The matrix L (mxk) is orthonormal,
% while the matrix R (nxk) is a full column-rank
% matrix such that X = L*R'.
%
% Tangent vectors are represented as a structure with two fields: L, R.
%
% Note: L is orthonormal, i.e., columns are orthogonal to each other.
% Such a geometry might be of interest where the left factor has a
% subspace interpretation. A motivation is in Sections 3.3 and 6.4 of the
% paper below.
%
% Please cite the Manopt paper as well as the research paper:
%     @Article{mishra2014fixedrank,
%       Title   = {Fixed-rank matrix factorizations and {Riemannian} low-rank optimization},
%       Author  = {Mishra, B. and Meyer, G. and Bonnabel, S. and Sepulchre, R.},
%       Journal = {Computational Statistics},
%       Year    = {2014},
%       Number  = {3-4},
%       Pages   = {591--621},
%       Volume  = {29},
%       Doi     = {10.1007/s00180-013-0464-z}
%     }
%
% See also: fixedrankfactory_2factors fixedrankembeddedfactory fixedrankfactory_2factors_preconditioned

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors:
% Change log:

M.name = @() sprintf('LR'' quotient manifold of %dx%d matrices of rank %d', m, n, k);

M.dim = @() (m+n-k)*k;

% Some precomputations at the point X to be used in the inner product (and
% pretty much everywhere else).
% Adds the field RtR = R'*R to X unless it is already present.
function X = prepare(X)
    if ~isfield(X, 'RtR')
        X.RtR = X.R'*X.R;
    end
end

% The choice of the metric is motivated by symmetry and scale
% invariance in the total space.
M.inner = @iproduct;
% Euclidean inner product on the L components plus the inner product of the
% R components weighted by the inverse of R'*R.
function ip = iproduct(X, eta, zeta)
    X = prepare(X);
    ip = eta.L(:).'*zeta.L(:) + trace(X.RtR\(eta.R'*zeta.R));
end

M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));

M.dist = @(x, y) error('fixedrankfactory_2factors_subspace_projection.dist not implemented yet.');

M.typicaldist = @() 10*k;

% Skew-symmetric and symmetric parts of a square matrix, and the projection
% H - L*symm(L'*H) used for the orthonormal factor L.
skew = @(X) .5*(X-X');
symm = @(X) .5*(X+X');
stiefel_proj = @(L, H) H - L*symm(L'*H);

M.egrad2rgrad = @egrad2rgrad;
% Converts the Euclidean gradient (fields L, R) to the Riemannian gradient.
function rgrad = egrad2rgrad(X, egrad)
    X = prepare(X);
    rgrad.L = stiefel_proj(X.L, egrad.L);
    rgrad.R = egrad.R*X.RtR;
end

M.ehess2rhess = @ehess2rhess;
% Converts the Euclidean gradient egrad and the Euclidean Hessian ehess
% along eta into the Riemannian Hessian along eta (fields L, R).
function Hess = ehess2rhess(X, egrad, ehess, eta)
    X = prepare(X);

    % Riemannian gradient.
    rgrad = egrad2rgrad(X, egrad);

    % Directional derivative of the Riemannian gradient.
    Hess.L = ehess.L - eta.L*symm(X.L'*egrad.L);
    Hess.L = stiefel_proj(X.L, Hess.L);

    Hess.R = ehess.R*X.RtR + 2*egrad.R*symm(eta.R'*X.R);

    % Correction factor for the non-constant metric on the factor R.
    Hess.R = Hess.R - rgrad.R*(X.RtR\(symm(X.R'*eta.R))) - eta.R*(X.RtR\(symm(X.R'*rgrad.R))) + X.R*(X.RtR\(symm(eta.R'*rgrad.R)));

    % Projection onto the horizontal space.
    Hess = M.proj(X, Hess);
end

M.proj = @projection;
% Projects an ambient vector eta (fields L, R) onto the horizontal space
% at X. Omega comes from the nested Sylvester solve in nested_sylvester.
function etaproj = projection(X, eta)
    X = prepare(X);

    eta.L = stiefel_proj(X.L, eta.L); % On the tangent space.
    SS = X.RtR;
    AS1 = 2*X.RtR*skew(X.L'*eta.L)*X.RtR;
    AS2 = 2*skew(X.RtR*(X.R'*eta.R));
    AS  = skew(AS1 + AS2);

    Omega = nested_sylvester(SS,AS);
    etaproj.L = eta.L - X.L*Omega;
    etaproj.R = eta.R - X.R*Omega;
end

M.tangent = M.proj;
M.tangent2ambient = @(X, eta) eta;

M.retr = @retraction;
% Y = retraction(X, eta, t): moves from X along t*eta (t defaults to 1).
% The L factor is mapped back to an orthonormal matrix by uf.
function Y = retraction(X, eta, t)
    if nargin < 3
        t = 1.0;
    end
    Y.L = uf(X.L + t*eta.L);
    Y.R = X.R + t*eta.R;

    % These are reused in the computation of the gradient and Hessian.
    Y = prepare(Y);
end

M.exp = @exponential;
% No true exponential map is implemented: this calls the retraction with
% the same arguments and emits a warning on every call.
function Y = exponential(X, eta, t)
    if nargin < 3
        t = 1.0;
    end

    Y = retraction(X, eta, t);
    % The message previously read "Lsed retraction instead."; it now
    % matches the wording used by the other fixed-rank factories.
    warning('manopt:fixedrankfactory_2factors_subspace_projection:exp', ...
        ['Exponential for fixed rank ' ...
        'manifold not implemented yet. Used retraction instead.']);
end

M.hash = @(X) ['z' hashmd5([X.L(:) ; X.R(:)])];

M.rand = @random;
% The factor L lives on a Stiefel manifold, hence we reuse
% its random generator.
stiefelm = stiefelfactory(m, k);
function X = random()
    X.L = stiefelm.rand();
    X.R = randn(n, k);
end

M.randvec = @randomvec;
% Random horizontal vector at X, scaled to unit norm w.r.t. M.norm.
function eta = randomvec(X)
    eta.L = randn(m, k);
    eta.R = randn(n, k);
    eta = projection(X, eta);
    nrm = M.norm(X, eta);
    eta.L = eta.L / nrm;
    eta.R = eta.R / nrm;
end

M.lincomb = @lincomb;

M.zerovec = @(X) struct('L', zeros(m, k),...
    'R', zeros(n, k));

% Transport by projecting d onto the horizontal space at x2; x1 is unused.
M.transp = @(x1, x2, d) projection(x2, d);

% vec and mat are not isometries, because of the scaled inner metric.
M.vec = @(X, U) [U.L(:) ; U.R(:)];
M.mat = @(X, u) struct('L', reshape(u(1:(m*k)), m, k), ...
    'R', reshape(u((m*k+1):end), n, k));
M.vecmatareisometries = @() false;

end

% Linear combination of tangent vectors.
function d = lincomb(x, a1, d1, a2, d2) %#ok
% Linear combination of tangent vectors with fields L and R.
%
% d = lincomb(x, a1, d1)          returns a1*d1.
% d = lincomb(x, a1, d1, a2, d2)  returns a1*d1 + a2*d2.
%
% The point x is not used. Any other number of inputs raises an error.

    if nargin == 3
        d.L = a1*d1.L;
        d.R = a1*d1.R;
    elseif nargin == 5
        d.L = a1*d1.L + a2*d2.L;
        d.R = a1*d1.R + a2*d2.R;
    else
        error('Bad use of fixedrankfactory_2factors_subspace_projection.lincomb.');
    end

end

function A = uf(A)
% Returns L*R', where A = L*S*R' is the economy-size SVD of A.
    [L, unused, R] = svd(A, 0); %#ok
    A = L*R';
end

function omega = nested_sylvester(sym_mat, asym_mat)
    % omega = nested_sylvester(sym_mat, asym_mat)
    % This function solves the system of nested Sylvester equations:
    %
    %     X*sym_mat + sym_mat*X = asym_mat
    %     Omega*sym_mat + sym_mat*Omega = X
    %
    % Mishra, Meyer, Bonnabel and Sepulchre, 'Fixed-rank matrix
    % factorizations and Riemannian low-rank optimization'

    % Uses the lyap function, but does not exploit the fact that it's
    % twice the same sym_mat matrix that comes into play.

    X = lyap(sym_mat, -asym_mat);
    omega = lyap(sym_mat, -X);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankfactory_3factors.m
================================================
function M = fixedrankfactory_3factors(m, n, k)
% Manifold of m-by-n matrices of rank k with polar quotient geometry.
%
% function M = fixedrankfactory_3factors(m, n, k)
%
% The first-order geometry follows the balanced quotient geometry described
% in the paper,
% "Linear regression under fixed-rank constraints: a Riemannian approach",
% G. Meyer, S. Bonnabel and R. Sepulchre, ICML 2011.
%
% Paper link: http://www.icml-2011.org/papers/350_icmlpaper.pdf.
%
% The second-order geometry follows from the paper
% "Fixed-rank matrix factorizations and Riemannian low-rank optimization",
% B. Mishra, G. Meyer, S. Bonnabel and R. Sepulchre,
% Computational Statistics, 29(3 - 4), pp. 591 - 621, 2014.
%
% A point X on the manifold is represented as a structure with three
% fields: L, S and R. The matrices L (mxk) and R (nxk) are orthonormal,
% while the matrix S (kxk) is a symmetric positive definite full rank
% matrix.
%
% Tangent vectors are represented as a structure with three fields: L, S
% and R.
%
%
% For first-order geometry, please cite the Manopt paper as well as the research paper:
%     @InProceedings{meyer2011linear,
%       Title        = {Linear regression under fixed-rank constraints: a {R}iemannian approach},
%       Author       = {Meyer, G. and Bonnabel, S. and Sepulchre, R.},
%       Booktitle    = {{28th International Conference on Machine Learning}},
%       Year         = {2011},
%       Organization = {{ICML}}
%     }
% For second-order geometry, please cite the Manopt paper as well as the research paper:
%     @Article{mishra2014fixedrank,
%       Title   = {Fixed-rank matrix factorizations and {Riemannian} low-rank optimization},
%       Author  = {Mishra, B. and Meyer, G. and Bonnabel, S. and Sepulchre, R.},
%       Journal = {Computational Statistics},
%       Year    = {2014},
%       Number  = {3-4},
%       Pages   = {591--621},
%       Volume  = {29},
%       Doi     = {10.1007/s00180-013-0464-z}
%     }
%
%
% See also fixedrankembeddedfactory fixedrankfactory_2factors fixedrankfactory_3factors_preconditioned

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors:
% Change log:

    M.name = @() sprintf('LSR'' quotient manifold of %dx%d matrices of rank %d', m, n, k);

    M.dim = @() (m+n-k)*k;

    % Choice of the metric on the orthonormal space is motivated by the
    % symmetry present in the space. The metric on the positive definite
    % space is its natural metric.
    M.inner = @(X, eta, zeta) eta.L(:).'*zeta.L(:) + eta.R(:).'*zeta.R(:) ...
        + trace( (X.S\eta.S) * (X.S\zeta.S) );

    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));

    M.dist = @(x, y) error('fixedrankfactory_3factors.dist not implemented yet.');

    M.typicaldist = @() 10*k;

    % Skew-symmetric part, symmetric part, and the map H -> H - L*symm(L'*H)
    % applied to the L and R factors below.
    skew = @(X) .5*(X-X');
    symm = @(X) .5*(X+X');
    stiefel_proj = @(L, H) H - L*symm(L'*H);

    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        % Converts a Euclidean gradient (fields L, S, R) at X into the
        % Riemannian gradient for the metric M.inner.
        rgrad.L = stiefel_proj(X.L, egrad.L);
        rgrad.S = X.S*symm(egrad.S)*X.S;
        rgrad.R = stiefel_proj(X.R, egrad.R);
    end

    M.ehess2rhess = @ehess2rhess;
    function Hess = ehess2rhess(X, egrad, ehess, eta)
        % Converts the Euclidean gradient and Hessian along eta into the
        % Riemannian Hessian along eta.

        % Riemannian gradient for the factor S.
        rgrad.S = X.S*symm(egrad.S)*X.S;

        % Directional derivatives of the Riemannian gradient.
        Hess.L = ehess.L - eta.L*symm(X.L'*egrad.L);
        Hess.L = stiefel_proj(X.L, Hess.L);

        Hess.R = ehess.R - eta.R*symm(X.R'*egrad.R);
        Hess.R = stiefel_proj(X.R, Hess.R);

        Hess.S = X.S*symm(ehess.S)*X.S + 2*symm(eta.S*symm(egrad.S)*X.S);

        % Correction factor for the non-constant metric on the factor S.
        Hess.S = Hess.S - symm(eta.S*(X.S\rgrad.S));

        % Projection onto the horizontal space.
        Hess = M.proj(X, Hess);
    end

    M.proj = @projection;
    function etaproj = projection(X, eta)
        % First, projection onto the tangent space of the total space.
        eta.L = stiefel_proj(X.L, eta.L);
        eta.R = stiefel_proj(X.R, eta.R);
        eta.S = symm(eta.S);

        % Then, projection onto the horizontal space.
        % omega solves SS*omega + omega*SS' = AS (see lyap).
        SS = X.S*X.S;
        AS = X.S*(skew(X.L'*eta.L) + skew(X.R'*eta.R) - 2*skew(X.S\eta.S))*X.S;
        omega = lyap(SS, -AS);

        etaproj.L = eta.L - X.L*omega;
        etaproj.S = eta.S - (X.S*omega - omega*X.S);
        etaproj.R = eta.R - X.R*omega;
    end

    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;

    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        % Retraction of t*eta at X; t defaults to 1.
        if nargin < 3
            t = 1.0;
        end

        % With X.S = L'*L (Cholesky), the S factor is moved through a
        % matrix exponential; L and R are mapped back through uf.
        L = chol(X.S);
        Y.S = L'*expm(L'\(t*eta.S)/L)*L;
        Y.L = uf(X.L + t*eta.L);
        Y.R = uf(X.R + t*eta.R);
    end

    M.exp = @exponential;
    function Y = exponential(X, eta, t)
        % Not a true exponential: falls back on the retraction and warns.
        if nargin < 3
            t = 1.0;
        end
        Y = retraction(X, eta, t);
        warning('manopt:fixedrankfactory_3factors:exp', ...
            ['Exponential for fixed rank ' ...
            'manifold not implemented yet. Used retraction instead.']);
    end

    M.hash = @(X) ['z' hashmd5([X.L(:) ; X.S(:) ; X.R(:)])];

    M.rand = @random;
    % Factors L and R are on Stiefel manifolds, hence we reuse
    % their random generators.
    stiefelm = stiefelfactory(m, k);
    stiefeln = stiefelfactory(n, k);
    function X = random()
        X.L = stiefelm.rand();
        X.R = stiefeln.rand();
        % Diagonal S with entries drawn from 1 + rand.
        X.S = diag(1+rand(k, 1));
    end

    M.randvec = @randomvec;
    function eta = randomvec(X)
        % A random vector on the horizontal space, scaled to unit norm.
        eta.L = randn(m, k);
        eta.R = randn(n, k);
        eta.S = randn(k, k);
        eta = projection(X, eta);
        nrm = M.norm(X, eta);
        eta.L = eta.L / nrm;
        eta.R = eta.R / nrm;
        eta.S = eta.S / nrm;
    end

    M.lincomb = @lincomb;

    M.zerovec = @(X) struct('L', zeros(m, k), 'S', zeros(k, k), ...
        'R', zeros(n, k));

    M.transp = @(x1, x2, d) projection(x2, d);

    % vec and mat are not isometries, because of the scaled inner metric.
    M.vec = @(X, U) [U.L(:) ; U.S(:); U.R(:)];
    M.mat = @(X, u) struct('L', reshape(u(1:(m*k)), m, k), ...
        'S', reshape(u((m*k+1): m*k + k*k), k, k), ...
        'R', reshape(u((m*k+ k*k + 1):end), n, k));
    M.vecmatareisometries = @() false;

end

% Linear combination of tangent vectors.
function d = lincomb(x, a1, d1, a2, d2) %#ok

    if nargin == 3
        d.L = a1*d1.L;
        d.R = a1*d1.R;
        d.S = a1*d1.S;
    elseif nargin == 5
        d.L = a1*d1.L + a2*d2.L;
        d.R = a1*d1.R + a2*d2.R;
        d.S = a1*d1.S + a2*d2.S;
    else
        error('Bad use of fixedrankfactory_3factors.lincomb.');
    end

end

function A = uf(A)
% Returns L*R', where A = L*S*R' is the economy-size SVD of A.
    [L, unused, R] = svd(A, 0); %#ok
    A = L*R';
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankfactory_3factors_preconditioned.m
================================================
function M = fixedrankfactory_3factors_preconditioned(m, n, k)
% Manifold of m-by-n matrices of rank k with three factor quotient geometry.
%
% function M = fixedrankfactory_3factors_preconditioned(m, n, k)
%
% This geometry is tuned to least squares problems such as low-rank matrix
% completion with ell-2 loss.
%
% A point X on the manifold is represented as a structure with three
% fields: L, S and R. The matrices L (mxk) and R (nxk) are orthonormal,
% while the matrix S (kxk) is a full rank matrix such that X = L*S*R'.
%
% Tangent vectors are represented as a structure with three fields: L, S
% and R.
%
% Please cite the Manopt paper as well as the research paper:
%     @InProceedings{mishra2014r3mc,
%       Title        = {{R3MC}: A {R}iemannian three-factor algorithm for low-rank matrix completion},
%       Author       = {Mishra, B. and Sepulchre, R.},
%       Booktitle    = {{53rd IEEE Conference on Decision and Control}},
%       Year         = {2014},
%       Organization = {{IEEE CDC}}
%     }
%
%
% See also: fixedrankfactory_3factors fixedrankfactory_2factors_preconditioned

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors:
% Change log:
%
%    April 04, 2015 (BM):
%       Cosmetic changes including avoiding storing the inverse of a kxk matrix.

    M.name = @() sprintf('LSR'' (tuned for least square problems) quotient manifold of %dx%d matrices of rank %d', m, n, k);

    M.dim = @() (m+n-k)*k;

    % Some precomputations at the point X that are to be used in the inner product (and
    % pretty much everywhere else).
    function X = prepare(X)
        % Caches X.SSt = S*S' and X.StS = S'*S unless both are present.
        if ~all(isfield(X,{'StS','SSt'}) == 1)
            X.SSt = X.S*X.S';
            X.StS = X.S'*X.S;
        end
    end

    % The choice of metric is motivated by symmetry and tuned to least square
    % objective function.
    M.inner = @iproduct;
    function ip = iproduct(X, eta, zeta)
        X = prepare(X);
        ip = trace(X.SSt*(eta.L'*zeta.L)) + trace(X.StS*(eta.R'*zeta.R)) ...
            + trace(eta.S'*zeta.S);
    end

    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));

    M.dist = @(x, y) error('fixedrankfactory_3factors_preconditioned.dist not implemented yet.');

    M.typicaldist = @() 10*k;

    skew = @(X) .5*(X-X');
    symm = @(X) .5*(X+X');

    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        % Converts a Euclidean gradient (fields L, S, R) at X into the
        % Riemannian gradient for the metric M.inner.
        X = prepare(X);

        SSL = X.SSt;
        ASL = 2*symm(SSL*(egrad.S*X.S'));

        SSR = X.StS;
        ASR = 2*symm(SSR*(egrad.S'*X.S));

        [BL, BR] = tangent_space_lyap(X.S, ASL, ASR); % It computes the solution without calling Matlab's Lyap.

        rgrad.L = (egrad.L - X.L*BL)/X.SSt;
        rgrad.R = (egrad.R - X.R*BR)/X.StS;
        rgrad.S = egrad.S;

        % Debug
        % BL1 = lyap(SSL, -ASL); % Alternate way
        % BR1 = lyap(SSR, -ASR);
        % norm(skew(X.SSt*(rgrad.L'*X.L) + rgrad.S*X.S'), 'fro')
        % norm(skew(X.StS*(rgrad.R'*X.R) - X.S'*rgrad.S), 'fro')

    end

    M.ehess2rhess = @ehess2rhess;
    function Hess = ehess2rhess(X, egrad, ehess, eta)
        % Converts the Euclidean gradient and Hessian along eta into the
        % Riemannian Hessian along eta.
        X = prepare(X);

        % Riemannian gradient.
        SSL = X.SSt;
        ASL = 2*symm(SSL*(egrad.S*X.S'));
        SSR = X.StS;
        ASR = 2*symm(SSR*(egrad.S'*X.S));
        [BL, BR] = tangent_space_lyap(X.S, ASL, ASR);

        rgrad.L = (egrad.L - X.L*BL)/X.SSt;
        rgrad.R = (egrad.R - X.R*BR)/X.StS;
        rgrad.S = egrad.S;

        % Directional derivative of the Riemannian gradient.
        ASLdot = 2*symm((2*symm(X.S*eta.S')*(egrad.S*X.S')) + X.SSt*(ehess.S*X.S' + egrad.S*eta.S')) - 4*symm(symm(eta.S*X.S')*BL);
        ASRdot = 2*symm((2*symm(X.S'*eta.S)*(egrad.S'*X.S)) + X.StS*(ehess.S'*X.S + egrad.S'*eta.S)) - 4*symm(symm(eta.S'*X.S)*BR);

        % SSLdot = X.SSt;
        % SSRdot = X.StS;
        % BLdot = lyap(SSLdot, -ASLdot);
        % BRdot = lyap(SSRdot, -ASRdot);

        [BLdot, BRdot] = tangent_space_lyap(X.S, ASLdot, ASRdot);

        Hess.L = (ehess.L - eta.L*BL - X.L*BLdot - 2*rgrad.L*symm(eta.S*X.S'))/X.SSt;
        Hess.R = (ehess.R - eta.R*BR - X.R*BRdot - 2*rgrad.R*symm(eta.S'*X.S))/X.StS;
        Hess.S = ehess.S;

        % BM: Till this, everything seems correct.
        % We still need a correction factor for the non-constant metric
        % that is imposed.
        % The computation of the correction factor owes itself to the Koszul formula.
        % This corresponds to the Riemannian connection in the Euclidean space with the
        % scaled metric.
        Hess.L = Hess.L + (eta.L*symm(rgrad.S*X.S') + rgrad.L*symm(eta.S*X.S'))/X.SSt;
        Hess.R = Hess.R + (eta.R*symm(rgrad.S'*X.S) + rgrad.R*symm(eta.S'*X.S))/X.StS;
        Hess.S = Hess.S - symm(rgrad.L'*eta.L)*X.S - X.S*symm(rgrad.R'*eta.R);

        % The Riemannian connection on the quotient space is the
        % projection of the Riemannian connection in the ambient space onto the tangent space of the total space and
        % then onto the horizontal space.
        % This is accomplished by the following operation.
        Hess = M.proj(X, Hess);

        % Debug
        % norm(skew(X.SSt*(Hess.L'*X.L) + Hess.S*X.S'))
        % norm(skew(X.StS*(Hess.R'*X.R) - X.S'*Hess.S))

    end

    M.proj = @projection;
    function etaproj = projection(X, eta)
        % Projects eta (fields L, S, R) onto the tangent space of the
        % total space at X and then onto the horizontal space.
        X = prepare(X);

        % First, projection onto the tangent space of the total space.
        % BL and BR solve
        %     SSt*BL + BL*SSt = ASL,    StS*BR + BR*StS = ASR.
        % Both are obtained from the SVD-based solver used elsewhere in
        % this file instead of two calls to lyap.
        ASL = 2*symm(X.SSt*(X.L'*eta.L)*X.SSt);
        ASR = 2*symm(X.StS*(X.R'*eta.R)*X.StS);
        [BL, BR] = tangent_space_lyap(X.S, ASL, ASR);
        eta.L = eta.L - X.L*(BL/X.SSt);
        eta.R = eta.R - X.R*(BR/X.StS);

        % Project onto the horizontal space
        PU = skew((X.L'*eta.L)*X.SSt) + skew(X.S*eta.S');
        PV = skew((X.R'*eta.R)*X.StS) + skew(X.S'*eta.S);
        [Omega1, Omega2] = coupled_lyap(X.S, PU, PV);
        % norm(2*skew(Omega1*X.SSt) - PU -(X.S*Omega2*X.S'),'fro' )
        % norm(2*skew(Omega2*X.StS) - PV -(X.S'*Omega1*X.S),'fro' )
        %

        etaproj.L = eta.L - (X.L*Omega1);
        etaproj.S = eta.S - (X.S*Omega2 - Omega1*X.S) ;
        etaproj.R = eta.R - (X.R*Omega2);

        % Debug
        % norm(skew(X.SSt*(etaproj.L'*X.L) + etaproj.S*X.S'))
        % norm(skew(X.StS*(etaproj.R'*X.R) - X.S'*etaproj.S))
        %
        % norm(skew(X.SSt*(etaproj.L'*X.L) - X.S*etaproj.S'))
        % norm(skew(X.StS*(etaproj.R'*X.R) + etaproj.S'*X.S))

    end

    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;

    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        % Retraction of t*eta at X; t defaults to 1.
        if nargin < 3
            t = 1.0;
        end

        Y.S = (X.S + t*eta.S);
        Y.L = uf((X.L + t*eta.L));
        Y.R = uf((X.R + t*eta.R));

        Y = prepare(Y);
    end

    M.exp = @exponential;
    function Y = exponential(X, eta, t)
        % Not a true exponential: falls back on the retraction and warns.
        if nargin < 3
            t = 1.0;
        end
        Y = retraction(X, eta, t);
        warning('manopt:fixedrankfactory_3factors_preconditioned:exp', ...
            ['Exponential for fixed rank ' ...
            'manifold not implemented yet. Used retraction instead.']);
    end

    M.hash = @(X) ['z' hashmd5([X.L(:) ; X.S(:) ; X.R(:)])];

    M.rand = @random;
    % Factors L and R live on Stiefel manifolds, hence we will reuse
    % their random generator.
    stiefelm = stiefelfactory(m, k);
    stiefeln = stiefelfactory(n, k);
    function X = random()
        X.L = stiefelm.rand();
        X.R = stiefeln.rand();
        X.S = diag(1+rand(k, 1));

        X = prepare(X);
    end

    M.randvec = @randomvec;
    function eta = randomvec(X)
        % A random vector on the horizontal space
        eta.L = randn(m, k);
        eta.R = randn(n, k);
        eta.S = randn(k, k);
        eta = projection(X, eta);
        nrm = M.norm(X, eta);
        eta.L = eta.L / nrm;
        eta.R = eta.R / nrm;
        eta.S = eta.S / nrm;
    end

    M.lincomb = @lincomb;

    M.zerovec = @(X) struct('L', zeros(m, k), 'S', zeros(k, k), ...
        'R', zeros(n, k));

    M.transp = @(x1, x2, d) projection(x2, d);

    % vec and mat are not isometries, because of the unusual inner metric.
    M.vec = @(X, U) [U.L(:) ; U.S(:); U.R(:)];
    M.mat = @(X, u) struct('L', reshape(u(1:(m*k)), m, k), ...
        'S', reshape(u((m*k+1): m*k + k*k), k, k), ...
        'R', reshape(u((m*k+ k*k + 1):end), n, k));
    M.vecmatareisometries = @() false;

end

% Linear combination of tangent vectors
function d = lincomb(x, a1, d1, a2, d2) %#ok

    if nargin == 3
        d.L = a1*d1.L;
        d.R = a1*d1.R;
        d.S = a1*d1.S;
    elseif nargin == 5
        d.L = a1*d1.L + a2*d2.L;
        d.R = a1*d1.R + a2*d2.R;
        d.S = a1*d1.S + a2*d2.S;
    else
        error('Bad use of fixedrankfactory_3factors_preconditioned.lincomb.');
    end

end

function A = uf(A)
% Returns L*R', where A = L*S*R' is the economy-size SVD of A.
    [L, unused, R] = svd(A, 0); %#ok
    A = L*R';
end

function[BU, BV] = tangent_space_lyap(R, E, F)
    % We intend to solve the linear system
    %     RR^T  BU + BU RR^T  = E
    %     R^T R BV + BV R^T R = F
    % for BU and BV.
    %
    % This can be solved using two calls to the Matlab's lyap.
    % However, we can still have a more efficient implementation
    % that does not require the full functionality of Matlab's lyap.

    [U, Sigma, V] = svd(R);
    E_mod = U'*E*U;
    F_mod = V'*F*V;
    b1 = E_mod(:);
    b2 = F_mod(:);

    r = size(Sigma, 1);
    sig = diag(Sigma); % all the singular values in a vector
    sig1 = sig*ones(1, r); % columns repeat
    sig1t = sig1'; % rows repeat
    s1 = sig1(:);
    s2 = sig1t(:);

    % The block elements
    a = s1.^2 + s2.^2; % a column vector

    % Solve the linear system of equations
    cu = b1./a; %a.\b1;
    cv = b2./a; %a.\b2;

    % Matricize
    CU = reshape(cu, r, r);
    CV = reshape(cv, r, r);

    % Do the similarity transforms
    BU = U*CU*U';
    BV = V*CV*V';

    % %% Debug
    %
    % norm(R*R'*BU + BU*R*R' - E, 'fro');
    % norm((Sigma.^2)*CU + CU*(Sigma.^2) - E_mod, 'fro');
    % norm(a.*cu - b1, 'fro');
    %
    % norm(R'*R*BV + BV*R'*R - F, 'fro');
    %
    % BU1 = lyap(R*R', - E);
    % norm(R*R'*BU1 + BU1*R*R' - E, 'fro');
    %
    % BV1 = lyap(R'*R, - F);
    % norm(R'*R*BV1 + BV1*R'*R - F, 'fro');
    %
    % % as accurate as the lyap
    % norm(BU - BU1, 'fro')
    % norm(BV - BV1, 'fro')
end

function[Omega1, Omega2] = coupled_lyap(R, E, F)
    % We intend to solve the coupled system of Lyapunov equations
    %
    %     RR^T  Omega1 + Omega1 RR^T  - R   Omega2 R^T = E
    %     R^T R Omega2 + Omega2 R^T R - R^T Omega1 R   = F,
    %
    % for Omega1 and Omega2, both are skew symmetric matrices.
    %
    % Below is an efficient implementation

    [U, Sigma, V] = svd(R);
    E_mod = U'*E*U;
    F_mod = V'*F*V;
    b1 = E_mod(:);
    b2 = F_mod(:);

    r = size(Sigma, 1);
    sig = diag(Sigma); % All the singular values in a vector
    sig1 = sig*ones(1, r); % Columns repeat
    sig1t = sig1'; % Rows repeat
    s1 = sig1(:);
    s2 = sig1t(:);

    % The block elements
    a = s1.^2 + s2.^2; % A column vector
    c = s1.*s2;

    % Solve directly using the formula
    % A = diag(a);
    % C = diag(c);
    % Y1_sol = (A*(C\A) - C) \ (b2 + A*(C\b1));
    % Y2_sol = A\(b2 + C*Y1_sol);
    Y1_sol = (b2 + (a./c).*b1) ./ ((a.^2)./c - c);
    Y2_sol = (b2 + c.*Y1_sol)./a;

    % Matricize
    Omega1 = reshape(Y1_sol, r, r);
    Omega2 = reshape(Y2_sol, r, r);

    % Do the similarity transforms
    Omega1 = U*Omega1*U';
    Omega2 = V*Omega2*V';

    % %% Debug: whether we have the right solution.
    % norm(R*R'*Omega1 + Omega1*R*R' - R*Omega2*R' - E, 'fro')
    % norm(R'*R*Omega2 + Omega2*R'*R - R'*Omega1*R - F, 'fro')
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedranktensors/fixedrankfactory_tucker_preconditioned.m
================================================
function M = fixedrankfactory_tucker_preconditioned(tensor_size, tensor_rank)
% Manifold of fixed multilinear rank tensors in Tucker format.
%
% function M = fixedrankfactory_tucker_preconditioned(tensor_size, tensor_rank)
%
% n1 = tensor_size(1);
% n2 = tensor_size(2);
% n3 = tensor_size(3);
% r1 = tensor_rank(1);
% r2 = tensor_rank(2);
% r3 = tensor_rank(3);
%
% A point X on the manifold is represented as a structure with four
% fields: U1, U2, U3 and G. The matrices U1 (n1-by-r1), U2 (n2-by-r2),
% and U3 (n3-by-r3) are orthogonal matrices. G (r1-by-r2-by-r3) is a
% multidimensional array.
%
% Tangent vectors are represented as a structure with four fields:
% U1, U2, U3, and G.
%
% We exploit the quotient nature of Tucker decompositions to impose a
% scaled inner product on the manifold. This suits least-squares problems.
% For details, refer to the technical report:
% "{R}iemannian preconditioning for tensor completion",
% H. Kasai and B. Mishra, Arxiv preprint arXiv:1506.02159, 2015.
%
% Paper link: http://arxiv.org/abs/1506.02159.
%
% Please cite the Manopt paper as well as the research paper:
%     @TechReport{kasai2015precon,
%       Title   = {{R}iemannian preconditioning for tensor completion},
%       Author  = {Kasai, H. and Mishra, B.},
%       Journal = {Arxiv preprint arXiv:1506.02159},
%       Year    = {2015}
%     }

% Original authors: Hiroyuki Kasai and Bamdev Mishra, June 5, 2015.
% Contributors:
% Change log:

    % Only 3-order tensors are handled. Inputs that are too short are
    % rejected here with an explicit message rather than failing on the
    % indexing below.
    if length(tensor_rank) ~= 3 || length(tensor_size) < 3
        error('Bad usage of fixedrankfactory_tucker_preconditioned. Currently, only handles 3-order tensors.');
    end

    % Tensor size
    n1 = tensor_size(1);
    n2 = tensor_size(2);
    n3 = tensor_size(3);

    % Core size or multilinear rank
    r1 = tensor_rank(1);
    r2 = tensor_rank(2);
    r3 = tensor_rank(3);

    speyer1 = speye(r1); % Sparse version of identity that is used in M.proj
    speyer2 = speye(r2);
    speyer3 = speye(r3);

    M.name = @() sprintf('G x U1 x U2 x U3 quotient Tucker manifold of %d-by-%d-by-%d tensor of rank %d-by-%d-by-%d.', n1, n2, n3, r1, r2, r3);

    M.dim = @() n1*r1-r1^2 + n2*r2-r2^2 + n3*r3-r3^2 + r1*r2*r3;

    % Some precomputations at point X to be used in the inner product (and
    % pretty much everywhere else)
    function X = prepare(X)
        % Caches the three unfoldings G1, G2, G3 of the core X.G and
        % their Gram matrices, unless all six fields are present.
        if ~all(isfield(X,{'G1G1t','G1',...
                'G2G2t','G2', ...
                'G3G3t','G3'}) == 1)

            X.G1 = reshape(X.G, r1, r2*r3);
            X.G1G1t = X.G1*X.G1'; % Positive definite

            X.G2 = reshape(permute(X.G, [2 1 3]), r2, r1*r3);
            X.G2G2t = X.G2*X.G2'; % Positive definite

            X.G3 = reshape(permute(X.G, [3 1 2]), r3, r1*r2);
            X.G3G3t = X.G3*X.G3'; % Positive definite
        end
    end

    % Choice of metric is motivated by symmetry and tuned to least-squares
    % cost function
    M.inner = @iproduct;
    function ip = iproduct(X, eta, zeta)
        X = prepare(X);
        ip = trace(X.G1G1t*(eta.U1'*zeta.U1)) ...
            + trace(X.G2G2t*(eta.U2'*zeta.U2)) ...
            + trace(X.G3G3t*(eta.U3'*zeta.U3)) ...
            + (eta.G(:)'*zeta.G(:));
    end

    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));

    M.dist = @(x, y) error('fixedrankfactory_tucker_preconditioned.dist not implemented yet.');

    M.typicaldist = @() 10*n1*r1; % BM: To do

    skew = @(X) .5*(X-X');
    symm = @(X) .5*(X+X');

    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        % Converts a Euclidean gradient (fields U1, U2, U3, G) at X into
        % the Riemannian gradient for the metric M.inner.
        X = prepare(X); % Reuse already computed terms

        SSU1 = X.G1G1t;
        ASU1 = 2*symm(SSU1*(X.U1' * egrad.U1));

        SSU2 = X.G2G2t;
        ASU2 = 2*symm(SSU2*(X.U2' * egrad.U2));

        SSU3 = X.G3G3t;
        ASU3 = 2*symm(SSU3*(X.U3' * egrad.U3));

        BU1 = lyap(SSU1, -ASU1);
        BU2 = lyap(SSU2, -ASU2);
        BU3 = lyap(SSU3, -ASU3);

        % The lyap solutions ensure that the Riemannian gradient rgrad
        % is now on the tangent space. From the Riemannian submersion
        % theory, it also belongs to the horizontal space. Therefore,
        % no need to further project it on the horizontal space.

        rgrad.U1 = (egrad.U1 - X.U1*BU1)/X.G1G1t;
        rgrad.U2 = (egrad.U2 - X.U2*BU2)/X.G2G2t;
        rgrad.U3 = (egrad.U3 - X.U3*BU3)/X.G3G3t;
        rgrad.G = egrad.G;

    end

    M.ehess2rhess = @ehess2rhess;
    function Hess = ehess2rhess(X, egrad, ehess, eta)
        % Converts the Euclidean gradient and Hessian along eta into the
        % Riemannian Hessian along eta.
        X = prepare(X); % Reuse already computed terms

        % Riemannian gradient
        SSU1 = X.G1G1t;
        ASU1 = 2*symm(SSU1*(X.U1' * egrad.U1));
        SSU2 = X.G2G2t;
        ASU2 = 2*symm(SSU2*(X.U2' * egrad.U2));
        SSU3 = X.G3G3t;
        ASU3 = 2*symm(SSU3*(X.U3' * egrad.U3));

        BU1 = lyap(SSU1, -ASU1);
        BU2 = lyap(SSU2, -ASU2);
        BU3 = lyap(SSU3, -ASU3);

        rgrad.U1 = (egrad.U1 - X.U1*BU1)/X.G1G1t;
        rgrad.U2 = (egrad.U2 - X.U2*BU2)/X.G2G2t;
        rgrad.U3 = (egrad.U3 - X.U3*BU3)/X.G3G3t;
        rgrad.G = egrad.G;

        % Directional derivative of Riemannian gradient
        eta_G1 = reshape(eta.G, r1, r2*r3); % double(tenmat(eta.G,1));
        eta_G2 = reshape(permute(eta.G, [2 1 3]), r2, r1*r3); % double(tenmat(eta.G,2));
        eta_G3 = reshape(permute(eta.G, [3 1 2]), r3, r1*r2); % double(tenmat(eta.G,3));
        egrad_G1 = reshape(egrad.G, r1, r2*r3); % double(tenmat(egrad.G,1));
        egrad_G2 = reshape(permute(egrad.G, [2 1 3]), r2, r1*r3); % double(tenmat(egrad.G,2));
        egrad_G3 = reshape(permute(egrad.G, [3 1 2]), r3, r1*r2); % double(tenmat(egrad.G,3));
        ehess_G1 = reshape(ehess.G, r1, r2*r3); % double(tenmat(ehess.G,1));
        ehess_G2 = reshape(permute(ehess.G, [2 1 3]), r2, r1*r3); % double(tenmat(ehess.G,2));
        ehess_G3 = reshape(permute(ehess.G, [3 1 2]), r3, r1*r2); % double(tenmat(ehess.G,3));
        rgrad_G1 = reshape(rgrad.G, r1, r2*r3); % double(tenmat(rgrad.G,1));
        rgrad_G2 = reshape(permute(rgrad.G, [2 1 3]), r2, r1*r3); % double(tenmat(rgrad.G,2));
        rgrad_G3 = reshape(permute(rgrad.G, [3 1 2]), r3, r1*r2); % double(tenmat(rgrad.G,3));

        ASU1dot = 2*symm((2*symm(X.G1*eta_G1')*(egrad_G1*X.G1')) + X.G1G1t*(ehess_G1*X.G1' + egrad_G1*eta_G1')) - 4*symm(symm(eta_G1*X.G1')*BU1);
        ASU2dot = 2*symm((2*symm(X.G2*eta_G2')*(egrad_G2*X.G2')) + X.G2G2t*(ehess_G2*X.G2' + egrad_G2*eta_G2')) - 4*symm(symm(eta_G2*X.G2')*BU2);
        ASU3dot = 2*symm((2*symm(X.G3*eta_G3')*(egrad_G3*X.G3')) + X.G3G3t*(ehess_G3*X.G3' + egrad_G3*eta_G3')) - 4*symm(symm(eta_G3*X.G3')*BU3);

        SSU1dot = X.G1G1t;
        SSU2dot = X.G2G2t;
        SSU3dot = X.G3G3t;
        BU1dot = lyap(SSU1dot, -ASU1dot);
        BU2dot = lyap(SSU2dot, -ASU2dot);
        BU3dot = lyap(SSU3dot, -ASU3dot);

        Hess.U1 = (ehess.U1 - eta.U1*BU1 - X.U1*BU1dot - 2*rgrad.U1*symm(eta_G1*X.G1'))/X.G1G1t;
        Hess.U2 = (ehess.U2 - eta.U2*BU2 - X.U2*BU2dot - 2*rgrad.U2*symm(eta_G2*X.G2'))/X.G2G2t;
        Hess.U3 = (ehess.U3 - eta.U3*BU3 - X.U3*BU3dot - 2*rgrad.U3*symm(eta_G3*X.G3'))/X.G3G3t;
        Hess.G = ehess.G;

        % BM: we need a correction factor for the non-constant metric
        % The correction factor owes itself to the Koszul formula.
        % This is the Riemannian connection in the Euclidean space with the
        % scaled metric.

        Hess.U1 = Hess.U1 + (eta.U1*symm(rgrad_G1*X.G1') + rgrad.U1*symm(eta_G1*X.G1'))/X.G1G1t;
        Hess.U2 = Hess.U2 + (eta.U2*symm(rgrad_G2*X.G2') + rgrad.U2*symm(eta_G2*X.G2'))/X.G2G2t;
        Hess.U3 = Hess.U3 + (eta.U3*symm(rgrad_G3*X.G3') + rgrad.U3*symm(eta_G3*X.G3'))/X.G3G3t;
        Hess.G = Hess.G - permute(reshape(symm(rgrad.U1'*eta.U1)*X.G1,r1,r2,r3), [1 2 3]) ...
            - permute(reshape(symm(rgrad.U2'*eta.U2)*X.G2,r2,r1,r3), [2 1 3]) ...
            - permute(reshape(symm(rgrad.U3'*eta.U3)*X.G3,r3,r1,r2), [2 3 1]);

        % The Riemannian connection on the quotient space is the
        % projection on the tangent space of the total space and then onto the horizontal
        % space. This is accomplished with the following operation.

        Hess = M.proj(X, Hess);

    end

    M.proj = @projection;
    function etaproj = projection(X, eta)
        % Projects eta (fields U1, U2, U3, G) onto the tangent space of
        % the total space at X and then onto the horizontal space.
        X = prepare(X); % Reuse already computed terms

        % First, projection onto tangent space of total space
        SSU1 = X.G1G1t;
        ASU1 = 2*symm(X.G1G1t*(X.U1'*eta.U1)*X.G1G1t);
        BU1 = lyap(SSU1, -ASU1);
        eta.U1 = eta.U1 - X.U1*(BU1/X.G1G1t);

        SSU2 = X.G2G2t;
        ASU2 = 2*symm(X.G2G2t*(X.U2'*eta.U2)*X.G2G2t);
        BU2 = lyap(SSU2, -ASU2);
        eta.U2 = eta.U2 - X.U2*(BU2/X.G2G2t);

        SSU3 = X.G3G3t;
        ASU3 = 2*symm(X.G3G3t*(X.U3'*eta.U3)*X.G3G3t);
        BU3 = lyap(SSU3, -ASU3);
        eta.U3 = eta.U3 - X.U3*(BU3/X.G3G3t);

        eta_G1 = reshape(eta.G, r1, r2*r3);
        eta_G2 = reshape(permute(eta.G, [2 1 3]), r2, r1*r3);
        eta_G3 = reshape(permute(eta.G, [3 1 2]), r3, r1*r2);

        % Project onto the horizontal space.
        PU1 = skew((X.U1'*eta.U1)*X.G1G1t) + skew(X.G1*eta_G1');
        PU2 = skew((X.U2'*eta.U2)*X.G2G2t) + skew(X.G2*eta_G2');
        PU3 = skew((X.U3'*eta.U3)*X.G3G3t) + skew(X.G3*eta_G3');

        % Calculate Omega1, Omega2, Omega3 that are required in finding the
        % horizontal component.
        % We use the Matlab's pcg function to solve the system efficiently.
        % We exploit the structure by designing a good preconditioner as well.
        % The preconditioner takes the block positive definite part of the
        % linear system.

        % Options for PCG
        tol_omegax_pcg = 1e-6; % BM: standard tolerance as suggested in PCG.
        max_iterations_pcg = 15;% BM: fix this to 15 for simulations. In practice, it requires 7 to 10 iterations.

        % Preconditioner for PCG: block diagonal with one block per mode.
        % blkdiag assembles the blocks directly, so no dense
        % (r1^2+r2^2+r3^2)-square matrix is allocated first.
        M1 = kron(speyer1,SSU1) + kron(SSU1, speyer1);
        M2 = kron(speyer2,SSU2) + kron(SSU2, speyer2);
        M3 = kron(speyer3,SSU3) + kron(SSU3, speyer3);

        Mprecon_pcg = sparse(blkdiag(M1, M2, M3));

        % Call PCG. The second output (convergence flag) is requested but
        % not inspected.
        [Omegaxsol, unused] = pcg(@compute_residual, [PU1(:); PU2(:); PU3(:)], tol_omegax_pcg, max_iterations_pcg, Mprecon_pcg); %#ok

        Omega1 = reshape(Omegaxsol(1:r1^2), r1, r1);
        Omega2 = reshape(Omegaxsol(1 + r1^2 : r1^2 + r2^2), r2, r2);
        Omega3 = reshape(Omegaxsol(1 + r1^2 + r2^2 : end), r3, r3);

        function AOmegax = compute_residual(Omegax)
            % Applies the linear operator of the coupled system to the
            % stacked vector [Omegax1(:); Omegax2(:); Omegax3(:)].
            Omegax1 = reshape(Omegax(1:r1^2), r1, r1);
            Omegax2 = reshape(Omegax(1 + r1^2 : r1^2 + r2^2), r2, r2);
            Omegax3 = reshape(Omegax(1 + r1^2 + r2^2 : end), r3, r3);

            OffsetU1 = X.G1*((kron(speyer3,Omegax2) + kron(Omegax3, speyer2))*X.G1');
            OffsetU2 = X.G2*((kron(speyer3,Omegax1) + kron(Omegax3, speyer1))*X.G2');
            OffsetU3 = X.G3*((kron(speyer2,Omegax1) + kron(Omegax2, speyer1))*X.G3');

            residual1 = Omegax1*SSU1 + SSU1*Omegax1 - OffsetU1;
            residual2 = Omegax2*SSU2 + SSU2*Omegax2 - OffsetU2;
            residual3 = Omegax3*SSU3 + SSU3*Omegax3 - OffsetU3;

            AOmegax = [residual1(:); residual2(:); residual3(:)];
        end

        % Calculate projection along U1, U2, and U3
        etaproj.U1 = eta.U1 - (X.U1*Omega1);
        etaproj.U2 = eta.U2 - (X.U2*Omega2);
        etaproj.U3 = eta.U3 - (X.U3*Omega3);

        % Calculate projection along G
        GOmega1 = reshape(Omega1*X.G1, r1, r2, r3);
        GOmega2 = permute(reshape(Omega2*X.G2, r2, r1, r3), [2 1 3]);
        GOmega3 = permute(reshape(Omega3*X.G3, r3, r1, r2), [2 3 1]);
        etaproj.G = eta.G + (GOmega1+GOmega2+GOmega3);

    end

    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;

    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        % Retraction of t*eta at X; t defaults to 1.
        if nargin < 3
            t = 1.0;
        end

        Y.G = (X.G + t*eta.G);
        Y.U1 = uf((X.U1 + t*eta.U1)); % U factor of Polar factorization
        Y.U2 = uf((X.U2 + t*eta.U2));
        Y.U3 = uf((X.U3 + t*eta.U3));

        Y = prepare(Y);
    end

    M.exp = @exponential;
    function Y = exponential(X, eta, t)
        % Not a true exponential: falls back on the retraction and warns.
        if nargin < 3
            t = 1.0;
        end
        Y = retraction(X, eta, t);
        warning('manopt:fixedrankfactory_tucker_preconditioned:exp', ...
            ['Exponential for fixed rank ' ...
            'Tucker manifold not implemented yet. Used retraction instead.']);
    end

    M.hash = @(X) ['z' hashmd5([sum(X.U1(:)) ; sum(X.U2(:)); sum(X.U3(:)); sum(X.G(:)) ])]; % Efficient, suggested by Bart Vandereycken.
    % M.hash = @(X) ['z' hashmd5([X.U1(:); X.U2(:); X.U3(:); X.G(:)])];

    M.rand = @random;
    function X = random()
        %         % Random generator on the total space
        %         % Factors U1, U2, and U3 are on Stiefel manifolds, hence we reuse
        %         % their random generator.
        %         stiefell = stiefelfactory(n1, r1);
        %         stiefelm = stiefelfactory(n2, r2);
        %         stiefeln = stiefelfactory(n3, r3);
        %
        %         X.U1 = stiefell.rand();
        %         X.U2 = stiefelm.rand();
        %         X.U3 = stiefeln.rand();
        %
        %         % Random initialization: generalization of randn(r1, r1 = r2) in the
        %         % matrix case.
        %         X.G = randn(r1,r2,r3);

        % Random generator on the fixed-rank space from a uniform distribution on [0, 1].
        [U1, R1] = qr(rand(n1, r1), 0);
        [U2, R2] = qr(rand(n2, r2), 0);
        [U3, R3] = qr(rand(n3, r3), 0);
        C = rand(r1, r2, r3);

        C1 = reshape(C, r1, r2*r3);
        CR1 = reshape(R1*C1, r1, r2, r3); % Multiplication by R1

        C2 = reshape(permute(CR1, [2 1 3]), r2, r1*r3);
        CR1R2 = permute(reshape(R2*C2, r2, r1, r3), [2 1 3]); % Multiplication by R2

        C3 = reshape(permute(CR1R2, [3 1 2]), r3, r1*r2);
        CR1R2R3 = permute(reshape(R3*C3, r3, r1, r2), [2 3 1]); % Multiplication by R3

        X.U1 = U1;
        X.U2 = U2;
        X.U3 = U3;
        X.G = CR1R2R3;

        % Compute some terms that are used subsequently.
        X = prepare(X);
    end

    M.randvec = @randomvec;
    function eta = randomvec(X)
        % A random vector on the horizontal space
        eta.U1 = randn(n1, r1);
        eta.U2 = randn(n2, r2);
        eta.U3 = randn(n3, r3);
        eta.G = randn(r1, r2, r3);
        eta = projection(X, eta);
        nrm = M.norm(X, eta);
        eta.U1 = eta.U1 / nrm;
        eta.U2 = eta.U2 / nrm;
        eta.U3 = eta.U3 / nrm;
        eta.G = eta.G / nrm;
    end

    M.lincomb = @lincomb;

    M.zerovec = @(X) struct('U1', zeros(n1, r1), 'U2', zeros(n2, r2), ...
        'U3', zeros(n3, r3), 'G', zeros(r1, r2, r3));

    M.transp = @(x1, x2, d) projection(x2, d);

    % vec and mat are not isometries, because of the scaled metric.
    M.vec = @(X, U1) [U1.U1(:); U1.U2(:); U1.U3(:); U1.G(:)];
    M.mat = @(X, u) struct ...
        ('U1', reshape(u(1 : n1*r1), n1, r1), ...
        'U2', reshape(u(n1*r1 + 1 : n1*r1 + n2*r2), n2, r2), ...
        'U3', reshape(u(n1*r1 + n2*r2 + 1 : n1*r1 + n2*r2 + n3*r3), n3, r3), ...
        'G', reshape(u(n1*r1 + n2*r2 + n3*r3 + 1 : end), r1, r2, r3));
    M.vecmatareisometries = @() false;

end

% Linear combination of tangent vectors
function d = lincomb(X, a1, d1, a2, d2) %#ok

    if nargin == 3
        d.U1 = a1*d1.U1;
        d.U2 = a1*d1.U2;
        d.U3 = a1*d1.U3;
        d.G = a1*d1.G;
    elseif nargin == 5
        d.U1 = a1*d1.U1 + a2*d2.U1;
        d.U2 = a1*d1.U2 + a2*d2.U2;
        d.U3 = a1*d1.U3 + a2*d2.U3;
        d.G = a1*d1.G + a2*d2.G;
    else
        error('Bad use of fixedrankfactory_tucker_preconditioned.lincomb.');
    end

end

function U = uf(A) % U factor of Polar factorization of a tall matrix A.
    [L, unused, R] = svd(A, 0); %#ok
    U = L*R';
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedranktensors/tucker2multiarray.m
================================================
function Xtensor = tucker2multiarray(X)
% Converts a 3d Tucker form tensor to a multiarray.
%
% function Xtensor = tucker2multiarray(X)
%
% X has fields U1, U2, U3, and G.
%
% The matrices U1 (n1-by-r1), U2 (n2-by-r2) and U3 (n3-by-r3) are
% orthogonal matrices.
% G (r1-by-r2-by-r3) is a multidimensional array.
%
% See also: fixedrankfactory_tucker_preconditioned

% This file is part of Manopt: www.manopt.org.
% Original authors: Hiroyuki Kasai and Bamdev Mishra, June 05, 2015.
% Contributors:
% Change log:

    % Factor matrices and core of the Tucker representation.
    A1 = X.U1;
    A2 = X.U2;
    A3 = X.U3;
    core = X.G;

    % Row counts of the factors give the size of the full tensor.
    n1 = size(A1, 1);
    n2 = size(A2, 1);
    n3 = size(A3, 1);

    % Size of the core.
    [r1, r2, r3] = size(core);

    % Multiply along the first mode by U1: unfold, multiply, fold back.
    T = reshape(A1*reshape(core, r1, r2*r3), n1, r2, r3);

    % Multiply along the second mode by U2.
    unfolded2 = reshape(permute(T, [2 1 3]), r2, n1*r3);
    T = permute(reshape(A2*unfolded2, n2, n1, r3), [2 1 3]);

    % Multiply along the third mode by U3.
    unfolded3 = reshape(permute(T, [3 1 2]), r3, n1*n2);
    T = permute(reshape(A3*unfolded3, n3, n1, n2), [2 3 1]);

    % Full n1-by-n2-by-n3 tensor.
    Xtensor = T;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/grassmann/grassmanncomplexfactory.m
================================================
function M = grassmanncomplexfactory(n, p, k)
% Returns a manifold struct to optimize over the set of subspaces in C^n.
%
% function M = grassmanncomplexfactory(n, p)
% function M = grassmanncomplexfactory(n, p, k)
%
% Complex Grassmann manifold: each point on this manifold is a collection
% of k vector subspaces of dimension p embedded in C^n.
%
% The metric is obtained by making the Grassmannian a Riemannian quotient
% manifold of the complex Stiefel manifold, i.e., the manifold of
% orthonormal matrices, itself endowed with a metric by making it a
% Riemannian submanifold of the Euclidean space, endowed with the usual
% real-trace inner product, that is, it is the usual metric for the complex
% plane identified with R^2.
%
% This structure deals with complex matrices X of size n x p x k
% (or n x p if k = 1, which is the default) such that each n x p matrix is
% orthonormal, i.e., X'*X = eye(p) if k = 1, or X(:, :, i)' * X(:, :, i) =
% eye(p) for i = 1 : k if k > 1.
% Each n x p matrix is a numerical
% representation of the vector subspace its columns span.
%
% By default, k = 1.
%
% See also: grassmannfactory, stiefelcomplexfactory, grassmanngeneralizedfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Hiroyuki Sato, May 21, 2015.
% Contributors:
% Change log:

    assert(n >= p, ...
           ['The dimension n of the ambient space must be larger ' ...
            'than the dimension p of the subspaces.']);

    % A missing or empty k means a single subspace.
    if ~exist('k', 'var') || isempty(k)
        k = 1;
    end

    if k == 1
        M.name = @() sprintf('Complex Grassmann manifold Gr(%d, %d)', n, p);
    elseif k > 1
        M.name = @() sprintf(['Multi complex Grassmann manifold ' ...
                              'Gr(%d, %d)^%d'], n, p, k);
    else
        error('k must be an integer no less than 1.');
    end

    % Twice the count k*p*(n-p) used in the real case.
    M.dim = @() 2*k*p*(n-p);

    % Real-trace inner product of the two tangent vectors.
    M.inner = @(x, d1, d2) real(d1(:)'*d2(:));

    M.norm = @(x, d) norm(d(:));

    M.dist = @distance;
    function d = distance(x, y)
        % One column of principal angles per slice; the distance is the
        % Frobenius norm of their real parts.
        angles = zeros(p, k);
        % multihconj (Hermitian transpose) is used here in place of the
        % multitransp of the real case.
        overlaps = multiprod(multihconj(x), y);
        for slice = 1 : k
            angles(:, slice) = acos(svd(overlaps(:, :, slice)));
        end
        d = norm(real(angles), 'fro');
    end

    M.typicaldist = @() sqrt(p*k);

    % Orthogonal projection of an ambient vector U to the horizontal space
    % at X.
    M.proj = @projection;
    function Up = projection(X, U)
        % Subtract X*(X^H*U), slice by slice.
        Up = U - multiprod(X, multiprod(multihconj(X), U));
    end

    M.tangent = M.proj;

    M.egrad2rgrad = M.proj;

    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, H)
        % Projected Euclidean Hessian minus H*(X^H*egrad).
        curvature = multiprod(H, multiprod(multihconj(X), egrad));
        rhess = projection(X, ehess) - curvature;
    end

    M.retr = @retraction;
    function Y = retraction(X, U, t)
        if nargin < 3
            t = 1.0;
        end
        Y = X + t*U;
        for slice = 1 : k

            % Compute the polar factorization of Y = X+tU
            [lft, unused, rgt] = svd(Y(:, :, slice), 'econ'); %#ok
            Y(:, :, slice) = lft*rgt';

            % Another popular retraction uses QR instead of SVD.
            % As compared with the Stiefel factory, we do not need to
            % worry about flipping signs of columns here, since only
            % the column space is important, not the actual columns.
            % [Q, unused] = qr(Y(:, :, i), 0); %#ok
            % Y(:, :, i) = Q;

        end
    end

    M.exp = @exponential;
    function Y = exponential(X, U, t)
        % The step is t*U when t is given, and U otherwise.
        if nargin ~= 3
            tU = U;
        else
            tU = t*U;
        end
        Y = zeros(size(X));
        for slice = 1 : k
            [lft, sig, rgt] = svd(tU(:, :, slice), 0);
            sigvals = diag(sig);
            Y(:, :, slice) = X(:, :, slice)*rgt*diag(cos(sigvals))*rgt' ...
                           + lft*diag(sin(sigvals))*rgt';
            % From numerical experiments, it seems necessary to
            % re-orthonormalize. This is overall quite expensive.
            [qfac, unused] = qr(Y(:, :, slice), 0); %#ok
            Y(:, :, slice) = qfac;
        end
    end

    % Test code for the logarithm:
    % Gr = grassmanncomplexfactory(5, 2, 3);
    % x = Gr.rand()
    % y = Gr.rand()
    % u = Gr.log(x, y)
    % Gr.dist(x, y) % These two numbers should
    % Gr.norm(x, u) % be the same.
    % z = Gr.exp(x, u) % z needs not be the same matrix as y, but it should
    % v = Gr.log(x, z) % be the same point as y on Grassmann: dist almost 0.
    M.log = @logarithm;
    function U = logarithm(X, Y)
        U = zeros(n, p, k);
        for slice = 1 : k
            xs = X(:, :, slice);
            ys = Y(:, :, slice);
            % Conjugate transposes (') are used throughout, where the
            % real case uses plain transposes (.').
            yHx = ys'*xs;
            AH = ys'-yHx*xs';
            BH = yHx\AH;
            [lft, sig, rgt] = svd(BH', 'econ');
            sig = diag(sig);
            U(:, :, slice) = lft(:, 1:p)*diag(atan(sig(1:p)))*rgt(:, 1:p)';
        end
    end

    % The hash is computed from the stacked real and imaginary parts.
    M.hash = @(X) ['z' hashmd5([real(X(:)); imag(X(:))])];

    M.rand = @random;
    function X = random()
        X = zeros(n, p, k);
        for slice = 1 : k
            % Q factor of a complex Gaussian n-by-p matrix.
            [qfac, unused] = qr(randn(n, p) + 1i*randn(n, p), 0); %#ok
            X(:, :, slice) = qfac;
        end
    end

    M.randvec = @randomvec;
    function U = randomvec(X)
        % Projected complex Gaussian array, scaled to unit norm overall.
        U = projection(X, randn(n, p, k) + 1i*randn(n, p, k));
        U = U / norm(U(:));
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(x) zeros(n, p, k);

    % This transport is compatible with the polar retraction.
    M.transp = @(x1, x2, d) projection(x2, d);

    % vec stacks the real parts on top of the imaginary parts; mat reverses
    % that, reading the first n*p*k entries as real parts.
    M.vec = @(x, u_mat) [real(u_mat(:)) ; imag(u_mat(:))];
    M.mat = @(x, u_vec) reshape(u_vec(1:(n*p*k)) + 1i*u_vec((n*p*k+1):end), [n, p, k]);
    M.vecmatareisometries = @() true;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/grassmann/grassmannfactory.m
================================================
function M = grassmannfactory(n, p, k)
% Returns a manifold struct to optimize over the space of vector subspaces.
%
% function M = grassmannfactory(n, p)
% function M = grassmannfactory(n, p, k)
%
% Grassmann manifold: each point on this manifold is a collection of k
% vector subspaces of dimension p embedded in R^n.
%
% The metric is obtained by making the Grassmannian a Riemannian quotient
% manifold of the Stiefel manifold, i.e., the manifold of orthonormal
% matrices, itself endowed with a metric by making it a Riemannian
% submanifold of the Euclidean space, endowed with the usual inner product.
% In short: it is the usual metric used in most cases.
%
% This structure deals with matrices X of size n x p x k (or n x p if
% k = 1, which is the default) such that each n x p matrix is orthonormal,
% i.e., X'*X = eye(p) if k = 1, or X(:, :, i)' * X(:, :, i) = eye(p) for
% i = 1 : k if k > 1. Each n x p matrix is a numerical representation of
% the vector subspace its columns span.
%
% By default, k = 1.
%
% See also: stiefelfactory grassmanncomplexfactory grassmanngeneralizedfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:
%   March 22, 2013 (NB) :
%       Implemented geodesic distance.
%
%   April 17, 2013 (NB) :
%       Retraction changed to the polar decomposition, so that the vector
%       transport is now correct, in the sense that it is compatible with
%       the retraction, i.e., transporting a tangent vector G from U to V
%       where V = Retr(U, H) will give Z, and transporting GQ from UQ to VQ
%       will give ZQ: there is no dependence on the representation, which
%       is as it should be. Notice that the polar factorization requires an
%       SVD whereas the qfactor retraction requires a QR decomposition,
%       which is cheaper. Hence, if the retraction happens to be a
%       bottleneck in your application and you are not using vector
%       transports, you may want to replace the retraction with a qfactor.
%
%   July 4, 2013 (NB) :
%       Added support for the logarithmic map 'log'.
%
%   July 5, 2013 (NB) :
%       Added support for ehess2rhess.
%
%   June 24, 2014 (NB) :
%       Small bug fix in the retraction, and added final
%       re-orthonormalization at the end of the exponential map. This
%       follows discussions on the forum where it appeared there is a
%       significant loss in orthonormality without that extra step. Also
%       changed the randvec function so that it now returns a globally
%       normalized vector, not a vector where each component is normalized
%       (this only matters if k>1).

    assert(n >= p, ...
           ['The dimension n of the ambient space must be larger ' ...
            'than the dimension p of the subspaces.']);

    % A missing or empty k means a single subspace.
    if ~exist('k', 'var') || isempty(k)
        k = 1;
    end

    if k == 1
        M.name = @() sprintf('Grassmann manifold Gr(%d, %d)', n, p);
    elseif k > 1
        M.name = @() sprintf('Multi Grassmann manifold Gr(%d, %d)^%d', ...
                             n, p, k);
    else
        error('k must be an integer no less than 1.');
    end

    M.dim = @() k*p*(n-p);

    M.inner = @(x, d1, d2) d1(:).'*d2(:);

    M.norm = @(x, d) norm(d(:));

    M.dist = @distance;
    function d = distance(x, y)
        % Accumulate the squared principal angles over the k slices, then
        % take the square root.
        total = 0;
        overlaps = multiprod(multitransp(x), y);
        for slice = 1 : k
            angles = real(acos(svd(overlaps(:, :, slice))));
            total = total + sum(angles.^2);
        end
        d = sqrt(total);
    end

    M.typicaldist = @() sqrt(p*k);

    % Orthogonal projection of an ambient vector U to the horizontal space
    % at X.
    M.proj = @projection;
    function Up = projection(X, U)
        % Subtract X*(X'*U), slice by slice.
        Up = U - multiprod(X, multiprod(multitransp(X), U));
    end

    M.tangent = M.proj;

    M.egrad2rgrad = M.proj;

    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, H)
        % Projected Euclidean Hessian minus H*(X'*egrad).
        curvature = multiprod(H, multiprod(multitransp(X), egrad));
        rhess = projection(X, ehess) - curvature;
    end

    M.retr = @retraction;
    function Y = retraction(X, U, t)
        if nargin < 3
            t = 1.0;
        end
        Y = X + t*U;
        for slice = 1 : k

            % Compute the polar factorization of Y = X+tU
            [lft, unused, rgt] = svd(Y(:, :, slice), 'econ'); %#ok
            Y(:, :, slice) = lft*rgt';

            % Another popular retraction uses QR instead of SVD.
            % As compared with the Stiefel factory, we do not need to
            % worry about flipping signs of columns here, since only
            % the column space is important, not the actual columns.
            % [Q, unused] = qr(Y(:, :, i), 0); %#ok
            % Y(:, :, i) = Q;

        end
    end

    M.exp = @exponential;
    function Y = exponential(X, U, t)
        % The step is t*U when t is given, and U otherwise.
        if nargin ~= 3
            tU = U;
        else
            tU = t*U;
        end
        Y = zeros(size(X));
        for slice = 1 : k
            [lft, sig, rgt] = svd(tU(:, :, slice), 0);
            sigvals = diag(sig);
            Y(:, :, slice) = X(:, :, slice)*rgt*diag(cos(sigvals))*rgt' ...
                           + lft*diag(sin(sigvals))*rgt';
            % From numerical experiments, it seems necessary to
            % re-orthonormalize. This is overall quite expensive.
            [qfac, unused] = qr(Y(:, :, slice), 0); %#ok
            Y(:, :, slice) = qfac;
        end
    end

    % Test code for the logarithm:
    % Gr = grassmannfactory(5, 2, 3);
    % x = Gr.rand()
    % y = Gr.rand()
    % u = Gr.log(x, y)
    % Gr.dist(x, y) % These two numbers should
    % Gr.norm(x, u) % be the same.
    % z = Gr.exp(x, u) % z needs not be the same matrix as y, but it should
    % v = Gr.log(x, z) % be the same point as y on Grassmann: dist almost 0.
    M.log = @logarithm;
    function U = logarithm(X, Y)
        U = zeros(n, p, k);
        for slice = 1 : k
            xs = X(:, :, slice);
            ys = Y(:, :, slice);
            ytx = ys.'*xs;
            At = ys.'-ytx*xs.';
            Bt = ytx\At;
            [lft, sig, rgt] = svd(Bt.', 'econ');
            sig = diag(sig);
            U(:, :, slice) = lft(:, 1:p)*diag(atan(sig(1:p)))*rgt(:, 1:p).';
        end
    end

    M.hash = @(X) ['z' hashmd5(X(:))];

    M.rand = @random;
    function X = random()
        X = zeros(n, p, k);
        for slice = 1 : k
            % Q factor of a Gaussian n-by-p matrix.
            [qfac, unused] = qr(randn(n, p), 0); %#ok
            X(:, :, slice) = qfac;
        end
    end

    M.randvec = @randomvec;
    function U = randomvec(X)
        % Projected Gaussian array, scaled to unit norm overall.
        U = projection(X, randn(n, p, k));
        U = U / norm(U(:));
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(x) zeros(n, p, k);

    % This transport is compatible with the polar retraction.
    M.transp = @(x1, x2, d) projection(x2, d);

    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, [n, p, k]);
    M.vecmatareisometries = @() true;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/grassmann/grassmanngeneralizedfactory.m
================================================
function M = grassmanngeneralizedfactory(n, p, B)
% Returns a manifold struct of "scaled" vector subspaces.
%
% function M = grassmanngeneralizedfactory(n, p)
% function M = grassmanngeneralizedfactory(n, p, B)
%
% Generalized Grassmann manifold: each point on this manifold is a
% collection of "scaled" vector subspaces of dimension p embedded in R^n.
% The scaling is due to the symmetric positive definite matrix B.
%
% When B is identity, the manifold is the standard Grassmann manifold.
%
% The metric is obtained by viewing the generalized Grassmannian
% a Riemannian quotient manifold of the generalized Stiefel manifold,
% which is the manifold of "scaled" orthonormal matrices. Specifically,
% the scaled Stiefel manifold is the set {X : X'*B*X = I}.
% The generalized Grassmann manifold is the Grassmannian of the
% generalized Stiefel manifold.
%
% The generalized Stiefel manifold is endowed with a scaled metric
% by viewing it as a Riemannian submanifold of the Euclidean space, which
% is again endowed with the scaled inner product.
%
% Some notions (not all) are from Section 4.5 of the paper
% "The geometry of algorithms with orthogonality constraints",
% A. Edelman, T. A. Arias, S. T. Smith, SIMAX, 1998.
%
% Paper link: http://arxiv.org/abs/physics/9806030.
%
%
% Note: some computations such as restricted_svd, distance, logarithm, and
% exponential are new and we believe them to be correct.
% Also, we hope that the computations are numerically stable.
% In case some things do not work out as expected or there is some trouble,
% please contact us at http://www.manopt.org.
%
% Note: egrad2rgrad and ehess2rhess involve solving linear systems in B. If
% this is a bottleneck for a specific application, then a way forward is to
% create a modified version of this file which preprocesses B to speed this
% up (typically, by computing a Cholesky factorization of it, then calling
% an appropriate solver).
%
% See also: stiefelgeneralizedfactory stiefelfactory grassmannfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, June 30, 2015.
% Contributors:
%
% Change log:
%

    assert(n >= p, ...
        ['The dimension n of the ambient space must be larger ' ...
        'than the dimension p of the subspaces.']);

    if ~exist('B', 'var') || isempty(B)
        B = speye(n); % Standard Grassmann manifold.
    end

    M.name = @() sprintf('Generalized Grassmann manifold Gr(%d, %d)', n, p);

    M.dim = @() p*(n - p);

    M.inner = @(X, eta, zeta) trace(eta'*(B*zeta)); % Scaled metric, but horizontally invariant.

    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));

    M.dist = @distance;
    function d = distance(X, Y)
        % Norm of the arc-cosines of the singular values of X'*B*Y.
        XtBY = X'*(B*Y); % XtY ---> XtBY
        cos_princ_angle = svd(XtBY); % svd(XtY) ---> svd(XtBY)
        % Two next instructions not necessary: the imaginary parts that
        % would appear if the cosines are not between -1 and 1, when
        % passed to the acos function, would be very small, and would
        % thus vanish when the norm is taken.
        % cos_princ_angle = min(cos_princ_angle, 1);
        % cos_princ_angle = max(cos_princ_angle, -1);
        square_d = norm(acos(cos_princ_angle))^2;

        d = sqrt(square_d);
    end

    M.typicaldist = @() sqrt(p);

    % Orthogonal projection of an ambient vector U onto the
    % horizontal space at X.
    M.proj = @projection;
    function Up = projection(X, U)
        BX = B*X;

        % Projection onto the tangent space
        % U = U - X*symm(BX'*U);
        % Projection onto the horizontal space
        % Up = U - X*skew(BX'*U);

        Up = U - X*(BX'*U);
    end

    M.tangent = M.proj;

    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)

        % First, scale egrad according to the scaled metric in the
        % Euclidean space.
        egrad_scaled = B\egrad;

        % Second, project onto the tangent space.
        % No need to project onto the horizontal space as
        % by the Riemannian submersion theory, this quantity automatically
        % belongs to the horizontal space.
        %
        %
        % rgrad = egrad_scaled - X*symm((B*X)'*egrad_scaled);
        %
        % Verify that symm(BX'*egrad_scaled) = symm(X'*egrad).

        rgrad = egrad_scaled - X*symm(X'*egrad);
    end

    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, H)
        egraddot = ehess;
        Xdot = H;

        % Directional derivative of the Riemannian gradient.
        egrad_scaleddot = B\egraddot;
        rgraddot = egrad_scaleddot - Xdot*symm(X'*egrad)...
            - X*symm(Xdot'*egrad)...
            - X*symm(X'*egraddot);

        % Project onto the horizontal space.
        rhess = M.proj(X, rgraddot);
    end

    M.retr = @retraction;
    function Y = retraction(X, U, t)
        % The step size t defaults to 1 when omitted.
        if nargin < 3
            t = 1.0;
        end
        Y = guf(X + t*U); % Ensure that Y'*B*Y is identity.
    end

    M.exp = @exponential;
    function Y = exponential(X, U, t)
        % The step is t*U when t is given, and U otherwise.
        if nargin == 3
            tU = t*U;
        else
            tU = U;
        end

        % restricted_svd is defined later in the file.
        [u, s, v] = restricted_svd(tU);% svd(tU, 0) ---> restricted_svd(tU).
        cos_s = diag(cos(diag(s)));
        sin_s = diag(sin(diag(s)));
        Y = X*v*cos_s*v' + u*sin_s*v';% Verify that Y'*B*Y is identity

        % From numerical experiments, it seems necessary to
        % re-orthonormalize.
        Y = guf(Y);% Ensure that Y'*B*Y is identity.
    end

    % Test code for the logarithm:
    % gGr = grassmanngeneralizedfactory(5, 2, diag(rand(5,1)));
    % x = gGr.rand()
    % y = gGr.rand()
    % u = gGr.log(x, y)
    % gGr.dist(x, y) % These two numbers should
    % gGr.norm(x, u) % be the same.
    % z = gGr.exp(x, u) % z needs not be the same matrix as y, but it should
    % v = gGr.log(x, z) % be the same point as y on Grassmann: dist almost 0.
    % gGr.dist(z, y)
    M.log = @logarithm;
    function U = logarithm(X, Y)
        YtBX = Y'*(B*X); % YtX ---> YtBX.
        At = (Y' - YtBX*X');
        Bt = YtBX\At;
        [u, s, v] = restricted_svd(Bt');% svd(Bt', 'econ') ---> restricted_svd(Bt').

        u = u(:, 1:p);
        s = diag(s);
        s = s(1:p);
        v = v(:, 1:p);

        U = u*diag(atan(s))*v'; % A horizontal vector, i.e., U'*(B*X) is zero.
    end

    M.hash = @(X) ['z' hashmd5(X(:))];

    M.rand = @random;
    function X = random()
        X = guf(randn(n, p)); % Ensure that X'*B*X is identity;
    end

    M.randvec = @randomvec;
    function U = randomvec(X)
        % Random horizontal vector at X with unit norm in the metric of
        % this manifold.
        U = projection(X, randn(n, p));
        % Normalize with M.norm (the B-scaled metric) rather than the
        % Euclidean norm, so that M.norm(X, U) is 1 for any B. When B is
        % the identity, both norms coincide.
        U = U / M.norm(X, U);
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(X) zeros(n, p);

    % This transport is compatible with the generalized polar retraction.
    M.transp = @(X1, X2, d) projection(X2, d);

    M.vec = @(X, u_mat) u_mat(:);
    M.mat = @(X, u_vec) reshape(u_vec, [n, p]);
    M.vecmatareisometries = @() false;

    % Some auxiliary functions
    symm = @(D) (D + D')/2;

    function X = guf(Y)
        % Generalized polar decomposition of an n-by-p matrix Y.
        % X'*B*X is identity.

        % Method 1
        [u, ~, v] = svd(Y, 0);

        % Instead of the following three steps, an equivalent, but an
        % expensive, way is to do X = u*(sqrtm(u'*(B*u))\(v')).
        [q, ssquare] = eig(u'*(B*u));
        qsinv = q/sparse(diag(sqrt(diag(ssquare))));
        X = u*((qsinv*q')*v'); % X'*B*X is identity.

        % Another computation using restricted_svd
        % [u, ~, v] = restricted_svd(Y);
        % X = u*v'; % X'*B*X is identity.
    end

    function [u, s, v] = restricted_svd(Y)
        % We compute a thin svd-like decomposition of an n-by-p matrix Y
        % into matrices u, s, and v such that u is an n-by-p matrix
        % with u'*B*u being identity, s is a p-by-p diagonal matrix
        % with positive entries, and v is a p-by-p orthogonal matrix.
        % Y = u*s*v'.
        [v, ssquare] = eig(symm(Y'*(B*Y))); % Y'*B*Y is positive definite
        ssquarevec = diag(ssquare);

        s = sparse(diag(abs(sqrt(ssquarevec))));
        u = Y*(v/s); % u'*B*u is identity.
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/multinomial/multinomialfactory.m
================================================
function M = multinomialfactory(n, m)
% Manifold of n-by-m column-stochastic matrices with positive entries.
%
% function M = multinomialfactory(n)
% function M = multinomialfactory(n, m)
%
% The returned structure M is a Manopt manifold structure to optimize over
% the set of n-by-m matrices with (strictly) positive entries and such that
% the entries of each column sum to one. By default, m = 1.
%
% The metric imposed on the manifold is the Fisher metric such that
% the set of n-by-m column-stochastic matrices (aka the multinomial manifold)
% is a Riemannian submanifold of the space of n-by-m matrices. Also it
% should be noted that the retraction operation that we define
% is first order and as such the checkhessian tool cannot verify
% the slope correctly.
%
% The file is based on developments in the research paper
% Y. Sun, J. Gao, X. Hong, B. Mishra, and B.
% Yin,
% "Heterogeneous tensor decomposition for clustering via manifold
% optimization", arXiv:1504.01777, 2015.
%
% Link to the paper: http://arxiv.org/abs/1504.01777.
%
% Please cite the Manopt paper as well as the research paper:
% @Article{sun2015multinomial,
%   author  = {Y. Sun and J. Gao and X. Hong and B. Mishra and B. Yin},
%   title   = {Heterogeneous Tensor Decomposition for Clustering via Manifold Optimization},
%   journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
%   year    = {2016},
%   volume  = {38},
%   number  = {3},
%   pages   = {476--489},
%   doi     = {10.1109/TPAMI.2015.2465901}
% }

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, April 06, 2015.
% Contributors:
% Change log:

    % A missing or empty m means a single column.
    if ~exist('m', 'var') || isempty(m)
        m = 1;
    end

    M.name = @() sprintf('%dx%d column-stochastic matrices with positive entries', n, m);

    M.dim = @() (n-1)*m;

    % We impose the Fisher metric.
    M.inner = @iproduct;
    function ip = iproduct(X, eta, zeta)
        % Sum over all entries of eta.*zeta./X.
        ip = sum((eta(:).*zeta(:))./X(:));
    end

    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));

    M.dist = @(X, Y) error('multinomialfactory.dist not implemented yet.');

    M.typicaldist = @() m*pi/2; % This is an approximation.

    % Column vector of ones of length n.
    e = ones(n, 1);

    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        lambda = -sum(X.*egrad, 1); % Row vector of length m.
        rgrad = X.*egrad + (e*lambda).*X; % This is in the tangent space.
    end

    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, eta)

        % Riemannian gradient computation.
        % lambda is a row vector of length m.
        lambda = - sum(X.*egrad, 1);
        rgrad = X.*egrad + (e*lambda).*X;

        % Directional derivative of the Riemannian gradient.
        % lambdadot is a row vector of length m.
        lambdadot = -sum(eta.*egrad, 1) - sum(X.*ehess, 1);
        rgraddot = eta.*egrad + X.*ehess + (e*lambdadot).*X + (e*lambda).*eta;

        % Correction term because of the non-constant metric that we
        % impose. The computation of the correction term follows the use of
        % Koszul formula.
        correction_term = - 0.5*(eta.*rgrad)./X;
        rhess = rgraddot + correction_term;

        % Finally, projection onto the tangent space.
        rhess = M.proj(X, rhess);
    end

    % Projection of the vector eta in the ambient space onto the tangent
    % space.
    M.proj = @projection;
    function etaproj = projection(X, eta)
        alpha = sum(eta, 1); % Row vector of length m.
        etaproj = eta - (e*alpha).*X;
    end

    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;

    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        % The step size t defaults to 1 when omitted.
        if nargin < 3
            t = 1.0;
        end
        % A first-order retraction.
        Y = X.*exp(t*(eta./X)); % Based on mapping for positive scalars.
        Y = Y./(e*(sum(Y, 1))); % Projection onto the constraint set.
        % For numerical reasons, so that we avoid entries going to zero:
        Y = max(Y, eps);
    end

    M.exp = @exponential;
    function Y = exponential(X, eta, t)
        % No exponential map is implemented: this calls the retraction and
        % emits a warning saying so.
        if nargin < 3
            t = 1.0;
        end
        Y = retraction(X, eta, t);
        % The message previously concatenated to "manifoldmanifold" (no
        % separating space and a repeated word); the identifier is
        % unchanged.
        warning('manopt:multinomialfactory:exp', ...
            ['Exponential for the Multinomial manifold ' ...
            'not implemented yet. Used retraction instead.']);
    end

    M.hash = @(X) ['z' hashmd5(X(:))];

    M.rand = @random;
    function X = random()
        % A random point in the ambient space, with entries drawn by rand.
        X = rand(n, m);
        % Scale each column so that its entries sum to one, so that the
        % returned point is column-stochastic.
        X = X./(e*(sum(X, 1)));
    end

    M.randvec = @randomvec;
    function eta = randomvec(X)
        % A random vector in the tangent space
        eta = randn(n, m);
        eta = M.proj(X, eta); % Projection onto the tangent space.
        nrm = M.norm(X, eta);
        eta = eta / nrm;
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(X) zeros(n, m);

    M.transp = @(X1, X2, d) projection(X2, d);

    % vec and mat are not isometries, because of the scaled metric.
    M.vec = @(X, U) U(:);
    M.mat = @(X, u) reshape(u, n, m);
    M.vecmatareisometries = @() false;
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/oblique/obliquecomplexfactory.m
================================================
function M = obliquecomplexfactory(n, m, transposed)
% Returns a manifold struct defining complex matrices w/ unit-norm columns.
%
% function M = obliquecomplexfactory(n, m)
% function M = obliquecomplexfactory(n, m, transposed)
%
% Oblique manifold: deals with complex matrices of size n x m such that
% each column has unit 2-norm, i.e., is a point on the unit sphere in C^n.
% The geometry is a product geometry of m unit spheres in C^n. For the
% metric, C^n is treated as R^(2n), so that the real part and imaginary
% parts are treated separately as 2n real coordinates. As such, the complex
% oblique manifold is a Riemannian submanifold of (R^2)^(n x m), with the
% usual metric <u, v> = real(u'*v).
%
% If transposed is set to true (it is false by default), then the matrices
% are transposed: a point Y on the manifold is a matrix of size m x n and
% each row has unit 2-norm. It is the same geometry, just a different
% representation.
%
% In transposed form, a point Y is such that Y*Y' is a Hermitian, positive
% semidefinite matrix of size m and of rank at most n, such that all the
% diagonal entries are equal to 1.
%
% Note: obliquecomplexfactory(1, n, true) is equivalent to (but potentially
% slower than) complexcirclefactory(n).
%
% See also: spherecomplexfactory complexcirclefactory obliquefactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Sep. 3, 2014.
% Contributors:
% Change log:
%
%   Oct. 21, 2016 (NB)
%       Formatted for inclusion in Manopt release.
%
%   July 20, 2017 (NB)
%       Distance function is now accurate for close-by points. See notes
%       inside the spherefactory file for details.
% Also improves the distance
%       computation as part of the log function.

    % A missing or empty flag means the column representation.
    if ~exist('transposed', 'var') || isempty(transposed)
        transposed = false;
    end

    % trnsp switches between the user-facing representation and the
    % internal one in which points are stored as columns. It is the
    % identity unless the transposed flag is set.
    if transposed
        trnsp = @(X) X.';
    else
        trnsp = @(X) X;
    end

    M.name = @() sprintf('Complex oblique manifold COB(%d, %d)', n, m);

    M.dim = @() (2*n-1)*m;

    M.inner = @(x, d1, d2) real(d1(:)'*d2(:));

    M.norm = @(x, d) norm(d(:));

    M.dist = @(x, y) norm(real(2*asin(.5*sqrt(sum(trnsp(abs(x - y).^2), 1)))));

    M.typicaldist = @() pi*sqrt(m);

    M.proj = @(X, U) trnsp(projection(trnsp(X), trnsp(U)));

    M.tangent = M.proj;

    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
    M.egrad2rgrad = M.proj;

    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, U)
        X = trnsp(X);
        egrad = trnsp(egrad);
        ehess = trnsp(ehess);
        U = trnsp(U);

        PXehess = projection(X, ehess);
        % One real inner product per column of X and egrad.
        inners = sum(real(conj(X).*egrad), 1);
        rhess = PXehess - bsxfun(@times, U, inners);

        rhess = trnsp(rhess);
    end

    M.exp = @exponential;
    % Exponential on the complex oblique manifold
    function y = exponential(x, d, t)
        x = trnsp(x);
        d = trnsp(d);

        if nargin == 2
            % t = 1;
            td = d;
        else
            td = t*d;
        end

        nrm_td = sqrt(sum(real(td).^2 + imag(td).^2, 1));

        y = bsxfun(@times, x, cos(nrm_td)) + ...
            bsxfun(@times, td, sin(nrm_td) ./ nrm_td);

        % For those columns where the step is 0, replace y by x
        exclude = (nrm_td == 0);
        y(:, exclude) = x(:, exclude);

        y = trnsp(y);
    end

    M.log = @logarithm;
    function v = logarithm(x1, x2)
        % Logarithmic map: the tangent vector at x1 whose columns point
        % towards the columns of x2 and have length equal to the
        % column-wise distances computed below.
        x1 = trnsp(x1);
        x2 = trnsp(x2);

        v = projection(x1, x2 - x1);
        % Column-wise distances, with the same formula as M.dist. x1 and x2
        % are already in column form here, so no further trnsp is applied.
        % (This line previously referred to undefined variables x and y.)
        dists = real(2*asin(.5*sqrt(sum(abs(x1 - x2).^2, 1))));
        norms = sqrt(sum(real(v).^2 + imag(v).^2, 1));
        factors = dists./norms;
        % For very close points, dists is almost equal to norms, but
        % because they are both almost zero, the division above can return
        % NaN's. To avoid that, we force those ratios to 1.
        factors(dists <= 1e-10) = 1;
        v = bsxfun(@times, v, factors);

        v = trnsp(v);
    end

    M.retr = @retraction;
    % Retraction on the oblique manifold
    function y = retraction(x, d, t)
        x = trnsp(x);
        d = trnsp(d);

        % The step is t*d when t is given, and d otherwise.
        if nargin < 3
            td = d;
        else
            td = t*d;
        end

        y = normalize_columns(x + td);

        y = trnsp(y);
    end

    M.hash = @(x) ['z' hashmd5([real(x(:)) ; imag(x(:))])];

    M.rand = @() trnsp(random(n, m));

    M.randvec = @(x) trnsp(randomvec(n, m, trnsp(x)));

    M.lincomb = @matrixlincomb;

    M.zerovec = @(x) trnsp(zeros(n, m));

    M.transp = @(x1, x2, d) M.proj(x2, d);

    M.pairmean = @pairmean;
    function y = pairmean(x1, x2)
        % Sum of the two points, with columns rescaled to unit norm.
        y = trnsp(x1+x2);
        y = normalize_columns(y);
        y = trnsp(y);
    end

    % vec returns a vector representation of an input tangent vector which
    % is represented as a matrix. mat returns the original matrix
    % representation of the input vector representation of a tangent
    % vector. vec and mat are thus inverse of each other. They are
    % furthermore isometries between a subspace of R^2nm and the tangent
    % space at x.
    vect = @(X) X(:);
    M.vec = @(x, u_mat) [vect(real(trnsp(u_mat))) ; ...
                         vect(imag(trnsp(u_mat)))];
    M.mat = @(x, u_vec) trnsp(reshape(u_vec(1:(n*m)), [n, m])) + ...
                        1i*trnsp(reshape(u_vec((n*m+1):end), [n, m]));
    M.vecmatareisometries = @() true;

end

% Given a matrix X, returns the same matrix but with each column scaled so
% that they have unit 2-norm.
function X = normalize_columns(X)
    norms = sqrt(sum(real(X).^2 + imag(X).^2, 1));
    X = bsxfun(@times, X, 1./norms);
end

% Orthogonal projection of the ambient vector H onto the tangent space at X
function PXH = projection(X, H)

    % Compute the inner product between each vector H(:, i) with its root
    % point X(:, i), that is, real(X(:, i)' * H(:, i)).
    % Returns a row vector.
    inners = real(sum(conj(X).*H, 1));

    % Subtract from H the components of the H(:, i)'s that are parallel to
    % the root points X(:, i).
    PXH = H - bsxfun(@times, X, inners);

end

% Uniform random sampling on the sphere.
function x = random(n, m)
    z = randn(n, m) + 1i*randn(n, m);
    x = normalize_columns(z);
end

% Random normalized tangent vector at x.
function d = randomvec(n, m, x)
    raw = randn(n, m) + 1i*randn(n, m);
    tangent = projection(x, raw);
    d = tangent / norm(tangent(:));
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/oblique/obliquefactory.m
================================================
function M = obliquefactory(n, m, transposed)
% Returns a manifold struct to optimize over matrices w/ unit-norm columns.
%
% function M = obliquefactory(n, m)
% function M = obliquefactory(n, m, transposed)
%
% Oblique manifold: deals with matrices of size n x m such that each column
% has unit 2-norm, i.e., is a point on the unit sphere in R^n. The metric
% is such that the oblique manifold is a Riemannian submanifold of the
% space of nxm matrices with the usual trace inner product, i.e., the usual
% metric.
%
% If transposed is set to true (it is false by default), then the matrices
% are transposed: a point Y on the manifold is a matrix of size m x n and
% each row has unit 2-norm. It is the same geometry, just a different
% representation.
%
% See also: spherefactory obliquecomplexfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   July 16, 2013 (NB) :
%       Added 'transposed' option, mainly for ease of comparison with the
%       elliptope geometry.
%
%   Nov. 29, 2013 (NB) :
%       Added normalize_columns function to make it easier to exploit the
%       bsxfun formulation of column normalization, which avoids using for
%       loops and provides performance gains. The exponential still uses a
%       for loop.
%
%   April 4, 2015 (NB) :
%       Log function modified to avoid NaN's appearing for close by points.
%
%   April 13, 2015 (NB) :
%       Exponential now without for-loops.
%
%   Oct. 8, 2016 (NB)
%       Code for exponential was simplified to only treat the zero vector
%       as a particular case.
%
%   Oct. 21, 2016 (NB)
%       Bug caught in M.log: the function called v = M.proj(x1, x2 - x1),
%       which internally applies transp to inputs and outputs. But since
%       M.log had already taken care of transposing things, this introduced
%       a bug (which only triggered if using M.log in transposed mode.)
%       The code now calls "v = projection(x1, x2 - x1);" since projection
%       assumes the inputs and outputs do not need to be transposed.
%
%   July 20, 2017 (NB)
%       Distance function is now accurate for close-by points. See notes
%       inside the spherefactory file for details. Also improves distances
%       computation as part of the log function.


    % The third argument is optional; an omitted or empty value means the
    % default (column) representation.
    if nargin < 3 || isempty(transposed)
        transposed = false;
    end

    % All computations below are carried out on matrices with unit-norm
    % columns; trnsp maps to and from the representation the caller uses.
    if transposed
        trnsp = @(X) X.';
    else
        trnsp = @(X) X;
    end

    M.name = @() sprintf('Oblique manifold OB(%d, %d)', n, m);

    M.dim = @() (n-1)*m;

    M.inner = @(x, d1, d2) d1(:).'*d2(:);

    M.norm = @(x, d) norm(d(:));

    M.dist = @(x, y) norm(real(2*asin(.5*sqrt(sum(trnsp(x - y).^2, 1)))));

    M.typicaldist = @() pi*sqrt(m);

    M.proj = @(X, U) trnsp(projection(trnsp(X), trnsp(U)));

    M.tangent = M.proj;

    % On a Riemannian submanifold the Riemannian gradient is the
    % orthogonal projection of the Euclidean gradient.
    M.egrad2rgrad = M.proj;

    M.ehess2rhess = @ehess2rhess;
    % Riemannian Hessian along U from the Euclidean gradient and Hessian.
    function rhess = ehess2rhess(X, egrad, ehess, U)
        Xc = trnsp(X);
        Gc = trnsp(egrad);
        Hc = trnsp(ehess);
        Uc = trnsp(U);

        % One inner product <X(:, i), egrad(:, i)> per column.
        col_inners = sum(Xc.*Gc, 1);
        rhess = trnsp(projection(Xc, Hc) - bsxfun(@times, Uc, col_inners));
    end

    M.exp = @exponential;
    % Exponential map, applied column by column.
    function y = exponential(x, d, t)
        base = trnsp(x);
        step = trnsp(d);
        if nargin >= 3
            step = t*step;
        end

        step_norms = sqrt(sum(step.^2, 1));

        y = bsxfun(@times, base, cos(step_norms)) + ...
            bsxfun(@times, step, sin(step_norms) ./ step_norms);

        % Columns with a zero step produced 0/0 above: restore them from
        % the base point.
        still = (step_norms == 0);
        y(:, still) = base(:, still);

        y = trnsp(y);
    end

    M.log = @logarithm;
    % Logarithmic map, applied column by column.
    function v = logarithm(x1, x2)
        p = trnsp(x1);
        q = trnsp(x2);

        v = projection(p, q - p);
        geodesic = real(2*asin(.5*sqrt(sum((p - q).^2, 1))));
        chord = real(sqrt(sum(v.^2, 1)));
        scale = geodesic./chord;
        % When the two points nearly coincide both quantities are close to
        % zero and the ratio may be NaN; the ratio is forced to 1 there.
        scale(geodesic <= 1e-10) = 1;

        v = trnsp(bsxfun(@times, v, scale));
    end

    M.retr = @retraction;
    % Retraction: move in the ambient space, then renormalize each column.
    function y = retraction(x, d, t)
        step = trnsp(d);
        if nargin >= 3
            step = t*step;
        end

        y = trnsp(normalize_columns(trnsp(x) + step));
    end

    M.hash = @(x) ['z' hashmd5(x(:))];

    M.rand = @() trnsp(random(n, m));

    M.randvec = @(x) trnsp(randomvec(n, m, trnsp(x)));

    M.lincomb = @matrixlincomb;

    M.zerovec = @(x) trnsp(zeros(n, m));

    M.transp = @(x1, x2, d) M.proj(x2, d);

    M.pairmean = @pairmean;
    % Column-normalized sum of the two points.
    function y = pairmean(x1, x2)
        y = trnsp(normalize_columns(trnsp(x1+x2)));
    end

    % vec maps a tangent vector (a matrix) to a column vector and mat
    % maps it back, so the two are inverse of each other. They are
    % furthermore isometries between a subspace of R^nm and the tangent
    % space at x.
    as_column = @(X) X(:);
    M.vec = @(x, u_mat) as_column(trnsp(u_mat));
    M.mat = @(x, u_vec) trnsp(reshape(u_vec, [n, m]));
    M.vecmatareisometries = @() true;

end

% Given a matrix X, returns the same matrix but with each column scaled so
% that they have unit 2-norm.
function X = normalize_columns(X)
    % This is faster than norms(X, 2, 1) for small X, and as fast for large X.
col_norms = sqrt(sum(X.^2, 1));
    X = bsxfun(@times, X, 1./col_norms);
end

% Orthogonal projection of the ambient vector H onto the tangent space at X
function PXH = projection(X, H)

    % Row vector holding, for each column i, the inner product
    % X(:, i).' * H(:, i) between the ambient vector and its root point.
    radial = sum(X.*H, 1);

    % Remove from each H(:, i) its component along X(:, i).
    PXH = H - bsxfun(@times, X, radial);

    % % Equivalent but slow code:
    % m = size(X, 2);
    % PXH = zeros(size(H));
    % for i = 1 : m
    %     PXH(:, i) = H(:, i) - X(:, i) * (X(:, i)'*H(:, i));
    % end

end

% Uniform random sampling on the sphere.
function x = random(n, m)
    gauss = randn(n, m);
    x = normalize_columns(gauss);
end

% Random normalized tangent vector at x.
function d = randomvec(n, m, x)
    tangent = projection(x, randn(n, m));
    d = tangent / norm(tangent(:));
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/rotations/randrot.m
================================================
function R = randrot(n, N)
% Generates uniformly random rotation matrices.
%
% function R = randrot(n, N)
%
% R is a n-by-n-by-N matrix such that each slice R(:, :, i) is an
% orthogonal matrix of size n of determinant +1 (i.e., a matrix in SO(n)).
% By default, N = 1.
% Complexity: N times O(n^3).
% Theory in Diaconis and Shahshahani 1987 for the uniformity on O(n);
% With details in Mezzadri 2007,
% "How to generate random matrices from the classical compact groups."
% To ensure matrices in SO(n), we permute the two first columns when
% the determinant is -1.
%
% See also: randskew

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Sept. 25, 2012.
% Contributors:
% Change log:

    if nargin < 2
        N = 1;
    end

    % The only 1-by-1 rotation is the scalar 1.
    if n == 1
        R = ones(1, 1, N);
        return;
    end

    R = zeros(n, n, N);

    for s = 1 : N

        % QR factorization of a Gaussian matrix, with the columns of Q
        % re-signed by the diagonal of the triangular factor (Mezzadri
        % 2007), gives Q uniformly distributed over O(n).
        [Q, Rfac] = qr(randn(n));
        Q = Q * diag(sign(diag(Rfac)));

        % A negative determinant means Q is in O(n) but not in SO(n).
        % Swapping the first two columns flips the sign of the
        % determinant, so the result is in SO(n), uniformly distributed.
        if det(Q) < 0
            Q(:, [1 2]) = Q(:, [2 1]);
        end

        R(:, :, s) = Q;

    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/rotations/randskew.m
================================================
function S = randskew(n, N)
% Generates random skew symmetric matrices with normal entries.
%
% function S = randskew(n, N)
%
% S is an n-by-n-by-N matrix where each slice S(:, :, i) for i = 1..N is a
% random skew-symmetric matrix with upper triangular entries distributed
% independently following a normal distribution (Gaussian, zero mean, unit
% variance).
%
% See also: randrot

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Sept. 25, 2012.
% Contributors:
% Change log:


    if nargin < 2
        N = 1;
    end

    % Row and column subscripts of the strictly upper triangular entries
    % of one n-by-n matrix.
    [rows, cols] = find(triu(ones(n), 1));

    % Slice subscript for each of those entries, for all N slices.
    slices = repmat(1:N, n*(n-1)/2, 1);

    % Linear indices of the strictly upper triangular entries of all N
    % slices of an n-by-n-by-N array.
    upper = sub2ind([n n N], repmat(rows, N, 1), repmat(cols, N, 1), slices(:));

    % Fill the upper triangles with standard normal samples, then
    % subtract the slice-wise transpose to mirror them, with opposite
    % sign, onto the lower triangles.
    S = zeros(n, n, N);
    S(upper) = randn(size(upper));
    S = S-multitransp(S);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/rotations/rotationsfactory.m
================================================
function M = rotationsfactory(n, k)
% Returns a manifold structure to optimize over rotation matrices.
% % function M = rotationsfactory(n) % function M = rotationsfactory(n, k) % % Special orthogonal group (the manifold of rotations): deals with matrices % R of size n x n x k (or n x n if k = 1, which is the default) such that % each n x n matrix is orthogonal, with determinant 1, i.e., X'*X = eye(n) % if k = 1, or X(:, :, i)' * X(:, :, i) = eye(n) for i = 1 : k if k > 1. % % This is a description of SO(n)^k with the induced metric from the % embedding space (R^nxn)^k, i.e., this manifold is a Riemannian % submanifold of (R^nxn)^k endowed with the usual trace inner product. % % Tangent vectors are represented in the Lie algebra, i.e., as skew % symmetric matrices. Use the function M.tangent2ambient(X, H) to switch % from the Lie algebra representation to the embedding space % representation. This is often necessary when defining % problem.ehess(X, H). % % By default, the retraction is only a first-order approximation of the % exponential. To force the use of a second-order approximation, call % M.retr = M.retr2 after creating M. This switches from a QR-based % computation to an SVD-based computation. % % By default, k = 1. % % See also: stiefelfactory % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Dec. 30, 2012. % Contributors: % Change log: % Jan. 31, 2013 (NB) % Added egrad2rgrad and ehess2rhess % Oct. 21, 2016 (NB) % Added M.retr2: a second-order retraction based on SVD. 
if ~exist('k', 'var') || isempty(k)
        k = 1;
    end

    if k == 1
        M.name = @() sprintf('Rotations manifold SO(%d)', n);
    elseif k > 1
        M.name = @() sprintf('Product rotations manifold SO(%d)^%d', n, k);
    else
        error('k must be an integer no less than 1.');
    end

    M.dim = @() k*nchoosek(n, 2);

    M.inner = @(x, d1, d2) d1(:).'*d2(:);

    M.norm = @(x, d) norm(d(:));

    M.typicaldist = @() pi*sqrt(n*k);

    M.proj = @(X, H) multiskew(multiprod(multitransp(X), H));

    M.tangent = @(X, H) multiskew(H);

    M.tangent2ambient = @(X, U) multiprod(X, U);

    M.egrad2rgrad = M.proj;

    M.ehess2rhess = @ehess2rhess;
    % Converts the Euclidean gradient Egrad and Hessian Ehess along H at X
    % into the Riemannian Hessian along H, in the Lie algebra
    % representation.
    function Rhess = ehess2rhess(X, Egrad, Ehess, H)
        % Reminder : H contains skew-symmetric matrices. The actual
        % direction that the point X is moved along is X*H.
        Xt = multitransp(X);
        XtEgrad = multiprod(Xt, Egrad);
        symXtEgrad = multisym(XtEgrad);
        XtEhess = multiprod(Xt, Ehess);
        Rhess = multiskew( XtEhess - multiprod(H, symXtEgrad) );
    end

    M.retr = @retraction;
    % QR-based retraction of the step t*U (t = 1 if omitted) at X, applied
    % to each of the k slices.
    function Y = retraction(X, U, t)
        if nargin == 3
            tU = t*U;
        else
            tU = U;
        end
        Y = X + multiprod(X, tU);
        for i = 1 : k
            % This QR-based retraction is only a first-order approximation
            % of the exponential map, not a second-order one.
            [Q, R] = qr(Y(:, :, i));
            % The instruction with R ensures we are not flipping signs
            % of some columns, which should never happen in modern Matlab
            % versions but may be an issue with older versions.
            Y(:, :, i) = Q * diag(sign(sign(diag(R))+.5));
            % This is guaranteed to always yield orthogonal matrices with
            % determinant +1. Simply look at the eigenvalues of a skew
            % symmetric matrix, then at those of identity plus that matrix,
            % and compute their product for the determinant: it's strictly
            % positive in all cases.
        end
    end

    % A second order retraction is implemented here. To force its use,
    % after creating the factory M, execute M.retr = M.retr2.
    M.retr2 = @retraction2;
    % SVD-based retraction of the step t*U (t = 1 if omitted) at X: each
    % slice of X + X*tU is replaced by the product of its left and
    % (transposed) right singular vector matrices.
    function Y = retraction2(X, U, t)
        if nargin == 3
            tU = t*U;
        else
            tU = U;
        end
        Y = X + multiprod(X, tU);
        for i = 1 : k
            % Index with the loop variable i. The previous code indexed
            % with k (the number of slices), so for k > 1 only the last
            % slice was orthogonalized and the others were returned as is.
            [Ui, ~, Vi] = svd(Y(:, :, i));
            Y(:, :, i) = Ui*Vi';
        end
    end

    M.exp = @exponential;
    % Exponential map: X times the matrix exponential of t*U (t = 1 if
    % omitted), slice by slice.
    function Y = exponential(X, U, t)
        if nargin == 3
            exptU = t*U;
        else
            exptU = U;
        end
        for i = 1 : k
            exptU(:, :, i) = expm(exptU(:, :, i));
        end
        Y = multiprod(X, exptU);
    end

    M.log = @logarithm;
    % Logarithmic map: matrix logarithm of X'*Y, slice by slice.
    function U = logarithm(X, Y)
        U = multiprod(multitransp(X), Y);
        for i = 1 : k
            % The result of logm should be real in theory, but it is
            % numerically useful to force it.
            U(:, :, i) = real(logm(U(:, :, i)));
        end
        % Ensure the tangent vector is in the Lie algebra.
        U = multiskew(U);
    end

    M.hash = @(X) ['z' hashmd5(X(:))];

    M.rand = @() randrot(n, k);

    M.randvec = @randomvec;
    % Random tangent vector (skew-symmetric slices) of unit norm. The
    % root point X is not used.
    function U = randomvec(X) %#ok
        U = randskew(n, k);
        nrmU = sqrt(U(:).'*U(:));
        U = U / nrmU;
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(x) zeros(n, n, k);

    M.transp = @(x1, x2, d) d;

    M.pairmean = @pairmean;
    % Midpoint obtained by following the logarithm from X1 to X2 halfway.
    function Y = pairmean(X1, X2)
        V = M.log(X1, X2);
        Y = M.exp(X1, .5*V);
    end

    M.dist = @(x, y) M.norm(x, M.log(x, y));

    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, [n, n, k]);
    M.vecmatareisometries = @() true;

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/specialeuclidean/specialeuclideanfactory.m
================================================
function M = specialeuclideanfactory(n, k)
% Returns a manifold structure to optimize over the special Euclidean group
%
% function M = specialeuclideanfactory(n)
% function M = specialeuclideanfactory(n, k)
%
% The special Euclidean group (the manifold of rigid transformations):
% This is a product manifold of the rotations group SO(n) and the
% translation group R^n, copied k times. (See note below.)
%
% Points on the manifold are represented as structures X with two fields.
% X.R is a 3D array of size nxnxk such that each slice X.R(:, :, i) % corresponds to a rotation matrix (orthogonal with determinant 1). % X.t is a matrix of size nxk such that each column X.t(:, i) corresponds % to a translation vector. % % Tangent vectors are represented as structures with the same fields. Note % that rotational components of the tangent vectors are represented in the % Lie algebra, i.e., each slice Xdot.R(:, :, i) is a skew-symmetric matrix. % Use M.tangent2ambient(X, Xdot) to obtain a representation in the ambient % space. This is often necessary when defining problem.ehess(X, Xdot). % % This is a description of SE(n)^k with the induced metric from the % embedding space (R^nxn)^k x (R^n)^k, i.e., this manifold is a Riemannian % submanifold of the embedding Euclidean space with the usual inner % product. % % By default, k = 1. % % Note: this is a product geometry: it may not be the "appropriate" % geometry to give to SE(n) for your application. In particular, this is % not the Lie geometry of SE(n), because SE(n) is not a direct product of % SO(n) and R^n: it is only a semidirect product. Following a comment by % Martijn Zeestraten on the Manopt forum, see this file for more % information about the Lie geometry: % http://ethaneade.com/lie.pdf % % See rotationsfactory and euclideanfactory for details. % % See also: rotationsfactory euclideanfactory % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Sep. 23, 2014. 
% Contributors:
% Change log:


    if ~exist('k', 'var') || isempty(k)
        k = 1;
    end

    % SE(n)^k is built as the product of the rotations factory (field R)
    % and the Euclidean factory (field t).
    elements = struct();
    elements.R = rotationsfactory(n, k);
    elements.t = euclideanfactory(n, k);

    M = productmanifold(elements);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/sphere/spherecomplexfactory.m
================================================
function M = spherecomplexfactory(n, m)
% Returns a manifold struct to optimize over unit-norm complex matrices.
%
% function M = spherecomplexfactory(n)
% function M = spherecomplexfactory(n, m)
%
% Manifold of n-by-m complex matrices of unit Frobenius norm.
% By default, m = 1, which corresponds to the unit sphere in C^n. The
% metric is such that the sphere is a Riemannian submanifold of the space
% of 2nx2m real matrices with the usual trace inner product, i.e., the
% usual metric.
%
% See also: spherefactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   Sep. 4, 2014 (NB):
%       Added ehess2rhess.
%
%   April 7, 2015 (NB):
%       Added vec/mat pair (for use with hessianspectrum, for example).
%
%   April 13, 2015 (NB):
%       Added logarithm
%
%   Oct. 8, 2016 (NB)
%       Code for exponential was simplified to only treat the zero vector
%       as a particular case.
%
%   Oct. 22, 2016 (NB)
%       Distance function dist now significantly more accurate for points
%       within 1e-7 and less from each other.


    if ~exist('m', 'var')
        m = 1;
    end

    if m == 1
        M.name = @() sprintf('Complex sphere S^%d', n-1);
    else
        M.name = @() sprintf('Unit F-norm %dx%d complex matrices', n, m);
    end

    M.dim = @() 2*(n*m)-1;

    M.inner = @(x, d1, d2) real(d1(:)'*d2(:));

    M.norm = @(x, d) norm(d, 'fro');

    M.dist = @(x, y) real(2*asin(.5*norm(x - y, 'fro')));

    M.typicaldist = @() pi;

    M.proj = @(x, d) reshape(d(:) - x(:)*(real(x(:)'*d(:))), n, m);

    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
    M.egrad2rgrad = M.proj;

    M.ehess2rhess = @ehess2rhess;
    % Riemannian Hessian along u at x from the Euclidean gradient and
    % Hessian.
    function rhess = ehess2rhess(x, egrad, ehess, u)
        rhess = M.proj(x, ehess) - real((x(:)'*egrad(:)))*u;
    end

    M.tangent = M.proj;

    M.exp = @exponential;

    M.retr = @retraction;

    M.log = @logarithm;
    % Logarithmic map: projects x2 - x1 to the tangent space at x1 and,
    % unless the points are very close, rescales it to have norm
    % M.dist(x1, x2).
    function v = logarithm(x1, x2)
        v = M.proj(x1, x2 - x1);
        di = M.dist(x1, x2);
        % If the two points are "far apart", correct the norm.
        if di > 1e-6
            nv = norm(v, 'fro');
            v = v * (di / nv);
        end
    end

    M.hash = @(x) ['z' hashmd5([real(x(:)) ; imag(x(:))])];

    M.rand = @() random(n, m);

    M.randvec = @(x) randomvec(n, m, x);

    M.lincomb = @matrixlincomb;

    M.zerovec = @(x) zeros(n, m);

    M.transp = @(x1, x2, d) M.proj(x2, d);

    M.pairmean = @pairmean;
    % Normalized sum of the two points.
    function y = pairmean(x1, x2)
        y = x1+x2;
        y = y / norm(y, 'fro');
    end

    % vec stacks the real parts and then the imaginary parts of the
    % entries of a tangent vector in a real column vector of length 2*m*n;
    % mat is its inverse.
    mn = m*n;
    M.vec = @(x, u_mat) [real(u_mat(:)) ; imag(u_mat(:))];
    % Tangent vectors are n-by-m, so the reshape target must be n-by-m.
    % (The previous code reshaped to m-by-n, which returned a matrix of
    % the wrong shape whenever m differs from n.)
    M.mat = @(x, u_vec) reshape(u_vec(1:mn), n, m) + 1i*reshape(u_vec((mn+1):end), n, m);
    M.vecmatareisometries = @() true;

end

% Exponential on the sphere
function y = exponential(x, d, t)

    if nargin == 2
        % t = 1;
        td = d;
    else
        td = t*d;
    end

    nrm_td = norm(td, 'fro');

    if nrm_td > 0
        y = x*cos(nrm_td) + td*(sin(nrm_td)/nrm_td);
    else
        y = x;
    end

end

% Retraction on the sphere
function y = retraction(x, d, t)

    if nargin == 2
        t = 1;
    end

    y = x+t*d;
    y = y/norm(y, 'fro');

end

% Uniform random sampling on the sphere.
function x = random(n, m)

    x = randn(n, m) + 1i*randn(n, m);
    x = x/norm(x, 'fro');

end

% Random normalized tangent vector at x.
function d = randomvec(n, m, x)

    d = randn(n, m) + 1i*randn(n, m);
    d = reshape(d(:) - x(:)*(real(x(:)'*d(:))), n, m);
    d = d / norm(d, 'fro');

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/sphere/spherefactory.m
================================================
function M = spherefactory(n, m)
% Returns a manifold struct to optimize over unit-norm vectors or matrices.
%
% function M = spherefactory(n)
% function M = spherefactory(n, m)
%
% Manifold of n-by-m real matrices of unit Frobenius norm.
% By default, m = 1, which corresponds to the unit sphere in R^n. The
% metric is such that the sphere is a Riemannian submanifold of the space
% of nxm matrices with the usual trace inner product, i.e., the usual
% metric.
%
% See also: obliquefactory spherecomplexfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   Oct. 8, 2016 (NB)
%       Code for exponential was simplified to only treat the zero vector
%       as a particular case.
%
%   Oct. 22, 2016 (NB)
%       Distance function dist now significantly more accurate for points
%       within 1e-7 and less from each other.
%
%   July 20, 2017 (NB)
%       Following conversations with Bruno Iannazzo and P.-A. Absil,
%       the distance function is now even more accurate.
%
%   Sep. 7, 2017 (NB)
%       New isometric vector transport available in M.isotransp,
%       contributed by Changshuo Liu.


    if ~exist('m', 'var')
        m = 1;
    end

    if m == 1
        M.name = @() sprintf('Sphere S^%d', n-1);
    else
        M.name = @() sprintf('Unit F-norm %dx%d matrices', n, m);
    end

    M.dim = @() n*m-1;

    M.inner = @(x, d1, d2) d1(:).'*d2(:);

    M.norm = @(x, d) norm(d, 'fro');

    M.dist = @dist;
    function d = dist(x, y)

        % The following code is mathematically equivalent to the
        % computation d = acos(x(:)'*y(:)) but is much more accurate when
        % x and y are close.
chordal_distance = norm(x - y, 'fro');
        d = real(2*asin(.5*chordal_distance));

        % Note: for x and y almost antipodal, the accuracy is good but not
        % as good as possible. One way to improve it is by using the
        % following branching:
        % % if chordal_distance > 1.9
        % %     d = pi - dist(x, -y);
        % % end
        % It is rarely necessary to compute distance between
        % almost-antipodal points with full accuracy in Manopt, hence we
        % favor a simpler code.

    end

    M.typicaldist = @() pi;

    M.proj = @(x, d) d - x*(x(:).'*d(:));

    M.tangent = M.proj;

    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
    M.egrad2rgrad = M.proj;

    M.ehess2rhess = @ehess2rhess;
    % Riemannian Hessian along u at x from the Euclidean gradient and
    % Hessian.
    function rhess = ehess2rhess(x, egrad, ehess, u)
        rhess = M.proj(x, ehess) - (x(:)'*egrad(:))*u;
    end

    M.exp = @exponential;

    M.retr = @retraction;

    M.log = @logarithm;
    % Logarithmic map: projects x2 - x1 to the tangent space at x1 and,
    % unless the points are very close, rescales it to have norm
    % M.dist(x1, x2).
    function v = logarithm(x1, x2)
        v = M.proj(x1, x2 - x1);
        di = M.dist(x1, x2);
        % If the two points are "far apart", correct the norm.
        if di > 1e-6
            nv = norm(v, 'fro');
            v = v * (di / nv);
        end
    end

    M.hash = @(x) ['z' hashmd5(x(:))];

    M.rand = @() random(n, m);

    M.randvec = @(x) randomvec(n, m, x);

    M.lincomb = @matrixlincomb;

    M.zerovec = @(x) zeros(n, m);

    M.transp = @(x1, x2, d) M.proj(x2, d);

    % Isometric vector transport of d from the tangent space at x1 to x2.
    % This is actually a parallel vector transport, see 5 in
    % http://epubs.siam.org/doi/pdf/10.1137/16M1069298
    % "A Riemannian Gradient Sampling Algorithm for Nonsmooth Optimization
    %  on Manifolds", by Hosseini and Uschmajew, SIOPT 2017
    M.isotransp = @(x1, x2, d) isometricTransp(x1, x2, d);
    function Td = isometricTransp(x1, x2, d)
        v = logarithm(x1, x2);
        dist_x1x2 = norm(v, 'fro');
        if dist_x1x2 > 0
            u = v / dist_x1x2;
            utd = u(:)'*d(:);
            Td = d + (cos(dist_x1x2)-1)*utd*u ...
                   -  sin(dist_x1x2)   *utd*x1;
        else
            % x1 == x2, so the transport is identity
            Td = d;
        end
    end

    M.pairmean = @pairmean;
    % Normalized sum of the two points.
    function y = pairmean(x1, x2)
        y = x1+x2;
        y = y / norm(y, 'fro');
    end

    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, [n, m]);
    M.vecmatareisometries = @() true;

end

% Exponential on the sphere
function y = exponential(x, d, t)

    if nargin == 2
        % t = 1
        td = d;
    else
        td = t*d;
    end

    nrm_td = norm(td, 'fro');

    % Former versions of Manopt avoided the computation of sin(a)/a for
    % small a, but further investigations suggest this computation is
    % well-behaved numerically.
    if nrm_td > 0
        y = x*cos(nrm_td) + td*(sin(nrm_td)/nrm_td);
    else
        y = x;
    end

end

% Retraction on the sphere
function y = retraction(x, d, t)

    if nargin == 2
        % t = 1;
        td = d;
    else
        td = t*d;
    end

    y = x + td;
    y = y / norm(y, 'fro');

end

% Uniform random sampling on the sphere.
function x = random(n, m)

    x = randn(n, m);
    x = x / norm(x, 'fro');

end

% Random normalized tangent vector at x.
function d = randomvec(n, m, x)

    d = randn(n, m);
    d = d - x*(x(:).'*d(:));
    d = d / norm(d, 'fro');

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/sphere/spheresymmetricfactory.m
================================================
function M = spheresymmetricfactory(n)
% Returns a manifold struct to optimize over unit-norm symmetric matrices.
%
% function M = spheresymmetricfactory(n)
%
% Manifold of n-by-n real symmetric matrices of unit Frobenius norm.
% The metric is such that the sphere is a Riemannian submanifold of the
% space of nxn symmetric matrices with the usual trace inner product, i.e.,
% the usual metric <A, B> = trace(A'*B).
%
% See also: spherefactory obliquefactory spherecomplexfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 17, 2015.
% Contributors:
% Change log:
%
%   Oct. 8, 2016 (NB)
%       Code for exponential was simplified to only treat the zero vector
%       as a particular case.
%
%   Oct. 22, 2016 (NB)
%       Distance function dist now significantly more accurate for points
%       within 1e-7 and less from each other.
%
%   July 20, 2017 (NB)
%       The distance function is now even more accurate.


    M.name = @() sprintf('Sphere of symmetric matrices of size %d', n);

    M.dim = @() n*(n+1)/2 - 1;

    M.inner = @(x, d1, d2) d1(:).'*d2(:);

    M.norm = @(x, d) norm(d, 'fro');

    M.dist = @(x, y) real(2*asin(.5*norm(x - y, 'fro')));

    M.typicaldist = @() pi;

    M.proj = @proj;
    % Symmetrizes d, then removes its component along x.
    function xdot = proj(x, d)
        d = (d+d.')/2;
        xdot = d - x*(x(:).'*d(:));
    end

    M.tangent = @proj;

    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
    M.egrad2rgrad = @proj;

    M.ehess2rhess = @ehess2rhess;
    % Riemannian Hessian along u at x from the Euclidean gradient and
    % Hessian.
    function rhess = ehess2rhess(x, egrad, ehess, u)
        % these are not explicitly required, given the use.
        % egrad = (egrad + egrad.')/2;
        % ehess = (ehess + ehess.')/2;
        rhess = proj(x, ehess) - (x(:)'*egrad(:))*u;
    end

    M.exp = @exponential;

    M.retr = @retraction;

    M.log = @logarithm;
    % Logarithmic map: projects x2 - x1 to the tangent space at x1 and,
    % unless the points are very close, rescales it to have norm
    % M.dist(x1, x2).
    function v = logarithm(x1, x2)
        v = proj(x1, x2 - x1);
        di = M.dist(x1, x2);
        % If the two points are "far apart", correct the norm.
        if di > 1e-6
            nv = norm(v, 'fro');
            v = v * (di / nv);
        end
    end

    M.hash = @(x) ['z' hashmd5(x(:))];

    M.rand = @() random(n);

    M.randvec = @(x) randomvec(n, x);

    M.lincomb = @matrixlincomb;

    M.zerovec = @(x) zeros(n);

    M.transp = @(x1, x2, d) proj(x2, d);

    M.pairmean = @pairmean;
    % Normalized sum of the two points.
    function y = pairmean(x1, x2)
        y = x1+x2;
        y = y / norm(y, 'fro');
    end

    % TODO : check isometry and fix.
    M.vec = @(x, u_mat) u_mat(:);
    % Tangent vectors are n-by-n matrices. (The previous code reshaped to
    % [n, m], but no variable m exists in this function, so every call to
    % M.mat raised an error.)
    M.mat = @(x, u_vec) reshape(u_vec, [n, n]);
    M.vecmatareisometries = @() false;

end

% Exponential on the sphere
function y = exponential(x, d, t)

    if nargin == 2
        % t = 1;
        td = d;
    else
        td = t*d;
    end

    nrm_td = norm(td, 'fro');

    if nrm_td > 0
        y = x*cos(nrm_td) + td*(sin(nrm_td)/nrm_td);
    else
        y = x;
    end

end

% Retraction on the sphere
function y = retraction(x, d, t)

    if nargin == 2
        t = 1;
    end

    y = x + t*d;
    y = y / norm(y, 'fro');

end

% Uniform random sampling on the sphere.
function x = random(n)

    x = randn(n);
    x = (x + x.')/2;
    x = x/norm(x, 'fro');

end

% Random normalized tangent vector at x.
function d = randomvec(n, x)

    d = randn(n);
    d = (d + d.')/2;
    d = d - x*(x(:).'*d(:));
    d = d / norm(d, 'fro');

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/stiefel/stiefelcomplexfactory.m
================================================
function M = stiefelcomplexfactory(n, p, k)
% Returns a manifold struct. to optimize over complex orthonormal matrices.
%
% function M = stiefelcomplexfactory(n, p)
% function M = stiefelcomplexfactory(n, p, k)
%
% The complex Stiefel manifold is the set of complex orthonormal nxp
% matrices. If k is larger than 1, this is the Cartesian product of the
% complex Stiefel manifold taken k times. The metric is such that the
% manifold is a Riemannian submanifold of C^nxp equipped with the usual
% real-trace inner product, that is, it is the usual metric for the complex
% plane identified with R^2.
%
% Points are represented as matrices X of size n x p x k (or n x p if k=1,
% which is the default) such that each complex n x p matrix is orthonormal,
% i.e., X'*X = eye(p) if k = 1, or X(:, :, i)' * X(:, :, i) = eye(p) for
% i = 1 : k if k > 1. Tangent vectors are represented as matrices the same
% size as points.
%
% By default, k = 1.
%
%
% Please cite the Manopt paper as well as either of these research papers
% pertaining to this specific geometry:
% @InProceedings{sato2013complex,
%   Title        = {A complex singular value decomposition algorithm based on the {R}iemannian {N}ewton method},
%   Author       = {Sato, H. and Iwai, T.},
%   Booktitle    = {Decision and Control ({CDC}), 2013 {IEEE} 52nd Annual Conference on},
%   Year         = {2013},
%   Organization = {IEEE},
%   Pages        = {2972--2978}
% }
% @InProceedings{sato2014Riemannian,
%   Title        = {{R}iemannian conjugate gradient method for complex singular value decomposition problem},
%   Author       = {Sato, H.},
%   Booktitle    = {Decision and Control ({CDC}), 2014 {IEEE} 53rd Annual Conference on},
%   Year         = {2014},
%   Organization = {IEEE},
%   Pages        = {5849--5854}
% }
%
%
% See also: stiefelfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Hiroyuki Sato, April 27, 2015.
% Contributors:
% Change log:

    % k is optional: an omitted or empty value means a single copy.
    if nargin < 3 || isempty(k)
        k = 1;
    end

    % Reject anything that is not at least 1 before naming the manifold.
    if ~(k >= 1)
        error('k must be an integer no less than 1.');
    end
    if k == 1
        M.name = @() sprintf('Complex Stiefel manifold St(%d, %d)', n, p);
    else
        M.name = @() sprintf('Product complex Stiefel manifold St(%d, %d)^%d', n, p, k);
    end

    % Complex counterpart of the real Stiefel dimension
    % k*(n*p - .5*p*(p+1)).
    M.dim = @() k*(2*n*p - p^2);

    % Real part of the trace inner product.
    M.inner = @(x, d1, d2) real(d1(:)'*d2(:));

    M.norm = @(x, d) norm(d(:));

    M.dist = @(x, y) error('stiefel.dist not implemented yet.');

    M.typicaldist = @() sqrt(p*k);

    M.proj = @projection;
    % Tangent-space projection of U at X. Same formula as in the real
    % Stiefel factory, with multihconj and multiherm in place of
    % multitransp and multisym.
    function Up = projection(X, U)
        hermPart = multiherm(multiprod(multihconj(X), U));
        Up = U - multiprod(X, hermPart);
    end

    M.tangent = M.proj;

    % On a Riemannian submanifold the Riemannian gradient is the
    % orthogonal projection of the Euclidean gradient.
    M.egrad2rgrad = M.proj;

    M.ehess2rhess = @ehess2rhess;
    % Riemannian Hessian along H at X from the Euclidean gradient and
    % Hessian, again using the Hermitian helpers multihconj/multiherm.
    function rhess = ehess2rhess(X, egrad, ehess, H)
        hermXG = multiherm(multiprod(multihconj(X), egrad));
        correction = multiprod(H, hermXG);
        rhess = projection(X, ehess - correction);
    end

    M.retr = @retraction;
    % QR-based retraction of the step t*U (t = 1 if omitted) at X.
    function Y = retraction(X, U, t)
        if nargin < 3
            t = 1.0;
        end
        Y = X + t*U;
        for s = 1 : k
            [Qs, Rs] = qr(Y(:, :, s), 0);
            % Multiplying by the signs of diag(Rs) guards against column
            % sign flips, which should not occur in modern Matlab
            % versions but may with older ones.
            Y(:, :, s) = Qs * diag(sign(sign(diag(Rs))+.5));
        end
    end

    M.exp = @exponential;
    % Exponential map of the step t*U (t = 1 if omitted) at X.
    function Y = exponential(X, U, t)
        if nargin == 2
            t = 1;
        end
        tU = t*U;
        Y = zeros(size(X));
        for s = 1 : k
            % From a formula by Ross Lippert, Example 5.4.2 in AMS08.
            Xs = X(:, :, s);
            Us = tU(:, :, s);
            XsUs = Xs'*Us;
            Y(:, :, s) = [Xs Us] * ...
                         expm([XsUs , -Us'*Us ; eye(p) , XsUs]) * ...
                         [ expm(-XsUs) ; zeros(p) ];
        end
    end

    % Real and imaginary parts are hashed together.
    M.hash = @(X) ['z' hashmd5([real(X(:)) ; imag(X(:))])];

    M.rand = @random;
    % Random point: for each slice, the Q factor of the economy-size QR
    % factorization of a complex Gaussian n-by-p matrix.
    function X = random()
        X = zeros(n, p, k);
        for s = 1 : k
            [Qs, ~] = qr(randn(n, p) + 1i*randn(n,p), 0);
            X(:, :, s) = Qs;
        end
    end

    M.randvec = @randomvec;
    % Random tangent vector at X of unit norm: projection of a complex
    % Gaussian array, normalized as a whole.
    function U = randomvec(X)
        gauss = randn(n, p, k) + 1i*randn(n, p, k);
        U = projection(X, gauss);
        U = U / norm(U(:));
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(x) zeros(n, p, k);

    M.transp = @(x1, x2, d) projection(x2, d);

    % vec stacks real parts, then imaginary parts, in one real column
    % vector; mat reverses that.
    len = n*p*k;
    M.vec = @(x, u_mat) [real(u_mat(:)) ; imag(u_mat(:))];
    M.mat = @(x, u_vec) reshape(u_vec(1:len) + 1i*u_vec((len+1):end), [n, p, k]);
    M.vecmatareisometries = @() true; % TODO : to check.

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/stiefel/stiefelfactory.m
================================================
function M = stiefelfactory(n, p, k)
% Returns a manifold structure to optimize over orthonormal matrices.
% % function M = stiefelfactory(n, p) % function M = stiefelfactory(n, p, k) % % The Stiefel manifold is the set of orthonormal nxp matrices. If k % is larger than 1, this is the Cartesian product of the Stiefel manifold % taken k times. The metric is such that the manifold is a Riemannian % submanifold of R^nxp equipped with the usual trace inner product, that % is, it is the usual metric. % % Points are represented as matrices X of size n x p x k (or n x p if k=1, % which is the default) such that each n x p matrix is orthonormal, % i.e., X'*X = eye(p) if k = 1, or X(:, :, i)' * X(:, :, i) = eye(p) for % i = 1 : k if k > 1. Tangent vectors are represented as matrices the same % size as points. % % By default, k = 1. % % See also: grassmannfactory rotationsfactory % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Dec. 30, 2012. % Contributors: % Change log: % July 5, 2013 (NB) : Added ehess2rhess. % Jan. 27, 2014 (BM) : Bug in ehess2rhess corrected. % June 24, 2014 (NB) : Added true exponential map and changed the randvec % function so that it now returns a globally % normalized vector, not a vector where each % component is normalized (this only matters if k>1). if ~exist('k', 'var') || isempty(k) k = 1; end if k == 1 M.name = @() sprintf('Stiefel manifold St(%d, %d)', n, p); elseif k > 1 M.name = @() sprintf('Product Stiefel manifold St(%d, %d)^%d', n, p, k); else error('k must be an integer no less than 1.'); end M.dim = @() k*(n*p - .5*p*(p+1)); M.inner = @(x, d1, d2) d1(:).'*d2(:); M.norm = @(x, d) norm(d(:)); M.dist = @(x, y) error('stiefel.dist not implemented yet.'); M.typicaldist = @() sqrt(p*k); M.proj = @projection; function Up = projection(X, U) XtU = multiprod(multitransp(X), U); symXtU = multisym(XtU); Up = U - multiprod(X, symXtU); % The code above is equivalent to, but much faster than, the code below. 
% % Up = zeros(size(U)); % function A = sym(A), A = .5*(A+A'); end % for i = 1 : k % Xi = X(:, :, i); % Ui = U(:, :, i); % Up(:, :, i) = Ui - Xi*sym(Xi'*Ui); % end end M.tangent = M.proj; % For Riemannian submanifolds, converting a Euclidean gradient into a % Riemannian gradient amounts to an orthogonal projection. M.egrad2rgrad = M.proj; M.ehess2rhess = @ehess2rhess; function rhess = ehess2rhess(X, egrad, ehess, H) XtG = multiprod(multitransp(X), egrad); symXtG = multisym(XtG); HsymXtG = multiprod(H, symXtG); rhess = projection(X, ehess - HsymXtG); end M.retr = @retraction; function Y = retraction(X, U, t) if nargin < 3 t = 1.0; end Y = X + t*U; for i = 1 : k [Q, R] = qr(Y(:, :, i), 0); % The instruction with R assures we are not flipping signs % of some columns, which should never happen in modern Matlab % versions but may be an issue with older versions. Y(:, :, i) = Q * diag(sign(sign(diag(R))+.5)); end end M.exp = @exponential; function Y = exponential(X, U, t) if nargin == 2 t = 1; end tU = t*U; Y = zeros(size(X)); for i = 1 : k % From a formula by Ross Lippert, Example 5.4.2 in AMS08. Xi = X(:, :, i); Ui = tU(:, :, i); Y(:, :, i) = [Xi Ui] * ... expm([Xi'*Ui , -Ui'*Ui ; eye(p) , Xi'*Ui]) * ... 
[ expm(-Xi'*Ui) ; zeros(p) ]; end end M.hash = @(X) ['z' hashmd5(X(:))]; M.rand = @random; function X = random() X = zeros(n, p, k); for i = 1 : k [Q, unused] = qr(randn(n, p), 0); %#ok X(:, :, i) = Q; end end M.randvec = @randomvec; function U = randomvec(X) U = projection(X, randn(n, p, k)); U = U / norm(U(:)); end M.lincomb = @matrixlincomb; M.zerovec = @(x) zeros(n, p, k); M.transp = @(x1, x2, d) projection(x2, d); M.vec = @(x, u_mat) u_mat(:); M.mat = @(x, u_vec) reshape(u_vec, [n, p, k]); M.vecmatareisometries = @() true; end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/stiefel/stiefelgeneralizedfactory.m ================================================ function M = stiefelgeneralizedfactory(n, p, B) % Returns a manifold structure of "scaled" orthonormal matrices. % % function M = stiefelgeneralizedfactory(n, p) % function M = stiefelgeneralizedfactory(n, p, B) % % The generalized Stiefel manifold is the set of "scaled" orthonormal % nxp matrices X such that X'*B*X is identity. B must be positive definite. % If B is identity, then this is the standard Stiefel manifold. % % The generalized Stiefel manifold is endowed with a scaled metric % by making it a Riemannian submanifold of the Euclidean space, % again endowed with the scaled inner product. % % Some notions (not all) are from Section 4.5 of the paper % "The geometry of algorithms with orthogonality constraints", % A. Edelman, T. A. Arias, S. T. Smith, SIMAX, 1998. % % Paper link: http://arxiv.org/abs/physics/9806030. % % Note: egrad2rgrad and ehess2rhess involve solving linear systems in B. If % this is a bottleneck for a specific application, then a way forward is to % create a modified version of this file which preprocesses B to speed this % up (typically, by computing a Cholesky factorization of it, then calling % an appropriate solver). 
%
% See also: stiefelfactory grassmannfactory grassmanngeneralizedfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, June 30, 2015.
% Contributors:
%
% Change log:
%

    % Without B (or with an empty B), fall back to the identity.
    if nargin < 3 || isempty(B)
        B = speye(n); % Standard Stiefel manifold.
    end

    % Auxiliary: symmetric part of a square matrix. It lives in the factory
    % workspace, so the nested functions below can call it.
    symm = @(D) (D + D')/2;

    M.name = @() sprintf('Generalized Stiefel manifold St(%d, %d)', n, p);

    M.dim = @() (n*p - .5*p*(p+1));

    M.inner = @(X, eta, zeta) trace(eta'*(B*zeta)); % Scaled metric.

    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));

    M.dist = @(X, Y) error('stiefelgeneralizedfactory.dist not implemented yet.');

    M.typicaldist = @() sqrt(p);

    % Orthogonal projection of an ambient vector U to the tangent space
    % at X.
    M.proj = @projection;
    function Up = projection(X, U)
        Up = U - X*symm((B*X)'*U);
    end

    M.tangent = M.proj;

    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        % The Euclidean gradient is first scaled by the inverse of B (to
        % account for the scaled metric), then projected onto the tangent
        % space. The projection term would read
        %     X*symm((B*X)'*(B\egrad));
        % the original author's note asks to verify that this equals
        % X*symm(X'*egrad), which is the form used here.
        rgrad = (B\egrad) - X*symm(X'*egrad);
    end

    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, H)
        % Directional derivative of the Riemannian gradient along H
        % (ehess plays the role of the derivative of egrad).
        rgraddot = (B\ehess) - H*symm(X'*egrad)...
                             - X*symm(H'*egrad)...
                             - X*symm(X'*ehess);

        % Project onto the tangent space.
        rhess = projection(X, rgraddot);
    end

    M.retr = @retraction;
    function Y = retraction(X, U, t)
        if nargin < 3
            t = 1.0;
        end
        Y = guf(X + t*U); % Ensure that Y'*B*Y is identity.
    end

    % No true exponential is available: the retraction is returned instead
    % and a warning is issued on every call.
    M.exp = @exponential;
    function Y = exponential(X, Z, t)
        if nargin < 3
            t = 1.0;
        end
        Y = retraction(X, Z, t);
        warning('manopt:stiefelgeneralizedfactory:exp', ...
               ['Exponential for generalized Stiefel manifold ' ...
                'manifold not implemented yet. Used retraction instead.']);
    end

    M.hash = @(X) ['z' hashmd5(X(:))];

    M.rand = @random;
    function X = random()
        X = guf(randn(n, p)); % Ensure that X'*B*X is identity.
    end

    M.randvec = @randomvec;
    function U = randomvec(X)
        U = projection(X, randn(n, p));
        % NOTE(review): the normalization uses the plain Euclidean norm,
        % not M.norm (which is scaled by B) -- confirm this is intended.
        U = U / norm(U(:));
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(X) zeros(n, p);

    % This transport is compatible with the generalized polar retraction.
    M.transp = @(X1, X2, d) projection(X2, d);

    M.vec = @(X, u_mat) u_mat(:);
    M.mat = @(X, u_vec) reshape(u_vec, [n, p]);
    M.vecmatareisometries = @() false;

    function X = guf(Y)
        % Generalized polar decomposition of an n-by-p matrix Y.
        % X'*B*X is identity.

        % Method 1
        [left, ~, right] = svd(Y, 0);

        % Instead of the following three steps, an equivalent, but an
        % expensive way is to do X = left*(sqrtm(left'*(B*left))\(right')).
        [evecs, evals] = eig(left'*(B*left));
        scaled = evecs/sparse(diag(sqrt(diag(evals))));
        X = left*((scaled*evecs')*right'); % X'*B*X is identity.

        % Another computation using restricted_svd
        % [u, ~, v] = restricted_svd(Y);
        % X = u*v'; % X'*B*X is identity.
    end

    function [u, s, v] = restricted_svd(Y)
        % We compute a thin svd-like decomposition of an n-by-p matrix Y
        % into matrices u, s, and v such that u is an n-by-p matrix
        % with u'*B*u being identity, s is a p-by-p diagonal matrix
        % with positive entries, and v is a p-by-p orthogonal matrix.
        % Y = u*s*v'.
        [v, evals] = eig(symm(Y'*(B*Y))); % Y'*B*Y is positive definite
        s = sparse(diag(abs(sqrt(diag(evals)))));
        u = Y*(v/s); % u'*B*u is identity.
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/stiefel/stiefelstackedfactory.m
================================================
function M = stiefelstackedfactory(m, d, k)
% Stiefel(k, d)^m, represented as matrices of size m*d-by-k.
%
% function M = stiefelstackedfactory(m, d, k)
%
% Points on this manifold are matrices Y of size n x k, with n = m*d.
% Y is thought of as m matrices of size d x k each, stacked on top of each
% other. Call them Y1, ..., Ym. Each Yi is an orthonormal matrix, that is,
% its d rows are unit norm and are orthogonal to each other. Thus, this
% geometry is a product of Stiefel manifolds.
%
% To easily transform matrices Y to 3D arrays Y3 of size d x k x m such
% that each slice Y3(:, :, i) corresponds to one of the matrices Yi, use
% the functions
%
%   Y3 = M.to3D(Y)   and   Y = M.to2D(Y3).
%
% The ambient space R^(nxk) is endowed with the usual inner product
% <A, B> = trace(A'*B). This inner product is restricted to the tangent
% spaces of the present manifold, thus making it a Riemannian submanifold
% of the Euclidean space R^(nxk). Tangent vectors are represented as
% matrices of the same size as Y, and can likewise be converted to 3D
% arrays and back using to3D() and to2D().
%
% In dealing with this geometry, especially when dealing with the 3D array
% representations of points and tangent vectors, the tools multiprod,
% multitransp, multitrace, multiscale etc. available in Manopt are often
% useful.
%
% See also: stiefelfactory obliquefactory multiprod multitransp

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, May 4, 2015.
% Contributors:
% Change log:

    assert(k >= d, 'k must be at least as large as d.');

    % Number of rows of the stacked (2D) representation.
    n = m*d;

    M.name = @() sprintf('Manifold of %d orthonormal matrices of size %dx%d, stacked', m, d, k);

    M.dim = @() m*(k*d - .5*d*(d+1));

    M.size = @() [m, d, k];

    M.inner = @(x, d1, d2) d1(:).'*d2(:);

    M.norm = @(x, d) norm(d(:));

    M.dist = @(x, y) error('stiefelstackedfactory.dist not implemented yet.');

    M.typicaldist = @() sqrt(M.dim());

    % Convert a dxkxm matrix to an nxk matrix
    M.to2D = @to2D;
    function A2 = to2D(A3)
        A2 = reshape(multitransp(A3), [k, n])';
    end

    % Convert an nxk matrix to a dxkxm matrix
    M.to3D = @to3D;
    function A3 = to3D(A2)
        A3 = multitransp(reshape(A2', [k, d, m]));
    end

    % Given 2 3D matrices A and B of size dxkxm, returns a 3D matrix C of
    % size dxdxm such that each slice C(:, :, i) is the symmetric part of
    % the product A(:, :, i) * B(:, :, i)'. The name is short for
    % "symmetric-block-diagonal", because if A and B were transformed to
    % their 2D equivalents via to2D, then the output would contain the
    % symmetric parts of the diagonal blocks of A*B'.
    M.symbdiag = @symbdiag;
    function C = symbdiag(A, B)
        C = multisym(multiprod(A, multitransp(B)));
    end

    % Orthogonal projection from the ambient space R^(nxk) to the tangent
    % space at X.
    M.proj = @projection;
    function Zt = projection(Y, Z)
        Y3 = to3D(Y);
        Z3 = to3D(Z);
        Zt = to2D(Z3 - multiprod(symbdiag(Y3, Z3), Y3));
    end

    M.tangent = M.proj;
    M.egrad2rgrad = M.proj;

    % Subtracts the block-wise correction C*Ydot from the Euclidean
    % Hessian, then projects to the tangent space at Y.
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(Y, egrad, ehess, Ydot)
        C = symbdiag(to3D(Y), to3D(egrad));
        correction = to2D(multiprod(C, to3D(Ydot)));
        rhess = projection(Y, ehess - correction);
    end

    % Retraction: polar factor (u*v' from the thin SVD) of every block of
    % Y + t*U.
    M.retr = @retraction;
    function Y = retraction(Y, U, t)
        if nargin < 3
            t = 1.0;
        end
        Y3 = to3D(Y + t*U);
        for blk = 1 : m
            % Orthonormalize the rows of Y3(:, :, blk):
            [u, s, v] = svd(Y3(:, :, blk), 'econ'); %#ok
            Y3(:, :, blk) = u*v';
            % Alternative code if one desires to use QR instead of SVD.
            % The instruction with the signs of R assures we are not
            % flipping signs of some columns.
            % [Q, R] = qr(Y3(:, :, blk)', 0);
            % Y3(:, :, blk) = (Q * diag(sign(sign(diag(R))+.5)))';
        end
        Y = to2D(Y3);
    end

    % Exponential map, computed block by block on the transposed (k x d)
    % blocks.
    M.exp = @exponential;
    function Y = exponential(Y, U, t)
        if nargin == 2
            t = 1;
        end
        tU3 = multitransp(to3D(t*U));
        Y3 = multitransp(to3D(Y));
        % From a formula by Ross Lippert, Example 5.4.2 in AMS08.
        for blk = 1 : m
            X = Y3(:, :, blk);
            Z = tU3(:, :, blk);
            XtZ = X'*Z;
            moved = [X, Z] * ...
                    expm([ XtZ , -Z'*Z ; eye(d) , XtZ]) * ...
                    [ expm(-XtZ) ; zeros(d) ];
            % We may lose orthonormality here. Just to be sure:
            [u, s, v] = svd(moved, 'econ'); %#ok
            Y3(:, :, blk) = u*v';
        end
        Y = to2D(multitransp(Y3));
    end

    M.hash = @(Y) ['z' hashmd5(Y(:))];

    % Random point: each block is the transpose of the thin Q-factor of a
    % k-by-d Gaussian matrix.
    M.rand = @random;
    function Y = random()
        Y3 = zeros(d, k, m);
        for blk = 1 : m
            [Q, unused] = qr(randn(k, d), 0); %#ok
            Y3(:, :, blk) = Q';
        end
        Y = to2D(Y3);
    end

    % Random tangent vector at Y, scaled to unit norm.
    M.randvec = @randomvec;
    function U = randomvec(Y)
        U = projection(Y, randn(n, k));
        U = U / M.norm(Y, U);
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(x) zeros(n, k);

    M.transp = @(x1, x2, u) projection(x2, u);

    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, [n, k]);
    M.vecmatareisometries = @() true;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/symfixedrank/elliptopefactory.m
================================================
function M = elliptopefactory(n, k)
% Manifold of n-by-n psd matrices of rank k with unit diagonal elements.
%
% function M = elliptopefactory(n, k)
%
% A point X on the manifold is parameterized as YY^T where Y is a matrix of
% size nxk. As such, X is symmetric, positive semidefinite. We restrict to
% full-rank Y's, such that X has rank exactly k. The point X is numerically
% represented by Y (this is more efficient than working with X, which may
% be big).
% Tangent vectors are represented as matrices of the same size as
% Y, call them Ydot, so that Xdot = Y Ydot' + Ydot Y and diag(Xdot) == 0.
% The metric is the canonical Euclidean metric on Y.
%
% The diagonal constraints on X (X(i, i) == 1 for all i) translate to
% unit-norm constraints on the rows of Y: norm(Y(i, :)) == 1 for all i.
% The set of such Y's forms the oblique manifold. But because for any
% orthogonal Q of size k, it holds that (YQ)(YQ)' = YY', we "group" all
% matrices of the form YQ in an equivalence class. The set of equivalence
% classes is a Riemannian quotient manifold, implemented here.
%
% Note that this geometry formally breaks down at rank-deficient Y's.
% This does not appear to be a major issue in practice when optimization
% algorithms converge to rank-deficient Y's, but convergence theorems no
% longer hold. As an alternative, you may use the oblique manifold (it has
% larger dimension, but does not break down at rank drop.)
%
% The geometry is taken from the 2010 paper:
% M. Journee, P.-A. Absil, F. Bach and R. Sepulchre,
% "Low-Rank Optimization on the Cone of Positive Semidefinite Matrices".
% Paper link: http://www.di.ens.fr/~fbach/journee2010_sdp.pdf
%
%
% Please cite the Manopt paper as well as the research paper:
%     @Article{journee2010low,
%       Title   = {Low-rank optimization on the cone of positive semidefinite matrices},
%       Author  = {Journ{\'e}e, M. and Bach, F. and Absil, P.-A. and Sepulchre, R.},
%       Journal = {SIAM Journal on Optimization},
%       Year    = {2010},
%       Number  = {5},
%       Pages   = {2327--2351},
%       Volume  = {20},
%       Doi     = {10.1137/080731359}
%     }
%
%
% See also: obliquefactory symfixedrankYYfactory spectrahedronfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, July 12, 2013.
% Contributors:
% Change log:
%   July 18, 2013 (NB):
%       Fixed projection operator for rank-deficient Y'Y.
%
%   Aug.  8, 2013 (NB):
%       No longer using nested functions, to aim at Octave compatibility.
%       Sign error in right hand side of the call to minres corrected.
%
%   June 24, 2014 (NB):
%       Used code snippets from obliquefactory to speed up projection,
%       retraction, egrad2rgrad and rand: the code now uses bsxfun for this.
%
%   April 3, 2015 (NB):
%       Replaced trace(A'*B) by A(:)'*B(:) : equivalent but faster.

% TODO: modify normalize_rows and project_rows to work without transposes.

    % The projection prefers lyap and falls back to an iterative solver
    % when it is missing; warn once at construction time.
    if ~exist('lyap', 'file')
        warning('manopt:elliptopefactory:slowlyap', ...
               ['The function lyap to solve Lyapunov equations seems not to ' ...
                'be available. This may slow down optimization over this ' ...
                'manifold significantly. lyap is part of the control system ' ...
                'toolbox.']);
    end

    if k < 2
        warning('manopt:elliptopefactory:lowk', ...
                'k should be an integer >= 2. At k = 1, the set is discrete.');
    end

    % The format string has three conversions (rows x columns of X, and the
    % rank): X is n-by-n, so n must be passed twice. With only (n, k),
    % sprintf stops at the third conversion and the name is truncated.
    M.name = @() sprintf('YY'' quotient manifold of %dx%d psd matrices of rank %d with diagonal elements being 1', n, n, k);

    % Compared with the unconstrained YY' geometry (k*n - k*(k-1)/2), the
    % extra -1 per row is because of the diagonal constraint: each of the
    % n rows of Y has unit norm.
    M.dim = @() n*(k-1) - k*(k-1)/2;

    % Euclidean metric on the total space
    M.inner = @(Y, eta, zeta) eta(:)'*zeta(:);

    M.norm = @(Y, eta) sqrt(M.inner(Y, eta, eta));

    M.dist = @(Y, Z) error('elliptopefactory.dist not implemented yet.');

    M.typicaldist = @() 10*k;

    M.proj = @projection;

    M.tangent = M.proj;
    M.tangent2ambient = @(Y, eta) eta;

    M.retr = @retraction;

    M.egrad2rgrad = @egrad2rgrad;

    M.ehess2rhess = @ehess2rhess;

    M.exp = @exponential;

    % Notice that the hash of two equivalent points will be different...
    M.hash = @(Y) ['z' hashmd5(Y(:))];

    M.rand = @() random(n, k);

    M.randvec = @randomvec;

    M.lincomb = @matrixlincomb;

    M.zerovec = @(Y) zeros(n, k);

    M.transp = @(Y1, Y2, d) projection(Y2, d);

    M.vec = @(Y, u_mat) u_mat(:);
    M.mat = @(Y, u_vec) reshape(u_vec, [n, k]);
    M.vecmatareisometries = @() true;

end

% Given a matrix X, returns the same matrix but with each row scaled so
% that it has unit 2-norm (the code transposes, scales the columns, and
% transposes back).
% See obliquefactory.
function X = normalize_rows(X)
    X = X';
    norms = sqrt(sum(X.^2, 1));
    X = bsxfun(@times, X, 1./norms);
    X = X';
end

% Orthogonal projection of each row of H to the tangent space at the
% corresponding row of X, seen as a point on a sphere.
% See obliquefactory.
function PXH = project_rows(X, H)
    X = X';
    H = H';
    % Compute the inner product between each vector H(:, i) with its root
    % point X(:, i), that is, X(:, i).' * H(:, i). Returns a row vector.
    inners = sum(X.*H, 1);
    % Subtract from H the components of the H(:, i)'s that are parallel to
    % the root points X(:, i).
    PXH = H - bsxfun(@times, X, inners);
    PXH = PXH';
end

% Projection onto the tangent space, i.e., on the tangent space of
% ||Y(i, :)|| = 1, followed by the projection onto the horizontal space.
function etaproj = projection(Y, eta)
    [unused, k] = size(Y); %#ok
    eta = project_rows(Y, eta);

    % Projection onto the horizontal space: find Omega such that
    % SS*Omega + Omega*SS = AS, then remove Y*Omega.
    YtY = Y'*Y;
    SS = YtY;
    AS = Y'*eta - eta'*Y;
    try
        % This is supposed to work and indeed return a skew-symmetric
        % solution Omega.
        Omega = lyap(SS, -AS);
    catch up %#ok
        % It can happen though that SS will be rank deficient. The
        % Lyapunov equation we solve still has a unique skew-symmetric
        % solution, but solutions with a symmetric part now also exist,
        % and the lyap function doesn't like that. So we want to
        % extract the minimum norm solution. This is also useful if lyap is
        % not available (it is part of the control system toolbox).
        mat = @(x) reshape(x, [k k]);
        vec = @(X) X(:);
        is_octave = exist('OCTAVE_VERSION', 'builtin');
        if ~is_octave
            [vecomega, unused] = minres(@(x) vec(SS*mat(x) + mat(x)*SS), vec(AS)); %#ok
        else
            [vecomega, unused] = gmres(@(x) vec(SS*mat(x) + mat(x)*SS), vec(AS)); %#ok
        end
        Omega = mat(vecomega);
    end
    % % Make sure the result is skew-symmetric (does not seem necessary).
    % Omega = (Omega-Omega')/2;
    etaproj = eta - Y*Omega;
end

% Retraction: step along eta, then renormalize every row.
function Ynew = retraction(Y, eta, t)
    if nargin < 3
        t = 1.0;
    end
    Ynew = Y + t*eta;
    Ynew = normalize_rows(Ynew);
end

% Exponential map: not implemented; returns the retraction and warns.
function Ynew = exponential(Y, eta, t)
    if nargin < 3
        t = 1.0;
    end

    Ynew = retraction(Y, eta, t);
    % The message previously named the spectrahedron manifold (copy/paste
    % from spectrahedronfactory); the warning identifier is unchanged.
    warning('manopt:elliptopefactory:exp', ...
        ['Exponential for fixed rank elliptope ' ...
        'manifold not implemented yet. Used retraction instead.\n' ...
        'To disable this warning: warning(''off'', ''manopt:elliptopefactory:exp'')']);
end

% Euclidean gradient to Riemannian gradient conversion.
% We only need the ambient space projection: the remainder of the
% projection function is not necessary because the Euclidean gradient must
% already be orthogonal to the vertical space.
function rgrad = egrad2rgrad(Y, egrad)
    rgrad = project_rows(Y, egrad);
end

% Euclidean Hessian to Riemannian Hessian conversion.
% The per-row scalings are applied with bsxfun instead of building
% explicit n-by-k repeated matrices; the values are the same.
function Hess = ehess2rhess(Y, egrad, ehess, eta)
    % Directional derivative of the Riemannian gradient
    scaling_grad = sum((egrad.*Y), 2); % column vector of size n
    Hess = ehess - bsxfun(@times, eta, scaling_grad);

    % Directional derivative of scaling_grad
    scaling_hess = sum((eta.*egrad) + (Y.*ehess), 2);
    Hess = Hess - bsxfun(@times, Y, scaling_hess);

    % Project on the horizontal space
    Hess = projection(Y, Hess);
end

% Random point generation on the manifold
function Y = random(n, k)
    Y = randn(n, k);
    Y = normalize_rows(Y);
end

% Random vector generation at Y (unit Frobenius norm)
function eta = randomvec(Y)
    eta = randn(size(Y));
    eta = projection(Y, eta);
    nrm = norm(eta, 'fro');
    eta = eta / nrm;
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/symfixedrank/spectrahedronfactory.m
================================================
function M = spectrahedronfactory(n, k)
%
% Manifold of n-by-n symmetric positive semidefinite matrices of rank k
% with trace (sum of diagonal elements) equal to 1.
%
% function M = spectrahedronfactory(n, k)
%
% A point X on the manifold is parameterized as YY^T where Y is a matrix of
% size nxk. As such, X is symmetric, positive semidefinite. We restrict to
% full-rank Y's, such that X has rank exactly k. The point X is numerically
% represented by Y (this is more efficient than working with X, which may
% be big). Tangent vectors are represented as matrices of the same size as
% Y, call them Ydot, so that Xdot = Y Ydot' + Ydot Y and trace(Xdot) == 0.
% The metric is the canonical Euclidean metric on Y.
%
% The trace constraint on X (trace(X) == 1) translates to a unit Frobenius
% norm constraint on Y: trace(X) = norm(Y, 'fro')^2 == 1. The set of such
% Y's forms the unit sphere in R^(nxk): see spherefactory. But because for
% any orthogonal Q of size k, it holds that (YQ)(YQ)' = YY', we "group" all
% matrices of the form YQ in an equivalence class. The set of equivalence
% classes is a Riemannian quotient manifold, implemented here.
%
%
% Note that this geometry formally breaks down at rank-deficient Y's.
% As an alternative, you may use the sphere manifold (it has larger
% dimension (by 1), but does not break down at rank drop.)
%
% The geometry is taken from the 2010 paper:
% M. Journee, P.-A. Absil, F. Bach and R. Sepulchre,
% "Low-Rank Optimization on the Cone of Positive Semidefinite Matrices".
% Paper link: http://www.di.ens.fr/~fbach/journee2010_sdp.pdf
%
%
% Please cite the Manopt paper as well as the research paper:
%     @Article{journee2010low,
%       Title   = {Low-rank optimization on the cone of positive semidefinite matrices},
%       Author  = {Journ{\'e}e, M. and Bach, F. and Absil, P.-A. and Sepulchre, R.},
%       Journal = {SIAM Journal on Optimization},
%       Year    = {2010},
%       Number  = {5},
%       Pages   = {2327--2351},
%       Volume  = {20},
%       Doi     = {10.1137/080731359}
%     }
%
%
% See also: spherefactory elliptopefactory symfixedrankYYfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, July 11, 2013.
% Contributors: Nicolas Boumal
% Change log:
%
%   April 2, 2015 (NB):
%       Replaced trace(A'*B) by A(:)'*B(:) (equivalent but faster).
%       Updated documentation.

    % The format string has three conversions (rows x columns of X, and the
    % rank): X is n-by-n, so n must be passed twice. With only (n, k),
    % sprintf stops at the third conversion and the name is truncated.
    M.name = @() sprintf('YY'' quotient manifold of %dx%d psd matrices of rank %d with trace 1', n, n, k);

    M.dim = @() n*k - 1 - k*(k-1)/2;

    % Euclidean metric on the total space
    M.inner = @(Y, eta, zeta) eta(:)'*zeta(:);

    M.norm = @(Y, eta) sqrt(M.inner(Y, eta, eta));

    M.dist = @(Y, Z) error('spectrahedronfactory.dist not implemented yet.');

    M.typicaldist = @() 10*k;

    M.proj = @projection;
    function etaproj = projection(Y, eta)
        % Projection onto the tangent space, i.e., on the tangent space of
        % ||Y|| = 1
        eta = eta - (eta(:)'*Y(:))*Y;

        % Projection onto the horizontal space: find Omega such that
        % SS*Omega + Omega*SS = AS, then remove Y*Omega. Requires lyap.
        YtY = Y'*Y;
        SS = YtY;
        AS = Y'*eta - eta'*Y;
        Omega = lyap(SS, -AS);
        etaproj = eta - Y*Omega;
    end

    M.tangent = M.proj;
    M.tangent2ambient = @(Y, eta) eta;

    % Retraction: step along eta, then rescale to unit Frobenius norm.
    M.retr = @retraction;
    function Ynew = retraction(Y, eta, t)
        if nargin < 3
            t = 1.0;
        end
        Ynew = Y + t*eta;
        Ynew = Ynew/norm(Ynew, 'fro');
    end

    % Only the sphere projection is applied to the Euclidean gradient.
    M.egrad2rgrad = @(Y, eta) eta - (eta(:)'*Y(:))*Y;

    M.ehess2rhess = @ehess2rhess;
    function Hess = ehess2rhess(Y, egrad, ehess, eta)

        % Directional derivative of the Riemannian gradient
        Hess = ehess - (egrad(:)'*Y(:))*eta - ( (ehess(:)'*Y(:)) + (eta(:)'*egrad(:)) )*Y;
        Hess = Hess - (Hess(:)'*Y(:))*Y;

        % Project on the horizontal space
        Hess = M.proj(Y, Hess);

    end

    % Exponential map: not implemented; returns the retraction and warns.
    M.exp = @exponential;
    function Ynew = exponential(Y, eta, t)
        if nargin < 3
            t = 1.0;
        end

        Ynew = retraction(Y, eta, t);
        % Message typo fixed ("implenented"); the identifier is unchanged.
        warning('manopt:spectrahedronfactory:exp', ...
            ['Exponential for fixed rank spectrahedron ' ...
            'manifold not implemented yet. Used retraction instead.']);
    end

    % Notice that the hash of two equivalent points will be different...
    M.hash = @(Y) ['z' hashmd5(Y(:))];

    % Random point: Gaussian matrix scaled to unit Frobenius norm.
    M.rand = @random;
    function Y = random()
        Y = randn(n, k);
        Y = Y/norm(Y,'fro');
    end

    % Random horizontal vector at Y, scaled to unit norm.
    M.randvec = @randomvec;
    function eta = randomvec(Y)
        eta = randn(n, k);
        eta = projection(Y, eta);
        nrm = M.norm(Y, eta);
        eta = eta / nrm;
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(Y) zeros(n, k);

    M.transp = @(Y1, Y2, d) projection(Y2, d);

    M.vec = @(Y, u_mat) u_mat(:);
    M.mat = @(Y, u_vec) reshape(u_vec, [n, k]);
    M.vecmatareisometries = @() true;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/symfixedrank/symfixedrankYYcomplexfactory.m
================================================
function M = symfixedrankYYcomplexfactory(n, k)
% Manifold of n x n complex Hermitian pos. semidefinite matrices of rank k.
%
% function M = symfixedrankYYcomplexfactory(n, k)
%
% Manifold of n-by-n complex Hermitian positive semidefinite matrices of
% fixed rank k. This follows the quotient geometry described
% in Sarod Yatawatta's 2013 paper:
% "Radio interferometric calibration using a Riemannian manifold", ICASSP.
%
% Paper link: http://dx.doi.org/10.1109/ICASSP.2013.6638382.
%
% A point X on the manifold M is parameterized as YY^*, where
% Y is a complex matrix of size nxk. For any point Y on the manifold M,
% given any kxk complex unitary matrix U, we say Y*U is equivalent to Y,
% i.e., YY^* does not change. Therefore, M is the set of equivalence
% classes and is a Riemannian quotient manifold C^{nk}/SU(k).
% The metric is the usual real-trace inner product, that is,
% it is the usual metric for the complex plane identified with R^2.
%
% Notice that this manifold is not complete: if optimization leads Y to be
% rank-deficient, the geometry will break down. Hence, this geometry should
% only be used if it is expected that the points of interest will have rank
% exactly k.
% Reduce k if that is not the case.
%
% The geometry is based on the following papers (and references therein).
% Please cite the Manopt paper as well as the research papers:
%
% @INPROCEEDINGS{Yatawatta2013A,
%  author={Yatawatta, S.},
%  booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on},
%  title={Radio interferometric calibration using a {R}iemannian manifold},
%  year={2013},
%  month={May},
%  pages={3866--3870},
%  doi={10.1109/ICASSP.2013.6638382},
%  ISSN={1520-6149},
% }
%
% @article{Yatawatta2013B,
%  author = {Yatawatta, S.},
%  title = {On the interpolation of calibration solutions obtained in radio interferometry},
%  volume = {428},
%  number = {1},
%  pages = {828--833},
%  year = {2013},
%  doi = {10.1093/mnras/sts069},
%  journal = {Monthly Notices of the Royal Astronomical Society}
% }
%
% See also: symfixedrankYYfactory sympositivedefinitefactory

% This file is part of Manopt: www.manopt.org.
% Original author: Sarod Yatawatta, June 29, 2015.
% Contributors: Bamdev Mishra.
% Change log:
%
%   June 28, 2016 (NB):
%       Metric scaled down by factor 2 to match the metric used in
%       euclideancomplexfactory.

    M.name = @() sprintf('YY'' quotient manifold of Hermitian %dx%d complex matrices of rank %d.', n, n, k);

    M.dim = @() 2*k*n - k*k; % SY: dim of ambient space (2*k*n) - dim of kxk unitary matrix (k^2).

    % Euclidean metric on the total space.
    % BM: equivalent to real(trace(eta'*zeta)), but more efficient.
    M.inner = @(Y, eta, zeta) real(eta(:)'*zeta(:));

    M.norm = @(Y, eta) sqrt(M.inner(Y, eta, eta));

    % Find unitary U to minimize ||Y - Z*U||,
    % i.e., the Procrustes problem, with svd(Y'*Z).
    %
    % The handle used to be @(Y, Z) distance, which called distance with
    % no arguments and therefore always errored; the nested function is
    % now exposed directly. It also returns the Frobenius norm of the
    % residual (consistent with M.norm) rather than its square.
    M.dist = @distance;
    function distval = distance(Y, Z)
        [u, ignore, v] = svd(Z'*Y); %#ok
        E = Y - Z*u*v'; % SY: checked.
        distval = norm(E, 'fro');
    end

    M.typicaldist = @() 10*k; % BM: To do.

    M.proj = @projection;
    function etaproj = projection(Y, eta)
        % Projection onto the horizontal space: find Omega such that
        % xx*Omega + Omega*xx' = rr, then remove Y*Omega. Requires lyap.
        xx = Y'*Y;
        rr = Y'*eta - eta'*Y;
        Omega = lyap(xx, -rr);
        etaproj = eta - Y*Omega;
    end

    M.tangent = M.proj;
    M.tangent2ambient = @(Y, eta) eta;

    % Retraction: plain step in the total space.
    M.retr = @retraction;
    function Ynew = retraction(Y, eta, t)
        if nargin < 3
            t = 1.0;
        end
        Ynew = Y + t*eta;
    end

    % The Euclidean gradient is returned as is; the Euclidean Hessian is
    % only projected to the horizontal space.
    M.egrad2rgrad = @(Y, eta) eta;
    M.ehess2rhess = @(Y, egrad, ehess, U) M.proj(Y, ehess);

    % Exponential map: not implemented; returns the retraction and warns.
    M.exp = @exponential;
    function Ynew = exponential(Y, eta, t)
        if nargin < 3
            t = 1.0;
        end

        Ynew = retraction(Y, eta, t);
        warning('manopt:symfixedrankYYcomplexfactory:exp', ...
            ['Exponential for symmetric fixed-rank complex ' ...
            'manifold not implemented yet. Used retraction instead.']);
    end

    % Notice that the hash of two equivalent points will be different...
    M.hash = @(Y) ['z' hashmd5([real(Y(:)); imag(Y(:))])];

    M.rand = @random;
    function Y = random()
        Y = randn(n, k) + 1i*randn(n,k);
    end

    % Random horizontal vector at Y, scaled to unit norm.
    M.randvec = @randomvec;
    function eta = randomvec(Y)
        eta = randn(n, k) + 1i*randn(n,k);
        eta = projection(Y, eta);
        nrm = M.norm(Y, eta);
        eta = eta / nrm;
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(Y) zeros(n, k);

    M.transp = @(Y1, Y2, d) projection(Y2, d);

    % Real and imaginary parts are stacked into one real column vector.
    M.vec = @(Y, u_mat) [real(u_mat(:)); imag(u_mat(:))];
    M.mat = @(Y, u_vec) reshape(u_vec(1 : n*k), [n, k]) + 1i*reshape(u_vec(n*k + 1: end), [n, k]);
    M.vecmatareisometries = @() true;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/symfixedrank/symfixedrankYYfactory.m
================================================
function M = symfixedrankYYfactory(n, k)
% Manifold of n-by-n symmetric positive semidefinite matrices of rank k.
%
% function M = symfixedrankYYfactory(n, k)
%
% A point X on the manifold is parameterized as YY^T where Y is a matrix of
% size nxk. As such, X is symmetric, positive semidefinite. We restrict to
% full-rank Y's, such that X has rank exactly k.
% The point X is numerically
% represented by Y (this is more efficient than working with X, which may
% be big). Tangent vectors are represented as matrices of the same size as
% Y, call them Ydot, so that Xdot = Y Ydot' + Ydot Y. The metric is the
% canonical Euclidean metric on Y.
%
% Since for any orthogonal Q of size k, it holds that (YQ)(YQ)' = YY',
% we "group" all matrices of the form YQ in an equivalence class. The set
% of equivalence classes is a Riemannian quotient manifold, implemented
% here.
%
% Notice that this manifold is not complete: if optimization leads Y to be
% rank-deficient, the geometry will break down. Hence, this geometry should
% only be used if it is expected that the points of interest will have rank
% exactly k. Reduce k if that is not the case.
%
% An alternative, complete, geometry for positive semidefinite matrices of
% rank k is described in Bonnabel and Sepulchre 2009, "Riemannian Metric
% and Geometric Mean for Positive Semidefinite Matrices of Fixed Rank",
% SIAM Journal on Matrix Analysis and Applications.
%
%
% The geometry here implemented is the simplest case of the 2010 paper:
% M. Journee, P.-A. Absil, F. Bach and R. Sepulchre,
% "Low-Rank Optimization on the Cone of Positive Semidefinite Matrices".
% Paper link: http://www.di.ens.fr/~fbach/journee2010_sdp.pdf
%
%
% Please cite the Manopt paper as well as the research paper:
%     @Article{journee2010low,
%       Title   = {Low-rank optimization on the cone of positive semidefinite matrices},
%       Author  = {Journ{\'e}e, M. and Bach, F. and Absil, P.-A. and Sepulchre, R.},
%       Journal = {SIAM Journal on Optimization},
%       Year    = {2010},
%       Number  = {5},
%       Pages   = {2327--2351},
%       Volume  = {20},
%       Doi     = {10.1137/080731359}
%     }
%
% See also: elliptopefactory spectrahedronfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   July 10, 2013 (NB):
%       Added vec, mat, tangent, tangent2ambient ;
%       Correction for the dimension of the manifold.
%
%   April 2, 2015 (NB):
%       Replaced trace(A'*B) by A(:)'*B(:) (equivalent but faster).

    % The format string has three conversions (rows x columns of X, and the
    % rank): X is n-by-n, so n must be passed twice. With only (n, k),
    % sprintf stops at the third conversion and the name is truncated.
    M.name = @() sprintf('YY'' quotient manifold of %dx%d psd matrices of rank %d', n, n, k);

    M.dim = @() k*n - k*(k-1)/2;

    % Euclidean metric on the total space
    M.inner = @(Y, eta, zeta) eta(:)'*zeta(:);

    M.norm = @(Y, eta) sqrt(M.inner(Y, eta, eta));

    M.dist = @(Y, Z) error('symfixedrankYYfactory.dist not implemented yet.');

    M.typicaldist = @() 10*k;

    M.proj = @projection;
    function etaproj = projection(Y, eta)
        % Projection onto the horizontal space: find Omega such that
        % SS*Omega + Omega*SS = AS, then remove Y*Omega. Requires lyap.
        YtY = Y'*Y;
        SS = YtY;
        AS = Y'*eta - eta'*Y;
        Omega = lyap(SS, -AS);
        etaproj = eta - Y*Omega;
    end

    M.tangent = M.proj;
    M.tangent2ambient = @(Y, eta) eta;

    % Retraction: plain step in the total space.
    M.retr = @retraction;
    function Ynew = retraction(Y, eta, t)
        if nargin < 3
            t = 1.0;
        end
        Ynew = Y + t*eta;
    end

    % The Euclidean gradient is returned as is; the Euclidean Hessian is
    % only projected to the horizontal space.
    M.egrad2rgrad = @(Y, eta) eta;
    M.ehess2rhess = @(Y, egrad, ehess, U) M.proj(Y, ehess);

    % Exponential map: not implemented; returns the retraction and warns.
    M.exp = @exponential;
    function Ynew = exponential(Y, eta, t)
        if nargin < 3
            t = 1.0;
        end

        Ynew = retraction(Y, eta, t);
        warning('manopt:symfixedrankYYfactory:exp', ...
            ['Exponential for symmetric, fixed-rank ' ...
            'manifold not implemented yet. Used retraction instead.']);
    end

    % Notice that the hash of two equivalent points will be different...
    M.hash = @(Y) ['z' hashmd5(Y(:))];

    M.rand = @random;
    function Y = random()
        Y = randn(n, k);
    end

    % Random horizontal vector at Y, scaled to unit norm.
    M.randvec = @randomvec;
    function eta = randomvec(Y)
        eta = randn(n, k);
        eta = projection(Y, eta);
        nrm = M.norm(Y, eta);
        eta = eta / nrm;
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(Y) zeros(n, k);

    M.transp = @(Y1, Y2, d) projection(Y2, d);

    M.vec = @(Y, u_mat) u_mat(:);
    M.mat = @(Y, u_vec) reshape(u_vec, [n, k]);
    M.vecmatareisometries = @() true;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/symfixedrank/sympositivedefinitefactory.m
================================================
function M = sympositivedefinitefactory(n)
% Manifold of n-by-n symmetric positive definite matrices with
% the bi-invariant geometry.
%
% function M = sympositivedefinitefactory(n)
%
% A point X on the manifold is represented as a symmetric positive definite
% matrix X (nxn). Tangent vectors are symmetric matrices of the same size
% (but not necessarily definite).
%
% The Riemannian metric is the bi-invariant metric, described notably in
% Chapter 6 of the 2007 book "Positive definite matrices"
% by Rajendra Bhatia, Princeton University Press.
%
%
% The retraction / exponential map involves expm (the matrix exponential).
% If too large a vector is retracted / exponentiated (e.g., a solver tries
% to make too big a step), this may result in NaN's in the returned point,
% which most likely would lead to NaN's in the cost / gradient / ... and
% will result in failure of the optimization. For trustregions, this can be
% controlled by setting options.Delta0 and options.Delta_bar, to prevent
% too large steps.
%
%
% Note also that many of the functions involve solving linear systems in X
% (a point on the manifold), taking matrix exponentals and logarithms, etc.
% It could therefore be beneficial to do some precomputation on X (an
% eigenvalue decomposition for example) and store both X and the
% preprocessing in a structure. This would require modifying the present
% factory to work with such structures to represent both points and tangent
% vectors. We omit this in favor of simplicity, but it may be good to keep
% this in mind if efficiency becomes an issue in your application.

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, August 29, 2013.
% Contributors: Nicolas Boumal
% Change log:
%
%   March 5, 2014 (NB)
%       There were a number of mistakes in the code owing to the tacit
%       assumption that if X and eta are symmetric, then X\eta is
%       symmetric too, which is not the case. See discussion on the Manopt
%       forum started on Jan. 19, 2014. Functions norm, dist, exp and log
%       were modified accordingly. Furthermore, they only require matrix
%       inversion (as well as matrix log or matrix exp), not matrix square
%       roots or their inverse.
%
%   July 28, 2014 (NB)
%       The dim() function returned n*(n-1)/2 instead of n*(n+1)/2.
%       Implemented proper parallel transport from Sra and Hosseini (not
%       used by default).
%       Also added symmetrization in exp and log (to be sure).
%
%   April 3, 2015 (NB):
%       Replaced trace(A*B) by a faster equivalent that does not compute
%       the whole product A*B, for inner product, norm and distance.
%
%   May 23, 2017 (NB):
%       As seen in a talk of Wen Huang at the SIAM Optimization Conference
%       today, replaced the retraction of this factory (which was simply
%       equal to the exponential map) with a simpler, second-order
%       retraction. That this retraction is second order can be verified
%       numerically with checkretraction(sympositivedefinitefactory(5));
%       Notice that, for this retraction, it would be cheap to evaluate for
%       many values of t, that is, it is cheap to retract many points along
%       the same tangent direction. This could in principle be exploited to
%       speed up line-searches.

    % Symmetric part of a square matrix.
    sympart = @(A) .5*(A+A');

    M.name = @() sprintf('Symmetric positive definite geometry of %dx%d matrices', n, n);

    M.dim = @() n*(n+1)/2;

    % Trace helpers: they evaluate trace(A*B) and sqrt(trace(A^2)) from the
    % entries of A and B, without forming the matrix product.
    colvec    = @(A) A(:);
    traceprod = @(A, B) colvec(A')'*colvec(B);    % = trace(A*B)
    tracenorm = @(A) sqrt(traceprod(A, A));       % = sqrt(trace(A^2))

    % Choice of the metric on the orthonormal space is motivated by the
    % symmetry present in the space. The metric on the positive definite
    % cone is its natural bi-invariant metric.
    % The result is equal to: trace( (X\eta) * (X\zeta) )
    M.inner = @(X, eta, zeta) traceprod(X\eta, X\zeta);

    % Notice that X\eta is *not* symmetric in general.
    % The result is equal to: sqrt(trace((X\eta)^2))
    % There should be no need to take the real part, but rounding errors
    % may cause a small imaginary part to appear, so we discard it.
    M.norm = @(X, eta) real(tracenorm(X\eta));

    % Same here: X\Y is not symmetric in general.
    % Same remark about taking the real part.
    M.dist = @(X, Y) real(tracenorm(real(logm(X\Y))));

    M.typicaldist = @() sqrt(n*(n+1)/2);

    % Euclidean gradient -> Riemannian gradient: X * sym(egrad) * X.
    M.egrad2rgrad = @(X, eta) X*sympart(eta)*X;

    M.ehess2rhess = @rhessfromehess;
    function Hess = rhessfromehess(X, egrad, ehess, eta)
        % This term appears twice: doubled in the directional derivative
        % of the Riemannian gradient, then subtracted once as the
        % correction factor for the non-constant metric.
        mixed = sympart(eta*sympart(egrad)*X);

        % Directional derivatives of the Riemannian gradient
        Hess = X*sympart(ehess)*X + 2*mixed;

        % Correction factor for the non-constant metric
        Hess = Hess - mixed;
    end

    M.proj = @(X, eta) sympart(eta);

    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;

    M.retr = @secondorderretraction;
    function Y = secondorderretraction(X, eta, t)
        % Retraction X + t*eta + (1/2) t*eta * (X \ t*eta); when t is
        % omitted, eta is used as is.
        teta = eta;
        if nargin >= 3
            teta = t*eta;
        end
        % The symm() call is mathematically unnecessary but numerically
        % necessary.
        Y = sympart(X + teta + .5*teta*(X\teta));
    end

    M.exp = @expmap;
    function Y = expmap(X, eta, t)
        % Exponential map at X along eta; t defaults to 1 when omitted.
        if nargin < 3
            t = 1.0;
        end
        % The symm() and real() calls are mathematically not necessary but
        % are numerically necessary.
        Y = sympart(X*real(expm(X\(t*eta))));
    end

    M.log = @logmap;
    function H = logmap(X, Y)
        % Same remark regarding the calls to symm() and real().
        H = sympart(X*real(logm(X\Y)));
    end

    M.hash = @(X) ['z' hashmd5(X(:))];

    % Generate a random symmetric positive definite matrix following a
    % certain distribution. The particular choice of a distribution is of
    % course arbitrary, and specific applications might require different
    % ones.
    M.rand = @randompoint;
    function X = randompoint()
        % Eigenvalues drawn in [1, 2], eigenvectors from the QR
        % factorization of a Gaussian matrix. The eigenvalues are drawn
        % first, then the Gaussian matrix.
        eigvals = diag(1+rand(n, 1));
        [Q, R] = qr(randn(n)); %#ok
        X = Q*eigvals*Q';
    end

    % Generate a uniformly random unit-norm tangent vector at X.
    M.randvec = @randomtangent;
    function eta = randomtangent(X)
        eta = sympart(randn(n));
        eta = eta / M.norm(X, eta);
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(X) zeros(n);

    % Poor man's vector transport: exploit the fact that all tangent spaces
    % are the set of symmetric matrices, so that the identity is a sort of
    % vector transport. It may perform poorly if the origin and target (X1
    % and X2) are far apart though. This should not be the case for typical
    % optimization algorithms, which perform small steps.
    M.transp = @(X1, X2, eta) eta;

    % For reference, a proper vector transport is given here, following
    % work by Sra and Hosseini: "Conic geometric optimisation on the
    % manifold of positive definite matrices", to appear in SIAM J. Optim.
    % in 2015; also available here: http://arxiv.org/abs/1312.1039
    % This will not be used by default. To force the use of this transport,
    % execute "M.transp = M.paralleltransp;" on your M returned by the
    % present factory.
    M.paralleltransp = @paralleltransport;
    function zeta = paralleltransport(X, Y, eta)
        root = sqrtm((Y/X));
        zeta = root*eta*root';
    end

    % vec and mat are not isometries, because of the unusual inner metric.
    M.vec = @(X, U) U(:);
    M.mat = @(X, u) reshape(u, n, n);
    M.vecmatareisometries = @() false;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/readme
================================================
test

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/barzilaiborwein/barzilaiborwein.m
================================================
function [x, cost, info, options] = barzilaiborwein(problem, x, options)
% Riemannian Barzilai-Borwein solver with non-monotone line-search.
%
% function [x, cost, info, options] = barzilaiborwein(problem)
% function [x, cost, info, options] = barzilaiborwein(problem, x0)
% function [x, cost, info, options] = barzilaiborwein(problem, x0, options)
% function [x, cost, info, options] = barzilaiborwein(problem, [], options)
%
% Apply the Barzilai-Borwein minimization algorithm to the problem defined
% in the problem structure, starting at x0 if it is provided (otherwise, at
% a random point on the manifold). To specify options whilst not specifying
% an initial guess, give x0 as [] (the empty matrix).
%
% The algorithm uses its own special non-monotone line-search strategy.
% Therefore, no line-search algorithm should be specified in the problem
% structure or in the options structure.
%
% In most of the examples bundled with the toolbox (see link below), the
% solver can be replaced by the present one if need be.
%
% The outputs x and cost are the last reached point on the manifold and its
% cost. This is not necessarily the best point generated since the method
% is not monotone.
% The struct-array info contains information about the
% iterations:
%   iter : the iteration number (0 for the initial guess)
%   cost : cost value
%   time : elapsed time in seconds
%   gradnorm : Riemannian norm of the gradient
%   stepsize : norm of the last tangent vector retracted
%   linesearch : information logged by the line-search algorithm
%   And possibly additional information logged by options.statsfun.
% For example, type [info.gradnorm] to obtain a vector of the successive
% gradient norms reached.
%
% The options structure is used to overwrite the default values. All
% options have a default value and are hence optional. To force an option
% value, pass an options structure with a field options.optionname, where
% optionname is one of the following and the default value is indicated
% between parentheses:
%
%   tolgradnorm (1e-6)
%       The algorithm terminates if the norm of the gradient drops below this.
%   maxiter (1000)
%       The algorithm terminates if maxiter iterations have been executed.
%   maxtime (Inf)
%       The algorithm terminates if maxtime seconds elapsed.
%   minstepsize (1e-10)
%       The algorithm terminates if the linesearch returns a displacement
%       vector (to be retracted) smaller in norm than this value.
%   linesearch (@linesearch_hint)
%       This option should not be changed, as the present solver has its
%       own dedicated line-search strategy.
%   strategy ('direct')
%       The strategy used for the Barzilai-Borwein stepsize
%       'direct', compute the direct step /
%       'inverse', compute the inverse step /
%       'alternate', alternates between direct and inverse step
%       Any other value triggers a warning and leaves the stepsize to the
%       line-search alone (no Barzilai-Borwein update is applied).
%   lambdamax (1e3)
%       The maximum stepsize allowed by the Barzilai-Borwein method
%   lambdamin (1e-3)
%       The minimum stepsize allowed by the Barzilai-Borwein method
%   lambda0 (1/10)
%       The initial stepsize of the Barzilai-Borwein method
%   ls_nmsteps (10)
%       The non-monotone line-search checks a sufficient decrease with respect
%       to the previous ls_nmsteps objective function values.
%   ls_suff_decr (1e-4)
%       The Armijo sufficient decrease parameter, made available to the
%       line-search through the options structure.
%   statsfun (none)
%       Function handle to a function that will be called after each
%       iteration to provide the opportunity to log additional statistics.
%       They will be returned in the info struct. See the generic Manopt
%       documentation about solvers for further information.
%   stopfun (none)
%       Function handle to a function that will be called at each iteration
%       to provide the opportunity to specify additional stopping criteria.
%       See the generic Manopt documentation about solvers for further
%       information.
%   verbosity (3)
%       Integer number used to tune the amount of output the algorithm
%       generates during execution (mostly as text in the command window).
%       The higher, the more output. 0 means silent.
%   storedepth (2)
%       Maximum number of different points x of the manifold for which a
%       store structure will be kept in memory in the storedb. If the
%       caching features of Manopt are not used, this is irrelevant. For
%       this algorithm, a store depth of 2 should always be sufficient.
%
%
% The implementation of the Barzilai-Borwein method is based on the paper:
%
% B. Iannazzo, M. Porcelli, "The Riemannian Barzilai-Borwein method with
% nonmonotone line-search and the matrix geometric mean computation",
% IMA Journal of Numerical Analysis, to appear, https://doi.org/10.1093/imanum/drx015.
%
% See also: steepestdescent conjugategradient trustregions

% This file is part of Manopt: www.manopt.org.
% Original author: Margherita Porcelli, May 31, 2017
% Contributors: Nicolas Boumal, Bruno Iannazzo
% Change log:

    % Verify that the problem description is sufficient for the solver.
    if ~canGetCost(problem)
        warning('manopt:getCost', ...
            'No cost provided. The algorithm will likely abort.');
    end
    if ~canGetGradient(problem) && ~canGetApproxGradient(problem)
        % Note: we do not give a warning if an approximate gradient is
        % explicitly given in the problem description, as in that case the
        % user seems to be aware of the issue.
        warning('manopt:getGradient:approx', ...
            ['No gradient provided. Using an FD approximation instead (slow).\n' ...
            'It may be necessary to increase options.tolgradnorm.\n' ...
            'To disable this warning: warning(''off'', ''manopt:getGradient:approx'')']);
        problem.approxgrad = approxgradientFD(problem);
    end

    % Ensure options exists as a structure
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end

    % Set local defaults here
    localdefaults.minstepsize = 1e-10;
    localdefaults.maxiter = 1000;
    localdefaults.tolgradnorm = 1e-6;

    % Upper and lower bound for the Barzilai-Borwein stepsize
    localdefaults.lambdamax = 1e3;
    localdefaults.lambdamin = 1e-3;
    % Initial Barzilai-Borwein stepsize
    localdefaults.lambda0 = 1/10;

    % Barzilai-Borwein strategy (direct, inverse or alternate)
    localdefaults.strategy = 'direct';

    % Line-search parameters
    % 1) Make sure the user didn't try to define a line search
    if canGetLinesearch(problem) || isfield(options, 'linesearch')
        error('manopt:BB:ls', ...
            ['The problem structure may not specify a line-search ' ...
            'hint for the BB solver,\nand the options structure ' ...
            'may not specify a line-search algorithm for BB.']);
    end
    % 2) Define the line-search parameters
    problem.linesearch = @(x, d, storedb, key) 1;
    options.linesearch = @linesearch_hint;
    % The Armijo sufficient decrease parameter
    localdefaults.ls_suff_decr = 1e-4;
    % The previous steps checked in the non-monotone line-search strategy
    localdefaults.ls_nmsteps = 10;

    % Merge global and local defaults, then merge w/ user options, if any.
    localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
    options = mergeOptions(localdefaults, options);

    % Shorthands for some parameters
    strategy = options.strategy;
    lambdamax = options.lambdamax;
    lambdamin = options.lambdamin;
    lambda0 = options.lambda0;

    % An unrecognized strategy matches none of the branches in the main
    % loop, so the Barzilai-Borwein stepsize would silently never be
    % updated. Make that visible to the user.
    if ~any(strcmp(strategy, {'direct', 'inverse', 'alternate'}))
        warning('manopt:BB:strategy', ...
            ['options.strategy should be ''direct'', ''inverse'' or ' ...
            '''alternate''. No Barzilai-Borwein stepsize update will ' ...
            'be applied.']);
    end

    timetic = tic();

    % If no initial point x is given by the user, generate one at random.
    if ~exist('x', 'var') || isempty(x)
        x = problem.M.rand();
    end

    % Create a store database and get a key for the current x
    storedb = StoreDB(options.storedepth);
    key = storedb.getNewKey();

    % Compute objective-related quantities for x
    [cost, grad] = getCostGrad(problem, x, storedb, key);
    gradnorm = problem.M.norm(x, grad);

    % Some variables below need to store information about iterations. We
    % preallocate for a reasonable amount of intended iterations to avoid
    % memory re-allocations.
    mem_init_size = min(10000, options.maxiter+1);

    % Store the cost value
    f = zeros(mem_init_size, 1);
    f(1) = cost;

    % Iteration counter (at any point, iter is the number of fully executed
    % iterations so far)
    iter = 0;

    % Save stats in a struct array info, and preallocate.
    stats = savestats();
    info(1) = stats;
    info(mem_init_size).iter = [];

    if options.verbosity >= 2
        fprintf(' iter\t               cost val\t    grad. norm\n');
    end

    % Set the initial Barzilai-Borwein stepsize
    lambda = lambda0;

    % Start iterating until stopping criterion triggers
    while true

        % Display iteration information
        if options.verbosity >= 2
            fprintf('%5d\t%+.16e\t%.8e\n', iter, cost, gradnorm);
        end

        % Start timing this iteration
        timetic = tic();

        % Run standard stopping criterion checks
        [stop, reason] = stoppingcriterion(problem, x, options, ...
            info, iter+1);

        % If none triggered, run specific stopping criterion check
        % (stats.stepsize is NaN at iter 0, so this cannot fire then).
        if ~stop && stats.stepsize < options.minstepsize
            stop = true;
            reason = sprintf(['Last stepsize smaller than minimum '  ...
                'allowed; options.minstepsize = %g.'], ...
                options.minstepsize);
        end

        if stop
            if options.verbosity >= 1
                fprintf([reason '\n']);
            end
            break;
        end

        % Pick the descent direction as minus the gradient (scaled)
        desc_dir = problem.M.lincomb(x, -lambda, grad);

        % Execute the nonmonotone line search: the reference cost is the
        % largest of the last ls_nmsteps recorded cost values.
        k = iter + 1;
        start = max(1, k - options.ls_nmsteps + 1);
        [stepsize, newx, newkey, lsstats] = ...
            options.linesearch(problem, x, desc_dir, max(f(start:k)), ...
            -lambda * gradnorm^2, options, storedb, key);

        % Updates the value of lambda
        lambda = lambda * lsstats.alpha;

        % Compute the new cost-related quantities for newx
        [newcost, newgrad] = getCostGrad(problem, newx, storedb, newkey);
        newgradnorm = problem.M.norm(newx, newgrad);

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % BARZILAI-BORWEIN STRATEGY

        % Store the cost value
        f(iter+2) = newcost;

        % Transport the old gradient to newx
        grad_transp = problem.M.transp(x, newx, grad);

        % Compute the difference between gradients
        Y = problem.M.lincomb(newx, 1, newgrad, -1, grad_transp);

        % Compute the transported step. grad_transp is a tangent vector at
        % newx, so the linear combination is formed at newx as well.
        Stransp = problem.M.lincomb(newx, -lambda, grad_transp);

        % Compute the new Barzilai-Borwein step following the strategy
        % direct strategy
        if strcmp(strategy, 'direct')
            num = problem.M.norm(newx, Stransp)^2;
            den = problem.M.inner(newx, Stransp, Y);
            if den > 0
                lambda = min( lambdamax, max(lambdamin, num/den) );
            else
                lambda = lambdamax;
            end
        end

        % inverse strategy
        if strcmp(strategy, 'inverse')
            num = problem.M.inner(newx, Stransp, Y);
            den = problem.M.norm(newx, Y)^2;
            if num > 0
                lambda = min( lambdamax, max(lambdamin, num/den) );
            else
                lambda = lambdamax;
            end
        end

        % alternate strategy
        if strcmp(strategy, 'alternate')
            num = problem.M.norm(newx, Stransp)^2;
            den = problem.M.inner(newx, Stransp, Y);
            den2 = problem.M.norm(newx, Y)^2;
            if (den > 0)
                if mod(iter,2)==0
                    lambda = min( lambdamax, max(lambdamin, num/den) );
                else
                    lambda = min( lambdamax, max(lambdamin, den/den2) );
                end
            else
                lambda = lambdamax;
            end
        end

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

        % Make sure we don't use too much memory for the store database
        storedb.purge();

        % Update iterate info
        x = newx;
        key = newkey;
        cost = newcost;
        grad = newgrad;
        gradnorm = newgradnorm;

        % iter is the number of iterations we have accomplished.
        iter = iter + 1;

        % Log statistics for freshly executed iteration
        stats = savestats();
        info(iter+1) = stats;

    end

    info = info(1:iter+1);

    if options.verbosity >= 1
        fprintf('Total time is %f [s] (excludes statsfun)\n', ...
            info(end).time);
    end


    % Routine in charge of collecting the current iteration stats
    function stats = savestats()
        stats.iter = iter;
        stats.cost = cost;
        stats.gradnorm = gradnorm;
        if iter == 0
            stats.stepsize = NaN;
            stats.time = toc(timetic);
            stats.linesearch = [];
        else
            stats.stepsize = stepsize;
            stats.time = info(iter).time + toc(timetic);
            stats.linesearch = lsstats;
        end
        stats = applyStatsfun(problem, x, storedb, key, options, stats);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/bfgs/rlbfgs.m
================================================
function [x, cost, info, options] = rlbfgs(problem, x0, options)
% Riemannian limited memory BFGS solver for smooth objective functions.
%
% function [x, cost, info, options] = rlbfgs(problem)
% function [x, cost, info, options] = rlbfgs(problem, x0)
% function [x, cost, info, options] = rlbfgs(problem, x0, options)
% function [x, cost, info, options] = rlbfgs(problem, [], options)
%
%
% This is a Riemannian limited memory BFGS solver (quasi-Newton method),
% which aims to minimize the cost function in the given problem structure.
% It requires access to the gradient of the cost function.
%
% Parameter options.memory can be used to specify the number of iterations
% the algorithm remembers and uses to approximate the inverse Hessian of
% the cost. Default value is 30.
% For unlimited memory, set options.memory = Inf.
%
%
% For a description of the algorithm and theorems offering convergence
% guarantees, see the references below.
%
% The initial iterate is x0 if it is provided. Otherwise, a random point on
% the manifold is picked. To specify options whilst not specifying an
% initial iterate, give x0 as [] (the empty matrix).
%
% The two outputs 'x' and 'cost' are the last reached point on the manifold
% and its cost.
%
% The output 'info' is a struct-array which contains information about the
% iterations:
%   iter (integer)
%       The iteration number. The initial guess is 0.
%	cost (double)
%       The corresponding cost value.
%	gradnorm (double)
%       The (Riemannian) norm of the gradient.
%	time (double)
%       The total elapsed time in seconds to reach the corresponding cost.
%	stepsize (double)
%       The size of the step from the previous to the new iterate.
%   accepted (Boolean)
%       true if step is accepted in the cautious update. 0 otherwise.
%   And possibly additional information logged by options.statsfun.
% For example, type [info.gradnorm] to obtain a vector of the successive
% gradient norms reached at each iteration.
%
% The options structure is used to overwrite the default values. All
% options have a default value and are hence optional. To force an option
% value, pass an options structure with a field options.optionname, where
% optionname is one of the following and the default value is indicated
% between parentheses:
%
%   tolgradnorm (1e-6)
%       The algorithm terminates if the norm of the gradient drops below
%       this. For well-scaled problems, a rule of thumb is that you can
%       expect to reduce the gradient norm by 8 orders of magnitude
%       (sqrt(eps)) compared to the gradient norm at a "typical" point (a
%       rough initial iterate for example). Further decrease is sometimes
%       possible, but inexact floating point arithmetic will eventually
%       limit the final accuracy. If tolgradnorm is set too low, the
%       algorithm may end up iterating forever (or at least until another
%       stopping criterion triggers).
%   maxiter (1000)
%       The algorithm terminates if maxiter iterations were executed.
%   maxtime (Inf)
%       The algorithm terminates if maxtime seconds elapsed.
%   minstepsize (1e-10)
%     The minimum norm of the tangent vector that points from the current
%     point to the next point. If the norm is less than minstepsize, the
%     program will terminate.
%   memory (30)
%     The number of previous iterations the program remembers. This is used
%     to approximate the inverse Hessian at the current point. Because of
%     difficulty of maintaining a representation of operators in terms of
%     coordinates, a recursive method is used. The number of steps in the
%     recursion is at most options.memory. This parameter can take any
%     integer value >= 0, or Inf, which is taken to be options.maxiter. If
%     options.maxiter has value Inf, then it will take value 10000 and a
%     warning will be displayed.
%   linesearch (@linesearch_hint)
%       Function handle to a line search function. The options structure is
%       passed to the line search too, so you can pass it parameters. See
%       each line search's documentation for info.
%       By default, the initial multiplier tried is alpha = 1. This can be
%       changed with options.linesearch: see help of linesearch_hint.
%   strict_inc_func (@(t) t)
%     The Cautious step needs a real function that has value 0 at t = 0,
%     and  is strictly increasing. See details in Wen Huang's paper
%     "A Riemannian BFGS Method without Differentiated Retraction for
%     Nonconvex Optimization Problems"
%   statsfun (none)
%       Function handle to a function that will be called after each
%       iteration to provide the opportunity to log additional statistics.
%       They will be returned in the info struct. See the generic Manopt
%       documentation about solvers for further information. statsfun is
%       called with the point x that was reached last.
%   stopfun (none)
%       Function handle to a function that will be called at each iteration
%       to provide the opportunity to specify additional stopping criteria.
%       See the generic Manopt documentation about solvers for further
%       information.
%   verbosity (2)
%       Integer number used to tune the amount of output the algorithm
%       generates during execution (mostly as text in the command window).
%       The higher, the more output. 0 means silent. 3 and above includes a
%       display of the options structure at the beginning of the execution.
%   debug (false)
%       Set to true to allow the algorithm to perform additional
%       computations for debugging purposes. If a debugging test fails, you
%       will be informed of it, usually via the command window. Be aware
%       that these additional computations appear in the algorithm timings
%       too, and may interfere with operations such as counting the number
%       of cost evaluations, etc. (the debug calls get storedb too).
%   storedepth (30)
%       Maximum number of different points x of the manifold for which a
%       store structure will be kept in memory in the storedb. If the
%       caching features of Manopt are not used, this is irrelevant. If
%       memory usage is an issue, you may try to lower this number.
%       Profiling may then help to investigate if a performance hit was
%       incurred as a result.
%
%
% Please cite the Manopt paper as well as the research paper:
% @InBook{Huang2016,
%   title     = {A {R}iemannian {BFGS} Method for Nonconvex Optimization Problems},
%   author    = {Huang, W. and Absil, P.-A. and Gallivan, K.A.},
%   year      = {2016},
%   publisher = {Springer International Publishing},
%   editor    = {Karas{\"o}zen, B{\"u}lent and Manguo{\u{g}}lu, Murat and Tezer-Sezgin, M{\"u}nevver and G{\"o}ktepe, Serdar and U{\u{g}}ur, {\"O}m{\"u}r},
%   address   = {Cham},
%   booktitle = {Numerical Mathematics and Advanced Applications ENUMATH 2015},
%   pages     = {627--634},
%   doi       = {10.1007/978-3-319-39929-4_60}
% }
%


% This file is part of Manopt: www.manopt.org.
% Original author: Changshuo Liu, July 19, 2017.
% Contributors: Nicolas Boumal
% Change log:


    % Verify that the problem description is sufficient for the solver.
    if ~canGetCost(problem)
        warning('manopt:getCost', ...
            'No cost provided. The algorithm will likely abort.');
    end
    if ~canGetGradient(problem) && ~canGetApproxGradient(problem)
        % Note: we do not give a warning if an approximate gradient is
        % explicitly given in the problem description, as in that case the user
        % seems to be aware of the issue.
        warning('manopt:getGradient:approx', ...
            ['No gradient provided. Using an FD approximation instead (slow).\n' ...
            'It may be necessary to increase options.tolgradnorm.\n' ...
            'To disable this warning: warning(''off'', ''manopt:getGradient:approx'')']);
        problem.approxgrad = approxgradientFD(problem);
    end

    % Local defaults for the program
    localdefaults.minstepsize = 1e-10;
    localdefaults.maxiter = 1000;
    localdefaults.tolgradnorm = 1e-6;
    localdefaults.memory = 30;
    localdefaults.strict_inc_func = @(t) t;
    localdefaults.ls_max_steps = 25;
    localdefaults.storedepth = 30;
    localdefaults.linesearch = @linesearch_hint;

    % Merge global and local defaults, then merge w/ user options, if any.
    localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);

    % To make sure memory in range [0, Inf)
    options.memory = max(options.memory, 0);
    if options.memory == Inf
        if isinf(options.maxiter)
            options.memory = 10000;
            warning('rlbfgs:memory', ['options.memory and options.maxiter' ...
                ' are both Inf; options.memory has been changed to 10000.']);
        else
            options.memory = options.maxiter;
        end
    end

    M = problem.M;

    % Create a random starting point if no starting point is provided.
    if ~exist('x0', 'var')|| isempty(x0)
        xCur = M.rand();
    else
        xCur = x0;
    end

    timetic = tic();

    % Create a store database and get a key for the current x
    storedb = StoreDB(options.storedepth);
    key = storedb.getNewKey();

    % __________Initialization of variables______________
    % Number of iterations since the last restart
    k = 0;
    % Total number of BFGS iterations
    iter = 0;

    % This cell stores step vectors which point from x_{t} to x_{t+1} for t
    % indexing the last iterations, capped at options.memory.
    % That is, it stores up to options.memory of the most recent step
    % vectors. However, the implementation below does not need step vectors
    % in their respective tangent spaces at x_{t}'s. Rather, it requires
    % them transported to the current point's tangent space by vector
    % transport. For details regarding the requirements on the vector
    % transport, see the reference paper by Huang et al.
    % In this implementation, those step vectors are iteratively
    % transported to the current point's tangent space after every
    % iteration. Thus, at every iteration, vectors in sHistory are in the
    % current point's tangent space.
    sHistory = cell(1, options.memory);

    % This cell stores the differences for latest t's of the gradient at
    % x_{t+1} and the gradient at x_{t}, transported to x_{t+1}'s tangent
    % space. The memory is also capped at options.memory.
    yHistory = cell(1, options.memory);

    % rhoHistory{t} stores the reciprocal of the inner product between
    % sHistory{t} and yHistory{t}.
    rhoHistory = cell(1, options.memory);

    % Scaling of direction given by getDirection for acceptable step
    alpha = 1;

    % Scaling of initial matrix, Barzilai-Borwein.
    scaleFactor = 1;

    % Norm of the step
    stepsize = 1;

    % Stores whether the step is accepted by the cautious update check.
    accepted = true;

    % Query the cost function and its gradient
    [xCurCost, xCurGradient] = getCostGrad(problem, xCur, storedb, key);
    xCurGradNorm = M.norm(xCur, xCurGradient);

    % Line-search statistics for recording in info.
    lsstats = [];

    % Flag to control restarting scheme to avoid infinite loops (see below)
    ultimatum = false;

    % Save stats in a struct array info, and preallocate.
    stats = savestats();
    info(1) = stats;
    info(min(10000, options.maxiter+1)).iter = [];

    if options.verbosity >= 2
        fprintf(' iter                cost val            grad. norm           alpha\n');
    end

    % Main iteration
    while true

        % Display iteration information
        if options.verbosity >= 2
            fprintf('%5d    %+.16e        %.8e      %.4e\n', ...
                iter, xCurCost, xCurGradNorm, alpha);
        end

        % Start timing this iteration
        timetic = tic();

        % Run standard stopping criterion checks
        [stop, reason] = stoppingcriterion(problem, xCur, options, ...
            info, iter+1);

        % If none triggered, run specific stopping criterion check
        if ~stop
            if stats.stepsize < options.minstepsize
                % To avoid infinite loop and to push the search further
                % in case BFGS approximation of Hessian is off towards
                % the end, we erase the memory by setting k = 0;
                % In this way, it starts off like a steepest descent.
                % If even steepest descent does not work, then it is
                % hopeless and we will terminate.
                if ~ultimatum
                    if options.verbosity >= 2
                        fprintf(['stepsize is too small, restarting ' ...
                            'the bfgs procedure at the current point.\n']);
                    end
                    k = 0;
                    ultimatum = true;
                else
                    stop = true;
                    reason = sprintf(['Last stepsize smaller than '  ...
                        'minimum allowed; options.minstepsize = %g.'], ...
                        options.minstepsize);
                end
            else
                % We are not in trouble: lift the ultimatum if it was on.
                ultimatum = false;
            end
        end

        if stop
            if options.verbosity >= 1
                fprintf([reason '\n']);
            end
            break;
        end

        % Compute BFGS direction
        p = getDirection(M, xCur, xCurGradient, sHistory,...
            yHistory, rhoHistory, scaleFactor, min(k, options.memory));

        % Execute line-search through the handle stored in the options, so
        % that a user-supplied options.linesearch is honored. The default
        % is @linesearch_hint.
        [stepsize, xNext, newkey, lsstats] = ...
            options.linesearch(problem, xCur, p, xCurCost, ...
            M.inner(xCur, xCurGradient, p), ...
            options, storedb, key);

        % Record the BFGS step-multiplier alpha which was effectively
        % selected. Toward convergence, we hope to see alpha = 1.
        alpha = stepsize/M.norm(xCur, p);
        step = M.lincomb(xCur, alpha, p);

        % Query cost and gradient at the candidate new point.
        [xNextCost, xNextGrad] = getCostGrad(problem, xNext, storedb, newkey);

        % Compute sk and yk
        sk = M.transp(xCur, xNext, step);
        yk = M.lincomb(xNext, 1, xNextGrad, ...
            -1, M.transp(xCur, xNext, xCurGradient));

        % Computation of the BFGS step is invariant under scaling of sk and
        % yk by a common factor. For numerical reasons, we scale sk and yk
        % so that sk is a unit norm vector.
        norm_sk = M.norm(xNext, sk);
        sk = M.lincomb(xNext, 1/norm_sk, sk);
        yk = M.lincomb(xNext, 1/norm_sk, yk);

        inner_sk_yk = M.inner(xNext, sk, yk);
        inner_sk_sk = M.norm(xNext, sk)^2;    % ensures nonnegativity

        % If the cautious step is accepted (which is the intended
        % behavior), we record sk, yk and rhok and need to do some
        % housekeeping. If the cautious step is rejected, these are not
        % recorded. In all cases, xNext is the next iterate: the notion of
        % accept/reject here is limited to whether or not we keep track of
        % sk, yk, rhok to update the BFGS operator.
        cap = options.strict_inc_func(xCurGradNorm);
        if inner_sk_sk ~= 0 && (inner_sk_yk / inner_sk_sk) >= cap

            accepted = true;

            rhok = 1/inner_sk_yk;

            scaleFactor = inner_sk_yk / M.norm(xNext, yk)^2;

            % Time to store the vectors sk, yk and the scalar rhok.
            % Remember: we need to transport all vectors to the most
            % current tangent space.

            % If we are out of memory
            if k >= options.memory

                % sk and yk are saved from 1 to the end with the most
                % current recorded to the rightmost hand side of the cells
                % that are occupied. When memory is full, do a shift so
                % that the rightmost is earliest and replace it with the
                % most recent sk, yk.
                for i = 2 : options.memory
                    sHistory{i} = M.transp(xCur, xNext, sHistory{i});
                    yHistory{i} = M.transp(xCur, xNext, yHistory{i});
                end
                if options.memory > 1
                    sHistory = sHistory([2:end, 1]);
                    yHistory = yHistory([2:end, 1]);
                    rhoHistory = rhoHistory([2:end 1]);
                end
                if options.memory > 0
                    sHistory{options.memory} = sk;
                    yHistory{options.memory} = yk;
                    rhoHistory{options.memory} = rhok;
                end

            % If we are not out of memory
            else

                for i = 1:k
                    sHistory{i} = M.transp(xCur, xNext, sHistory{i});
                    yHistory{i} = M.transp(xCur, xNext, yHistory{i});
                end
                sHistory{k+1} = sk;
                yHistory{k+1} = yk;
                rhoHistory{k+1} = rhok;

            end

            k = k + 1;

        % The cautious step is rejected: we do not store sk, yk, rhok but
        % we still need to transport stored vectors to the new tangent
        % space.
        else

            accepted = false;

            for i = 1 : min(k, options.memory)
                sHistory{i} = M.transp(xCur, xNext, sHistory{i});
                yHistory{i} = M.transp(xCur, xNext, yHistory{i});
            end

        end

        % Update variables to new iterate
        iter = iter + 1;
        xCur = xNext;
        key = newkey;
        xCurGradient = xNextGrad;
        xCurGradNorm = M.norm(xNext, xNextGrad);
        xCurCost = xNextCost;

        % Make sure we don't use too much memory for the store database
        % (this is independent from the BFGS memory.)
        storedb.purge();

        % Log statistics for freshly executed iteration
        stats = savestats();
        info(iter+1) = stats;

    end

    % Housekeeping before we return
    info = info(1:iter+1);
    x = xCur;
    cost = xCurCost;

    if options.verbosity >= 1
        fprintf('Total time is %f [s] (excludes statsfun)\n', ...
            info(end).time);
    end

    % Routine in charge of collecting the current iteration stats
    function stats = savestats()
        stats.iter = iter;
        stats.cost = xCurCost;
        stats.gradnorm = xCurGradNorm;
        if iter == 0
            stats.stepsize = NaN;
            stats.time = toc(timetic);
            stats.accepted = NaN;
        else
            stats.stepsize = stepsize;
            stats.time = info(iter).time + toc(timetic);
            stats.accepted = accepted;
        end
        stats.linesearch = lsstats;
        stats = applyStatsfun(problem, xCur, storedb, key, options, stats);
    end

end




% BFGS step, see Wen's paper for details.
% This function takes in a tangent vector g, and applies an approximate
% inverse Hessian P to it to get Pg. Then, -Pg is returned.
%
% Theory requires the vector transport to be isometric and to satisfy the
% locking condition (see paper), but these properties do not seem to be
% crucial in practice. If your manifold provides M.isotransp, it may be
% good to do M.transp = M.isotransp; after loading M with a factory.
%
% This implementation operates in the tangent space of the most recent
% point since all vectors in sHistory and yHistory have been transported
% there.
%
% Inputs:
%   M            manifold structure; only M.inner and M.lincomb are used
%   xCur         current point (passed through to M.inner / M.lincomb)
%   xCurGradient tangent vector at xCur to which the operator is applied
%   sHistory, yHistory, rhoHistory
%                cells holding the stored step vectors, gradient
%                differences and scalars; entries 1..k are read
%   scaleFactor  scalar multiplying the vector between the two passes
%   k            number of stored pairs to use (0 gives -scaleFactor*g)
% Output:
%   dir          the tangent vector -Pg
function dir = getDirection(M, xCur, xCurGradient, sHistory, yHistory, ...
                            rhoHistory, scaleFactor, k)

    % Backward pass, newest pair (index k) down to the oldest (index 1):
    % remember rho_j * <s_j, v> and remove that multiple of y_j from v.
    v = xCurGradient;
    coeffs = zeros(1, k);
    for j = k : -1 : 1
        coeffs(j) = rhoHistory{j} * M.inner(xCur, sHistory{j}, v);
        v = M.lincomb(xCur, 1, v, -coeffs(j), yHistory{j});
    end

    % Initial inverse Hessian approximation: a scaled identity.
    v = M.lincomb(xCur, scaleFactor, v);

    % Forward pass, oldest pair to newest: add back the s_j corrections.
    for j = 1 : k
        correction = coeffs(j) - rhoHistory{j} * M.inner(xCur, yHistory{j}, v);
        v = M.lincomb(xCur, 1, v, correction, sHistory{j});
    end

    % The search direction is the negative of the result.
    dir = M.lincomb(xCur, -1, v);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/conjugategradient/conjugategradient.m
================================================
function [x, cost, info, options] = conjugategradient(problem, x, options)
% Conjugate gradient minimization algorithm for Manopt.
%
% function [x, cost, info, options] = conjugategradient(problem)
% function [x, cost, info, options] = conjugategradient(problem, x0)
% function [x, cost, info, options] = conjugategradient(problem, x0, options)
% function [x, cost, info, options] = conjugategradient(problem, [], options)
%
% Apply the conjugate gradient minimization algorithm to the problem
% defined in the problem structure, starting at x0 if it is provided
% (otherwise, at a random point on the manifold). To specify options whilst
% not specifying an initial guess, give x0 as [] (the empty matrix).
%
% The outputs x and cost are the best reached point on the manifold and its
% cost. The struct-array info contains information about the iterations:
%   iter : the iteration number (0 for the initial guess)
%   cost : cost value
%   time : elapsed time in seconds
%   gradnorm : Riemannian norm of the gradient
%   stepsize : norm of the last tangent vector retracted
%   beta : value of the beta parameter (see options.beta_type)
%   linesearch : information logged by options.linesearch
%   And possibly additional information logged by options.statsfun.
% For example, type [info.gradnorm] to obtain a vector of the successive
% gradient norms reached.
%
% The options structure is used to overwrite the default values. All
% options have a default value and are hence optional. To force an option
% value, pass an options structure with a field options.optionname, where
% optionname is one of the following and the default value is indicated
% between parentheses:
%
%   tolgradnorm (1e-6)
%       The algorithm terminates if the norm of the gradient drops below this.
%   maxiter (1000)
%       The algorithm terminates if maxiter iterations have been executed.
%   maxtime (Inf)
%       The algorithm terminates if maxtime seconds elapsed.
%   minstepsize (1e-10)
%       The algorithm terminates if the linesearch returns a displacement
%       vector (to be retracted) smaller in norm than this value.
%   beta_type ('H-S')
%       Conjugate gradient beta rule used to construct the new search
%       direction, based on a linear combination of the previous search
%       direction and the new (preconditioned) gradient. Possible values
%       for this parameter are:
%           'S-D', 'steep' for beta = 0 (preconditioned steepest descent)
%           'F-R' for Fletcher-Reeves's rule
%           'P-R' for Polak-Ribiere's modified rule
%           'H-S' for Hestenes-Stiefel's modified rule
%           'H-Z' for Hager-Zhang's modified rule
%       See Hager and Zhang 2006, "A survey of nonlinear conjugate gradient
%       methods" for a description of these rules in the Euclidean case and
%       for an explanation of how to adapt them to the preconditioned case.
%       The adaption to the Riemannian case is straightforward: see in code
%       for details. Modified rules take the max between 0 and the computed
%       beta value, which provides automatic restart, except for H-Z which
%       uses a different modification.
%   orth_value (Inf)
%       Following Powell's restart strategy (Math. prog. 1977), restart CG
%       (that is, make a -preconditioned- gradient step) if two successive
%       -preconditioned- gradients are "too" parallel. See for example
%       Hager and Zhang 2006, "A survey of nonlinear conjugate gradient
%       methods", page 12. An infinite value disables this strategy. See in
%       code formula for the specific criterion used.
%   linesearch (@linesearch_adaptive or @linesearch_hint)
%       Function handle to a line search function. The options structure is
%       passed to the line search too, so you can pass it parameters. See
%       each line search's documentation for info. Another available line
%       search in manopt is @linesearch, in /manopt/linesearch/linesearch.m
%       If the problem structure includes a line search hint, then the
%       default line search used is @linesearch_hint.
%   statsfun (none)
%       Function handle to a function that will be called after each
%       iteration to provide the opportunity to log additional statistics.
%       They will be returned in the info struct. See the generic Manopt
%       documentation about solvers for further information.
%   stopfun (none)
%       Function handle to a function that will be called at each iteration
%       to provide the opportunity to specify additional stopping criteria.
%       See the generic Manopt documentation about solvers for further
%       information.
%   verbosity (3)
%       Integer number used to tune the amount of output the algorithm
%       generates during execution (mostly as text in the command window).
%       The higher, the more output. 0 means silent.
%   storedepth (20)
%       Maximum number of different points x of the manifold for which a
%       store structure will be kept in memory in the storedb. If the
%       caching features of Manopt are not used, this is irrelevant. For
%       the CG algorithm, a store depth of 2 should always be sufficient.
%
%
% In most of the examples bundled with the toolbox (see link below), the
% solver can be replaced by the present one if need be.
%
% See also: steepestdescent trustregions manopt/solvers/linesearch manopt/examples

% An explicit, general listing of this algorithm, with preconditioning,
% can be found in the following paper:
%     @Article{boumal2015lowrank,
%       Title   = {Low-rank matrix completion via preconditioned optimization on the {G}rassmann manifold},
%       Author  = {Boumal, N. and Absil, P.-A.},
%       Journal = {Linear Algebra and its Applications},
%       Year    = {2015},
%       Pages   = {200--239},
%       Volume  = {475},
%       Doi     = {10.1016/j.laa.2015.02.027},
%     }

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors: Nicolas Boumal
% Change log:
%
%   March 14, 2013, NB:
%       Added preconditioner support : see Section 8 in
%       https://www.math.lsu.edu/~hozhang/papers/cgsurvey.pdf
%
%   Sept. 13, 2013, NB:
%       Now logging beta parameter too.
%
%   Nov. 7, 2013, NB:
%       The search direction is no longer normalized before it is passed
%       to the linesearch. This way, it is up to the designers of the
%       linesearch to decide whether they want to use the norm of the
%       search direction in their algorithm or not. There are reasons
%       against it, but practical evidence that it may help too, so we
%       allow it. The default linesearch_adaptive used does exploit the
%       norm information. The base linesearch does not. You may select it
%       by setting options.linesearch = @linesearch;
%
%   Nov. 29, 2013, NB:
%       Documentation improved: options are now explicitly described.
%       Removed the Daniel rule for beta: it was not appropriate for
%       preconditioned CG and I could not find a proper reference for it.
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % Verify that the problem description is sufficient for the solver.
    if ~canGetCost(problem)
        warning('manopt:getCost', ...
                'No cost provided. The algorithm will likely abort.');
    end
    if ~canGetGradient(problem) && ~canGetApproxGradient(problem)
        warning('manopt:getGradient:approx', ...
               ['No gradient provided. Using an FD approximation instead (slow).\n' ...
                'It may be necessary to increase options.tolgradnorm.\n' ...
                'To disable this warning: warning(''off'', ''manopt:getGradient:approx'')']);
        problem.approxgrad = approxgradientFD(problem);
    end

    % Set local defaults here
    localdefaults.minstepsize = 1e-10;
    localdefaults.maxiter = 1000;
    localdefaults.tolgradnorm = 1e-6;
    localdefaults.storedepth = 20;
    % Changed by NB : H-S has the "auto restart" property.
    % See Hager-Zhang 2005/2006 survey about CG methods.
    % The auto restart comes from the 'max(0, ...)', not so much from the
    % reason stated in Hager-Zhang I think. P-R also has auto restart.
    localdefaults.beta_type = 'H-S';
    localdefaults.orth_value = Inf; % by BM as suggested in Nocedal and Wright

    % Depending on whether the problem structure specifies a hint for
    % line-search algorithms, choose a default line-search that works on
    % its own (typical) or that uses the hint.
    if ~canGetLinesearch(problem)
        localdefaults.linesearch = @linesearch_adaptive;
    else
        localdefaults.linesearch = @linesearch_hint;
    end

    % Merge global and local defaults, then merge w/ user options, if any.
    localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);

    % For convenience
    inner = problem.M.inner;
    lincomb = problem.M.lincomb;

    timetic = tic();

    % If no initial point x is given by the user, generate one at random.
    if ~exist('x', 'var') || isempty(x)
        x = problem.M.rand();
    end

    % Create a store database and generate a key for the current x
    storedb = StoreDB(options.storedepth);
    key = storedb.getNewKey();

    % Compute cost-related quantities for x
    [cost, grad] = getCostGrad(problem, x, storedb, key);
    gradnorm = problem.M.norm(x, grad);
    Pgrad = getPrecon(problem, x, grad, storedb, key);
    gradPgrad = inner(x, grad, Pgrad);

    % Iteration counter (at any point, iter is the number of fully executed
    % iterations so far)
    iter = 0;

    % Save stats in a struct array info and preallocate.
    stats = savestats();
    info(1) = stats;
    info(min(10000, options.maxiter+1)).iter = [];

    if options.verbosity >= 2
        fprintf(' iter\t cost val\t grad. norm\n');
    end

    % Compute a first descent direction (not normalized)
    desc_dir = lincomb(x, -1, Pgrad);

    % Start iterating until stopping criterion triggers
    while true

        % Display iteration information
        if options.verbosity >= 2
            fprintf('%5d\t%+.16e\t%.8e\n', iter, cost, gradnorm);
        end

        % Start timing this iteration
        timetic = tic();

        % Run standard stopping criterion checks
        [stop, reason] = stoppingcriterion(problem, x, options, info, iter+1);

        % Run specific stopping criterion check
        % (at iter == 0 the recorded stepsize is NaN, so this never fires
        % before the first line search)
        if ~stop && abs(stats.stepsize) < options.minstepsize
            stop = true;
            reason = sprintf(['Last stepsize smaller than minimum ' ...
                              'allowed; options.minstepsize = %g.'], ...
                              options.minstepsize);
        end

        if stop
            if options.verbosity >= 1
                fprintf([reason '\n']);
            end
            break;
        end

        % The line search algorithms require the directional derivative of the
        % cost at the current point x along the search direction.
        df0 = inner(x, grad, desc_dir);

        % If we didn't get a descent direction: restart, i.e., switch to the
        % negative gradient. Equivalent to resetting the CG direction to a
        % steepest descent step, which discards the past information.
        if df0 >= 0

            % Or we switch to the negative gradient direction.
            if options.verbosity >= 3
                fprintf(['Conjugate gradient info: got an ascent direction '...
                         '(df0 = %2e), reset to the (preconditioned) '...
                         'steepest descent direction.\n'], df0);
            end
            % Reset to negative gradient: this discards the CG memory.
            desc_dir = lincomb(x, -1, Pgrad);
            df0 = -gradPgrad;

        end

        % Execute line search
        [stepsize, newx, newkey, lsstats] = options.linesearch( ...
                       problem, x, desc_dir, cost, df0, options, storedb, key);

        % Compute the new cost-related quantities for newx
        [newcost, newgrad] = getCostGrad(problem, newx, storedb, newkey);
        newgradnorm = problem.M.norm(newx, newgrad);
        Pnewgrad = getPrecon(problem, newx, newgrad, storedb, newkey);
        newgradPnewgrad = inner(newx, newgrad, Pnewgrad);

        % Apply the CG scheme to compute the next search direction.
        %
        % This paper https://www.math.lsu.edu/~hozhang/papers/cgsurvey.pdf
        % by Hager and Zhang lists many known beta rules. The rules defined
        % here can be found in that paper (or are provided with additional
        % references), adapted to the Riemannian setting.
        %
        if strcmpi(options.beta_type, 'steep') || ...
           strcmpi(options.beta_type, 'S-D')              % Gradient Descent

            beta = 0;
            % Pnewgrad is a tangent vector at newx: the new direction must
            % be formed in the tangent space at newx, like the regular CG
            % update further below.
            desc_dir = lincomb(newx, -1, Pnewgrad);

        else

            oldgrad = problem.M.transp(x, newx, grad);
            orth_grads = inner(newx, oldgrad, Pnewgrad) / newgradPnewgrad;

            % Powell's restart strategy (see page 12 of Hager and Zhang's
            % survey on conjugate gradient methods, for example)
            if abs(orth_grads) >= options.orth_value
                beta = 0;
                % Same remark as above: the restart direction lives in the
                % tangent space at newx.
                desc_dir = lincomb(newx, -1, Pnewgrad);

            else % Compute the CG modification

                % Bring the previous direction to the tangent space at newx
                desc_dir = problem.M.transp(x, newx, desc_dir);

                switch upper(options.beta_type)

                    case 'F-R'  % Fletcher-Reeves
                        beta = newgradPnewgrad / gradPgrad;

                    case 'P-R'  % Polak-Ribiere+
                        % vector grad(new) - transported grad(current)
                        diff = lincomb(newx, 1, newgrad, -1, oldgrad);
                        ip_diff = inner(newx, Pnewgrad, diff);
                        beta = ip_diff / gradPgrad;
                        beta = max(0, beta);

                    case 'H-S'  % Hestenes-Stiefel+
                        diff = lincomb(newx, 1, newgrad, -1, oldgrad);
                        ip_diff = inner(newx, Pnewgrad, diff);
                        beta = ip_diff / inner(newx, diff, desc_dir);
                        beta = max(0, beta);

                    case 'H-Z' % Hager-Zhang+
                        diff = lincomb(newx, 1, newgrad, -1, oldgrad);
                        Poldgrad = problem.M.transp(x, newx, Pgrad);
                        Pdiff = lincomb(newx, 1, Pnewgrad, -1, Poldgrad);
                        deno = inner(newx, diff, desc_dir);
                        numo = inner(newx, diff, Pnewgrad);
                        numo = numo - 2*inner(newx, diff, Pdiff)*...
                                         inner(newx, desc_dir, newgrad) / deno;
                        beta = numo / deno;

                        % Robustness (see Hager-Zhang paper mentioned above)
                        desc_dir_norm = problem.M.norm(newx, desc_dir);
                        eta_HZ = -1 / ( desc_dir_norm * min(0.01, gradnorm) );
                        beta = max(beta, eta_HZ);

                    otherwise
                        error(['Unknown options.beta_type. ' ...
                               'Should be steep, S-D, F-R, P-R, H-S or H-Z.']);
                end

                desc_dir = lincomb(newx, -1, Pnewgrad, beta, desc_dir);

            end

        end

        % Make sure we don't use too much memory for the store database
        storedb.purge();

        % Transfer iterate info
        x = newx;
        key = newkey;
        cost = newcost;
        grad = newgrad;
        Pgrad = Pnewgrad;
        gradnorm = newgradnorm;
        gradPgrad = newgradPnewgrad;

        % iter is the number of iterations we have accomplished.
        iter = iter + 1;

        % Log statistics for freshly executed iteration
        stats = savestats();
        info(iter+1) = stats; %#ok

    end

    info = info(1:iter+1);

    if options.verbosity >= 1
        fprintf('Total time is %f [s] (excludes statsfun)\n', info(end).time);
    end

    % Routine in charge of collecting the current iteration stats.
    % Nested function: reads iter, cost, gradnorm, timetic, stepsize,
    % lsstats, beta and info from the enclosing workspace.
    function stats = savestats()
        stats.iter = iter;
        stats.cost = cost;
        stats.gradnorm = gradnorm;
        if iter == 0
            stats.stepsize = nan;
            stats.time = toc(timetic);
            stats.linesearch = [];
            stats.beta = 0;
        else
            stats.stepsize = stepsize;
            stats.time = info(iter).time + toc(timetic);
            stats.linesearch = lsstats;
            stats.beta = beta;
        end
        stats = applyStatsfun(problem, x, storedb, key, options, stats);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/conjugategradient/linear_conjugategradient.m
================================================
function [x, cost, info, options] = linear_conjugategradient(problem, x, options)
% Conjugate gradient minimization algorithm for Manopt.
%
% function [x, cost, info, options] = linear_conjugategradient(problem)
% function [x, cost, info, options] = linear_conjugategradient(problem, x0)
% function [x, cost, info, options] = linear_conjugategradient(problem, x0, options)
% function [x, cost, info, options] = linear_conjugategradient(problem, [], options)
%
% Apply the conjugate gradient minimization algorithm to the problem
% defined in the problem structure, starting at x0 if it is provided
% (otherwise, at a random point on the manifold). To specify options whilst
% not specifying an initial guess, give x0 as [] (the empty matrix).
%
% The outputs x and cost are the best reached point on the manifold and its
% cost.
% The struct-array info contains information about the iterations:
%   iter : the iteration number (0 for the initial guess)
%   cost : cost value
%   time : elapsed time in seconds
%   gradnorm : Riemannian norm of the gradient
%   stepsize : norm of the last tangent vector retracted
%   beta : value of the beta parameter (see options.beta_type)
%   linesearch : information logged by options.linesearch
%   And possibly additional information logged by options.statsfun.
% For example, type [info.gradnorm] to obtain a vector of the successive
% gradient norms reached.
%
% The options structure is used to overwrite the default values. All
% options have a default value and are hence optional. To force an option
% value, pass an options structure with a field options.optionname, where
% optionname is one of the following and the default value is indicated
% between parentheses:
%
%   tolgradnorm (1e-6)
%       The algorithm terminates if the norm of the gradient drops below this.
%   maxiter (1000)
%       The algorithm terminates if maxiter iterations have been executed.
%   maxtime (Inf)
%       The algorithm terminates if maxtime seconds elapsed.
%   minstepsize (1e-10)
%       The algorithm terminates if the linesearch returns a displacement
%       vector (to be retracted) smaller in norm than this value.
%   beta_type ('H-S')
%       Conjugate gradient beta rule used to construct the new search
%       direction, based on a linear combination of the previous search
%       direction and the new (preconditioned) gradient. Possible values
%       for this parameter are:
%           'S-D', 'steep' for beta = 0 (preconditioned steepest descent)
%           'F-R' for Fletcher-Reeves's rule
%           'P-R' for Polak-Ribiere's modified rule
%           'H-S' for Hestenes-Stiefel's modified rule
%           'H-Z' for Hager-Zhang's modified rule
%       See Hager and Zhang 2006, "A survey of nonlinear conjugate gradient
%       methods" for a description of these rules in the Euclidean case and
%       for an explanation of how to adapt them to the preconditioned case.
%       The adaption to the Riemannian case is straightforward: see in code
%       for details. Modified rules take the max between 0 and the computed
%       beta value, which provides automatic restart, except for H-Z which
%       uses a different modification.
%   orth_value (Inf)
%       Following Powell's restart strategy (Math. prog. 1977), restart CG
%       (that is, make a -preconditioned- gradient step) if two successive
%       -preconditioned- gradients are "too" parallel. See for example
%       Hager and Zhang 2006, "A survey of nonlinear conjugate gradient
%       methods", page 12. An infinite value disables this strategy. See in
%       code formula for the specific criterion used.
%   linesearch (@linesearch_adaptive or @linesearch_hint)
%       Function handle to a line search function. The options structure is
%       passed to the line search too, so you can pass it parameters. See
%       each line search's documentation for info. Another available line
%       search in manopt is @linesearch, in /manopt/linesearch/linesearch.m
%       If the problem structure includes a line search hint, then the
%       default line search used is @linesearch_hint.
%   statsfun (none)
%       Function handle to a function that will be called after each
%       iteration to provide the opportunity to log additional statistics.
%       They will be returned in the info struct. See the generic Manopt
%       documentation about solvers for further information.
%   stopfun (none)
%       Function handle to a function that will be called at each iteration
%       to provide the opportunity to specify additional stopping criteria.
%       See the generic Manopt documentation about solvers for further
%       information.
%   verbosity (3)
%       Integer number used to tune the amount of output the algorithm
%       generates during execution (mostly as text in the command window).
%       The higher, the more output. 0 means silent.
%   storedepth (20)
%       Maximum number of different points x of the manifold for which a
%       store structure will be kept in memory in the storedb. If the
%       caching features of Manopt are not used, this is irrelevant. For
%       the CG algorithm, a store depth of 2 should always be sufficient.
%
%
% In most of the examples bundled with the toolbox (see link below), the
% solver can be replaced by the present one if need be.
%
% See also: steepestdescent trustregions manopt/solvers/linesearch manopt/examples

% An explicit, general listing of this algorithm, with preconditioning,
% can be found in the following paper:
%     @Article{boumal2015lowrank,
%       Title   = {Low-rank matrix completion via preconditioned optimization on the {G}rassmann manifold},
%       Author  = {Boumal, N. and Absil, P.-A.},
%       Journal = {Linear Algebra and its Applications},
%       Year    = {2015},
%       Pages   = {200--239},
%       Volume  = {475},
%       Doi     = {10.1016/j.laa.2015.02.027},
%     }

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors: Nicolas Boumal
% Change log:
%
%   March 14, 2013, NB:
%       Added preconditioner support : see Section 8 in
%       https://www.math.lsu.edu/~hozhang/papers/cgsurvey.pdf
%
%   Sept. 13, 2013, NB:
%       Now logging beta parameter too.
%
%   Nov. 7, 2013, NB:
%       The search direction is no longer normalized before it is passed
%       to the linesearch. This way, it is up to the designers of the
%       linesearch to decide whether they want to use the norm of the
%       search direction in their algorithm or not. There are reasons
%       against it, but practical evidence that it may help too, so we
%       allow it. The default linesearch_adaptive used does exploit the
%       norm information. The base linesearch does not. You may select it
%       by setting options.linesearch = @linesearch;
%
%   Nov. 29, 2013, NB:
%       Documentation improved: options are now explicitly described.
%       Removed the Daniel rule for beta: it was not appropriate for
%       preconditioned CG and I could not find a proper reference for it.
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % Verify that the problem description is sufficient for the solver.
    if ~canGetCost(problem)
        warning('manopt:getCost', ...
                'No cost provided. The algorithm will likely abort.');
    end
    if ~canGetGradient(problem) && ~canGetApproxGradient(problem)
        warning('manopt:getGradient:approx', ...
               ['No gradient provided. Using an FD approximation instead (slow).\n' ...
                'It may be necessary to increase options.tolgradnorm.\n' ...
                'To disable this warning: warning(''off'', ''manopt:getGradient:approx'')']);
        problem.approxgrad = approxgradientFD(problem);
    end

    % Set local defaults here
    localdefaults.minstepsize = 1e-10;
    localdefaults.maxiter = 1000;
    localdefaults.tolgradnorm = 1e-6;
    localdefaults.storedepth = 20;
    % Changed by NB : H-S has the "auto restart" property.
    % See Hager-Zhang 2005/2006 survey about CG methods.
    % The auto restart comes from the 'max(0, ...)', not so much from the
    % reason stated in Hager-Zhang I think. P-R also has auto restart.
    localdefaults.beta_type = 'H-S';
    localdefaults.orth_value = Inf; % by BM as suggested in Nocedal and Wright

    % Depending on whether the problem structure specifies a hint for
    % line-search algorithms, choose a default line-search that works on
    % its own (typical) or that uses the hint.
    if ~canGetLinesearch(problem)
        localdefaults.linesearch = @linesearch_adaptive;
    else
        localdefaults.linesearch = @linesearch_hint;
    end

    % Merge global and local defaults, then merge w/ user options, if any.
    localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);

    % For convenience
    inner = problem.M.inner;
    lincomb = problem.M.lincomb;

    timetic = tic();

    % If no initial point x is given by the user, generate one at random.
    if ~exist('x', 'var') || isempty(x)
        x = problem.M.rand();
    end

    % Create a store database and generate a key for the current x
    storedb = StoreDB(options.storedepth);
    key = storedb.getNewKey();

    % Compute cost-related quantities for x
    [cost, grad] = getCostGrad(problem, x, storedb, key);
    gradnorm = problem.M.norm(x, grad);
    Pgrad = getPrecon(problem, x, grad, storedb, key);
    gradPgrad = inner(x, grad, Pgrad);

    % Iteration counter (at any point, iter is the number of fully executed
    % iterations so far)
    iter = 0;

    % Save stats in a struct array info and preallocate.
    stats = savestats();
    info(1) = stats;
    info(min(10000, options.maxiter+1)).iter = [];

    if options.verbosity >= 2
        fprintf(' iter\t cost val\t grad. norm\n');
    end

    % Compute a first descent direction (not normalized)
    desc_dir = lincomb(x, -1, Pgrad);

    % Start iterating until stopping criterion triggers
    while true

        % Display iteration information
        if options.verbosity >= 2
            fprintf('%5d\t%+.16e\t%.8e\n', iter, cost, gradnorm);
        end

        % Start timing this iteration
        timetic = tic();

        % Run standard stopping criterion checks
        [stop, reason] = stoppingcriterion(problem, x, options, info, iter+1);

        % Run specific stopping criterion check
        % (at iter == 0 the recorded stepsize is NaN, so this never fires
        % before the first line search)
        if ~stop && abs(stats.stepsize) < options.minstepsize
            stop = true;
            reason = sprintf(['Last stepsize smaller than minimum ' ...
                              'allowed; options.minstepsize = %g.'], ...
                              options.minstepsize);
        end

        if stop
            if options.verbosity >= 1
                fprintf([reason '\n']);
            end
            break;
        end

        % The line search algorithms require the directional derivative of the
        % cost at the current point x along the search direction.
        df0 = inner(x, grad, desc_dir);

        % If we didn't get a descent direction: restart, i.e., switch to the
        % negative gradient. Equivalent to resetting the CG direction to a
        % steepest descent step, which discards the past information.
        if df0 >= 0

            % Or we switch to the negative gradient direction.
            if options.verbosity >= 3
                fprintf(['Conjugate gradient info: got an ascent direction '...
                         '(df0 = %2e), reset to the (preconditioned) '...
                         'steepest descent direction.\n'], df0);
            end
            % Reset to negative gradient: this discards the CG memory.
            desc_dir = lincomb(x, -1, Pgrad);
            df0 = -gradPgrad;

        end

        % Execute line search
        [stepsize, newx, newkey, lsstats] = options.linesearch( ...
                       problem, x, desc_dir, cost, df0, options, storedb, key);

        % Compute the new cost-related quantities for newx
        [newcost, newgrad] = getCostGrad(problem, newx, storedb, newkey);
        newgradnorm = problem.M.norm(newx, newgrad);
        Pnewgrad = getPrecon(problem, newx, newgrad, storedb, newkey);
        newgradPnewgrad = inner(newx, newgrad, Pnewgrad);

        % Apply the CG scheme to compute the next search direction.
        %
        % This paper https://www.math.lsu.edu/~hozhang/papers/cgsurvey.pdf
        % by Hager and Zhang lists many known beta rules. The rules defined
        % here can be found in that paper (or are provided with additional
        % references), adapted to the Riemannian setting.
        %
        if strcmpi(options.beta_type, 'steep') || ...
           strcmpi(options.beta_type, 'S-D')              % Gradient Descent

            beta = 0;
            % Pnewgrad is a tangent vector at newx: the new direction must
            % be formed in the tangent space at newx, like the regular CG
            % update further below.
            desc_dir = lincomb(newx, -1, Pnewgrad);

        else

            oldgrad = problem.M.transp(x, newx, grad);
            orth_grads = inner(newx, oldgrad, Pnewgrad) / newgradPnewgrad;

            % Powell's restart strategy (see page 12 of Hager and Zhang's
            % survey on conjugate gradient methods, for example)
            if abs(orth_grads) >= options.orth_value
                beta = 0;
                % Same remark as above: the restart direction lives in the
                % tangent space at newx.
                desc_dir = lincomb(newx, -1, Pnewgrad);

            else % Compute the CG modification

                % Bring the previous direction to the tangent space at newx
                desc_dir = problem.M.transp(x, newx, desc_dir);

                switch upper(options.beta_type)

                    case 'F-R'  % Fletcher-Reeves
                        beta = newgradPnewgrad / gradPgrad;

                    case 'P-R'  % Polak-Ribiere+
                        % vector grad(new) - transported grad(current)
                        diff = lincomb(newx, 1, newgrad, -1, oldgrad);
                        ip_diff = inner(newx, Pnewgrad, diff);
                        beta = ip_diff / gradPgrad;
                        beta = max(0, beta);

                    case 'H-S'  % Hestenes-Stiefel+
                        diff = lincomb(newx, 1, newgrad, -1, oldgrad);
                        ip_diff = inner(newx, Pnewgrad, diff);
                        beta = ip_diff / inner(newx, diff, desc_dir);
                        beta = max(0, beta);

                    case 'H-Z' % Hager-Zhang+
                        diff = lincomb(newx, 1, newgrad, -1, oldgrad);
                        Poldgrad = problem.M.transp(x, newx, Pgrad);
                        Pdiff = lincomb(newx, 1, Pnewgrad, -1, Poldgrad);
                        deno = inner(newx, diff, desc_dir);
                        numo = inner(newx, diff, Pnewgrad);
                        numo = numo - 2*inner(newx, diff, Pdiff)*...
                                         inner(newx, desc_dir, newgrad) / deno;
                        beta = numo / deno;

                        % Robustness (see Hager-Zhang paper mentioned above)
                        desc_dir_norm = problem.M.norm(newx, desc_dir);
                        eta_HZ = -1 / ( desc_dir_norm * min(0.01, gradnorm) );
                        beta = max(beta, eta_HZ);

                    otherwise
                        error(['Unknown options.beta_type. ' ...
                               'Should be steep, S-D, F-R, P-R, H-S or H-Z.']);
                end

                desc_dir = lincomb(newx, -1, Pnewgrad, beta, desc_dir);

            end

        end

        % Make sure we don't use too much memory for the store database
        storedb.purge();

        % Transfer iterate info
        x = newx;
        key = newkey;
        cost = newcost;
        grad = newgrad;
        Pgrad = Pnewgrad;
        gradnorm = newgradnorm;
        gradPgrad = newgradPnewgrad;

        % iter is the number of iterations we have accomplished.
        iter = iter + 1;

        % Log statistics for freshly executed iteration
        stats = savestats();
        info(iter+1) = stats; %#ok

    end

    info = info(1:iter+1);

    if options.verbosity >= 1
        fprintf('Total time is %f [s] (excludes statsfun)\n', info(end).time);
    end

    % Routine in charge of collecting the current iteration stats.
    % Nested function: reads iter, cost, gradnorm, timetic, stepsize,
    % lsstats, beta and info from the enclosing workspace.
    function stats = savestats()
        stats.iter = iter;
        stats.cost = cost;
        stats.gradnorm = gradnorm;
        if iter == 0
            stats.stepsize = nan;
            stats.time = toc(timetic);
            stats.linesearch = [];
            stats.beta = 0;
        else
            stats.stepsize = stepsize;
            stats.time = info(iter).time + toc(timetic);
            stats.linesearch = lsstats;
            stats.beta = beta;
        end
        stats = applyStatsfun(problem, x, storedb, key, options, stats);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/gradientapproximations/approxgradientFD.m
================================================
function gradfun = approxgradientFD(problem, options)
% Gradient approx.
fnctn handle based on finite differences of the cost. % % function gradfun = approxgradientFD(problem) % function gradfun = approxgradientFD(problem, options) % % Input: % % A Manopt problem structure (already containing the manifold and enough % information to compute the cost) and an options structure (optional), % containing one option: % options.stepsize (positive double; default: 2^-23). % options.subspacedim (positive integer; default: [], for M.dim()). % % If the cost cannot be computed on 'problem', a warning is issued. % % Output: % % Returns a function handle, encapsulating a generic finite difference % approximation of the gradient of the problem cost. The finite difference % is based on M.dim()+1 computations of the cost. % % The returned gradfun has this calling pattern: % % function gradfd = gradfun(x) % function gradfd = gradfun(x, storedb) % function gradfd = gradfun(x, storedb, key) % % x is a point on the manifold problem.M, storedb is a StoreDB object, % and key is the StoreDB key to point x. % % Usage: % % Typically, the user will set problem.M and other fields to define the % cost (typically, problem.cost). Then, to use this generic purpose % gradient approximation: % % problem.approxgrad = approxgradientFD(problem, options); % % See also: steepestdescent conjugategradient % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Nov. 1, 2016. % Contributors: % Change log: % This gradient approximation is based on the cost: % check availability. if ~canGetCost(problem) warning('manopt:approxgradFD:nocost', ... 'approxgradFD requires the cost to be computable.'); end % Set local defaults here, and merge with user options, if any. localdefaults.stepsize = 2^-23; localdefaults.subspacedim = []; if ~exist('options', 'var') || isempty(options) options = struct(); end options = mergeOptions(localdefaults, options); % % Finite-difference parameters % How far do we look? 
stepsize = options.stepsize; % Approximate the projection of the gradient on a random subspace of % what dimension? If [], uses full tangent space. subspacedim = options.subspacedim; % Build and return the function handle here. This extra construct via % funhandle makes it possible to make storedb and key optional. gradfun = @funhandle; function gradfd = funhandle(x, storedb, key) % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end gradfd = gradientFD(stepsize, subspacedim, problem, x, storedb, key); end end function gradfd = gradientFD(stepsize, subspacedim, problem, x, storedb, key) % This function does the actual work. % % Original code: Nov. 1, 2016 (NB). % Evaluate the cost at the root point fx = getCost(problem, x, storedb, key); % Pick an orthonormal basis for the tangent space at x, or a subspace % thereof. The default is a full subspace. If a strict subspace is % picked, the returned vector approximates the orthogonal projection of % the gradient to that subspace. B = tangentorthobasis(problem.M, x, subspacedim); % Use finite differences to approximate the directional derivative % along each direction in the basis B. df = zeros(size(B)); for k = 1 : numel(B) % Move in the B{k} direction xk = problem.M.retr(x, B{k}, stepsize); % Evaluate the cost there fxk = getCost(problem, xk, storedb); % Finite difference df(k) = (fxk - fx)/stepsize; end % Build the gradient approximation. gradfd = lincomb(problem.M, x, B, df); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/hessianapproximations/approxhessianFD.m ================================================ function hessfun = approxhessianFD(problem, options) % Hessian approx. fnctn handle based on finite differences of the gradient. 
% % function hessfun = approxhessianFD(problem) % function hessfun = approxhessianFD(problem, options) % % Input: % % A Manopt problem structure (already containing the manifold and enough % information to compute the cost gradient) and an options structure % (optional), containing one option: % options.stepsize (positive double; default: 2^-14). % % If the gradient cannot be computed or approximated on 'problem', % a warning is issued. % % Output: % % Returns a function handle, encapsulating a generic finite difference % approximation of the Hessian of the problem cost. The finite difference % is based on computations of the gradient. % % The returned hessfun has this calling pattern: % % function hessfd = hessfun(x, xdot) % function hessfd = hessfun(x, xdot, storedb) % function hessfd = hessfun(x, xdot, storedb, key) % % x is a point on the manifold problem.M, xdot is a tangent vector to that % manifold at x, storedb is a StoreDB object, and key is the StoreDB key to % point x. % % Usage: % % Typically, the user will set problem.M and other fields to define the % cost and the gradient (typically, problem.cost and problem.grad or % problem.egrad). Then, to use this generic purpose Hessian approximation: % % problem.approxhess = approxhessianFD(problem, options); % % See also: trustregions % The Riemannian Trust-Region method, used in combination with the present % Hessian approximation, is called RTR-FD. Some convergence theory for it % is available in this paper: % % @incollection{boumal2015rtrfd % author={Boumal, N.}, % title={Riemannian trust regions with finite-difference Hessian approximations are globally convergent}, % year={2015}, % booktitle={Geometric Science of Information} % } % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, April 8, 2015. % Contributors: % Change log: % % Feb. 
19, 2015 (NB): % It is sufficient to ensure positive radial linearity to guarantee % (together with other assumptions) that this approximation of the % Hessian will confer global convergence to the trust-regions method. % Formerly, in-code comments referred to the necessity of having % complete radial linearity, and that this was harder to achieve. % This appears not to be necessary after all, which simplifies the % code. % % April 3, 2015 (NB): % Works with the new StoreDB class system. % % April 8, 2015 (NB): % Changed to approxhessianFD, which now returns a function handle % that encapsulates the getHessianFD functionality. Will be better % aligned with the other Hessian approximations to come (which may % want to use storedb.internal), and now allows specifying the step % size. % This Hessian approximation is based on the gradient: % check availability. if ~canGetGradient(problem) && ~canGetApproxGradient(problem) warning('manopt:approxhessianFD:nogradient', ... 'approxhessianFD requires the gradient to be computable.'); end % Set local defaults here, and merge with user options, if any. localdefaults.stepsize = 2^-14; if ~exist('options', 'var') || isempty(options) options = struct(); end options = mergeOptions(localdefaults, options); % Finite-difference parameter: how far do we look? stepsize = options.stepsize; % Build and return the function handle here. This extra construct via % funhandle makes it possible to make storedb and key optional. hessfun = @funhandle; function hessfd = funhandle(x, xdot, storedb, key) % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end hessfd = hessianFD(stepsize, problem, x, xdot, storedb, key); end end function hessfd = hessianFD(stepsize, problem, x, xdot, storedb, key) % This function does the actual work. % % Original code: Dec. 30, 2012 (NB). % Extract the input vector norm. 
norm_xdot = problem.M.norm(x, xdot); % First, check whether the step xdot is not too small. if norm_xdot < eps hessfd = problem.M.zerovec(x); return; end % Determine how far to retract xdot, so that the point reached does not % depend on the norm of xdot. This is what ensures radial linearity of % this present Hessian approximation. c = stepsize / norm_xdot; % Compute the gradient at the current point. grad = getGradient(problem, x, storedb, key); % Compute a point a little further along xdot, and the gradient there. % Since this is a new point, we need a new key for it, for storedb. x1 = problem.M.retr(x, xdot, c); key1 = storedb.getNewKey(); grad1 = getGradient(problem, x1, storedb, key1); % Transport grad1 back from x1 to x. grad1 = problem.M.transp(x1, x, grad1); % Return the finite difference of them: (grad1 - grad)/c. hessfd = problem.M.lincomb(x, 1/c, grad1, -1/c, grad); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/linesearch/linesearch.m ================================================ function [stepsize, newx, newkey, lsstats] = ... linesearch(problem, x, d, f0, df0, options, storedb, key) % Standard line-search algorithm (step size selection) for descent methods. % % function [stepsize, newx, newkey, lsstats] = % linesearch(problem, x, d, f0, df0, options, storedb, key) % % Base line-search algorithm for descent methods, based on a simple % backtracking method. The search direction provided has to be a descent % direction, as indicated by a negative df0 = directional derivative of f % at x along d. % % The algorithm is invariant under positive scaling of the cost function % and under offsetting, that is: if the cost function f is replaced by % 8*f+3 for example, the returned step size will be the same. Furthermore, % the returned step size is independent of the norm of the search direction % vector d: only the direction of d is important. 
% % Below, the step is constructed as alpha*d, and the step size is the norm % of that vector, thus: stepsize = alpha*norm_d. The step is executed by % retracting the vector alpha*d from the current point x, giving newx. % % This line-search may create and maintain a structure called lsmem inside % storedb.internal. This gives the linesearch the opportunity to remember % what happened in the previous calls. This is typically used to make a % first guess at the step size, based on previous events. % % Inputs % % problem : structure holding the description of the optimization problem % x : current point on the manifold problem.M % d : tangent vector at x (descent direction) -- its norm is irrelevant % f0 : cost value at x % df0 : directional derivative at x along d % options : options structure (see in code for usage) % storedb : StoreDB object (handle class: passed by reference) for caching % key : key associated to point x in storedb % % options, storedb and key are optional. % % Outputs % % stepsize : norm of the vector retracted to reach newx from x. % newx : next iterate suggested by the line-search algorithm, such that % the retraction at x of the vector alpha*d reaches newx. % newkey : key associated to newx in storedb % lsstats : statistics about the line-search procedure % (stepsize, number of trials etc). % % See also: steepestdescent conjugategradients linesearch_adaptive % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Dec. 30, 2012. % Contributors: % Change log: % % Sept. 13, 2013 (NB): % User control over the parameters of the linesearch via the options % ls_contraction_factor, ls_optimism, ls_suff_decr and ls_max_steps. % See in code for the effect of those. % % Sept. 13, 2013 (NB): % The automatic direction reversal feature was removed (it triggered % when df0 > 0). Direction reversal is a decision that needs to be % made by the solver, so it can know about it. % % Sept. 
13, 2013 (NB): % The linesearch is now invariant under rescaling of the cost % function f. That is, if f is replaced by 8*f (and hence the % directional derivatives of f are scaled accordingly), the % stepsizes computed will not change. % % Nov. 7, 2013 (NB): % The linesearch is now invariant under rescaling of the search % direction d. The meaning of stepsize is also more clear in the % comments. Added a parameter ls_initial_stepsize to give users % control over the first step size trial. % % April 3, 2015 (NB): % Works with the new StoreDB class system. % % April 8, 2015 (NB): % Got rid of lsmem input/output: now maintained in storedb.internal. % % Oct. 7, 2016 (NB): % Thanks to Wen Huang, a bug was corrected in the logic around % lsmem handling. Specifically, lsmem = storedb.internal.lsmem; % was erroneously coded as lsmem = storedb.internal; % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end % Backtracking default parameters. These can be overwritten in the % options structure which is passed to the solver. default_options.ls_contraction_factor = .5; default_options.ls_optimism = 1/.5; default_options.ls_suff_decr = 1e-4; default_options.ls_max_steps = 25; default_options.ls_initial_stepsize = 1; if ~exist('options', 'var') || isempty(options) options = struct(); end options = mergeOptions(default_options, options); contraction_factor = options.ls_contraction_factor; optimism = options.ls_optimism; suff_decr = options.ls_suff_decr; max_ls_steps = options.ls_max_steps; initial_stepsize = options.ls_initial_stepsize; % Compute the norm of the search direction. % This is useful to make the linesearch algorithm invariant under the % scaling of d. The rationale is that the important information is the % search direction, not the size of that vector. 
The question of how % far we should go is precisely what the linesearch algorithm is % supposed to answer: the calling algorithm should not need to care. norm_d = problem.M.norm(x, d); % At first, we have no idea of what the step size should be. alpha = NaN; % If we know about what happened at the previous step, we can leverage % that to compute an initial guess for the step size, as inspired from % Nocedal&Wright, p59. if isfield(storedb.internal, 'lsmem') lsmem = storedb.internal.lsmem; if isfield(lsmem, 'f0') % Pick initial step size based on where we were last time, alpha = 2*(f0 - lsmem.f0) / df0; % and go look a little further (or less far), just in case. alpha = optimism*alpha; end end % If we have no information about the previous iteration (maybe this is % the first one?) or if the above formula gave a too small step size % (perhaps it is even negative), then fall back to a user supplied % suggestion for the first step size (the "a priori"). % At any rate, the choice should be invariant under rescaling of the % cost function f and of the search direction d, and it should be % bounded away from zero for convergence guarantees. We must allow it % to be close to zero though, for fine convergence. if isnan(alpha) || alpha*norm_d <= eps alpha = initial_stepsize/norm_d; end % Make the chosen step and compute the cost there. newx = problem.M.retr(x, d, alpha); newkey = storedb.getNewKey(); newf = getCost(problem, newx, storedb, newkey); cost_evaluations = 1; % Backtrack while the Armijo criterion is not satisfied while newf > f0 + suff_decr*alpha*df0 % Reduce the step size, alpha = contraction_factor * alpha; % and look closer down the line newx = problem.M.retr(x, d, alpha); newkey = storedb.getNewKey(); newf = getCost(problem, newx, storedb, newkey); cost_evaluations = cost_evaluations + 1; % Make sure we don't run out of budget if cost_evaluations >= max_ls_steps break; end end % If we got here without obtaining a decrease, we reject the step. 
if newf > f0 alpha = 0; newx = x; newkey = key; newf = f0; %#ok end % As seen outside this function, stepsize is the size of the vector we % retract to make the step from x to newx. Since the step is alpha*d: stepsize = alpha * norm_d; % Save the situtation faced now so that, at the next iteration, % we will know something about the previous decision. storedb.internal.lsmem.f0 = f0; storedb.internal.lsmem.df0 = df0; storedb.internal.lsmem.stepsize = stepsize; % Return some statistics also, for possible analysis. lsstats.costevals = cost_evaluations; lsstats.stepsize = stepsize; lsstats.alpha = alpha; end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/linesearch/linesearch_adaptive.m ================================================
function [stepsize, newx, newkey, lsstats] = ...
       linesearch_adaptive(problem, x, d, f0, df0, options, storedb, key)
% Adaptive line search algorithm (step size selection) for descent methods.
%
% function [stepsize, newx, newkey, lsstats] =
%      linesearch_adaptive(problem, x, d, f0, df0, options, storedb, key)
%
% Adaptive linesearch algorithm for descent methods, based on a simple
% backtracking method. Contrary to linesearch.m, this function is not
% invariant under rescaling of the search direction d. These two line
% search methods vary mainly in their strategy to pick the initial step
% size.
%
% Below, the step is constructed as alpha*d, and the step size is the norm
% of that vector, thus: stepsize = alpha*norm_d. The step is executed by
% retracting the vector alpha*d from the current point x, giving newx.
%
% This line-search may create and maintain a structure called lsmem inside
% storedb.internal. This gives the linesearch the opportunity to remember
% what happened in the previous calls. This is typically used to make a
% first guess at the step size, based on previous events.
%
% Inputs
%
%  problem : structure holding the description of the optimization problem
%  x : current point on the manifold problem.M
%  d : tangent vector at x (descent direction)
%  f0 : cost value at x
%  df0 : directional derivative at x along d
%  options : options structure (see below)
%  storedb : StoreDB object (handle class: passed by reference) for caching
%  key : key associated to point x in storedb
%
%  options, storedb and key are optional.
%
% Options (defaults in parentheses)
%
%  options.ls_contraction_factor (0.5): alpha is multiplied by this factor
%       at each backtracking step.
%  options.ls_suff_decr (0.5): constant of the Armijo sufficient decrease
%       test newf <= f0 + ls_suff_decr*alpha*df0.
%  options.ls_max_steps (10): backtracking stops once the number of cost
%       evaluations reaches this value (checked after each extra
%       evaluation).
%  options.ls_initial_stepsize (1): step size (norm of alpha*d) tried when
%       no usable suggestion is remembered from a previous call.
%
% Outputs
%
%  stepsize : norm of the vector retracted to reach newx from x.
%  newx : next iterate suggested by the line-search algorithm, such that
%         the retraction at x of the vector alpha*d reaches newx.
%  newkey : key associated to newx in storedb
%  lsstats : statistics about the line-search procedure, with fields
%            costevals, stepsize and alpha.
%
% See also: steepestdescent conjugategradients linesearch

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors: Nicolas Boumal
% Change log:
%
%   Sept. 13, 2013 (NB) :
%       The automatic direction reversal feature was removed (it triggered
%       when df0 > 0). Direction reversal is a decision that needs to be
%       made by the solver, so it can know about it.
%
%   Nov. 7, 2013 (NB) :
%       The whole function has been recoded to mimic more closely the new
%       version of linesearch.m. The parameters are available through the
%       options structure passed to the solver and have the same names and
%       same meaning as for the base linesearch. The information is logged
%       more reliably.
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   April 8, 2015 (NB):
%       Got rid of lsmem input/output: now maintained in storedb.internal.

    % Allow omission of the key, and even of storedb.
    if ~exist('key', 'var')
        if ~exist('storedb', 'var')
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    % Backtracking default parameters. These can be overwritten in the
    % options structure which is passed to the solver.
    default_options.ls_contraction_factor = .5;
    default_options.ls_suff_decr = .5;
    default_options.ls_max_steps = 10;
    default_options.ls_initial_stepsize = 1;

    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(default_options, options);

    contraction_factor = options.ls_contraction_factor;
    suff_decr = options.ls_suff_decr;
    max_ls_steps = options.ls_max_steps;
    initial_stepsize = options.ls_initial_stepsize;

    % Compute the norm of the search direction.
    norm_d = problem.M.norm(x, d);

    % At first, we have no idea of what the step size should be.
    % Starting from NaN guarantees alpha is always defined, even when
    % lsmem exists but was filled by another line-search (linesearch.m
    % stores f0, df0 and stepsize there, but no init_alpha).
    alpha = NaN;

    % If this is not the first iteration, then lsmem should have been
    % filled with a suggestion for the initial step.
    if isfield(storedb.internal, 'lsmem')
        lsmem = storedb.internal.lsmem;
        if isfield(lsmem, 'init_alpha')
            % Pick initial step size based on where we were last time.
            alpha = lsmem.init_alpha;
        end
    end

    % Fall back to the user supplied suggestion if nothing was remembered
    % or if the remembered value is unusable. In particular, a rejected
    % step stores init_alpha = 0: reusing it would retract the zero vector
    % and trivially pass the Armijo test, so that every subsequent call
    % would return a zero step as well. This is the same guard as the one
    % used in linesearch.m.
    if isnan(alpha) || alpha*norm_d <= eps
        alpha = initial_stepsize / norm_d;
    end

    % Make the chosen step and compute the cost there.
    newx = problem.M.retr(x, d, alpha);
    newkey = storedb.getNewKey();
    newf = getCost(problem, newx, storedb, newkey);
    cost_evaluations = 1;

    % Backtrack while the Armijo criterion is not satisfied
    while newf > f0 + suff_decr*alpha*df0

        % Reduce the step size,
        alpha = contraction_factor * alpha;

        % and look closer down the line
        newx = problem.M.retr(x, d, alpha);
        newkey = storedb.getNewKey();
        newf = getCost(problem, newx, storedb, newkey);
        cost_evaluations = cost_evaluations + 1;

        % Make sure we don't run out of budget
        if cost_evaluations >= max_ls_steps
            break;
        end

    end

    % If we got here without obtaining a decrease, we reject the step.
    if newf > f0
        alpha = 0;
        newx = x;
        newkey = key;
        newf = f0; %#ok
    end

    % As seen outside this function, stepsize is the size of the vector we
    % retract to make the step from x to newx. Since the step is alpha*d:
    stepsize = alpha * norm_d;

    % Fill lsmem with a suggestion for what the next initial step size
    % trial should be. On average we intend to do only one extra cost
    % evaluation. Notice how the suggestion is not about stepsize but about
    % alpha. This is the reason why this line search is not invariant under
    % rescaling of the search direction d.
    switch cost_evaluations
        case 1
            % If things go very well, push your luck.
            init_alpha = 2 * alpha;
        case 2
            % If things go reasonably well, try to keep pace.
            init_alpha = alpha;
        otherwise
            % If we backtracked a lot, the new stepsize is probably quite
            % small: try to recover.
            init_alpha = 2 * alpha;
    end
    storedb.internal.lsmem.init_alpha = init_alpha;

    % Return some statistics also, for possible analysis.
    lsstats.costevals = cost_evaluations;
    lsstats.stepsize = stepsize;
    lsstats.alpha = alpha;

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/linesearch/linesearch_decrease.m ================================================ function [stepsize, newx, newkey, lsstats] = ... linesearch_decrease(problem, x, d, f0, ~, options, storedb, key) % Backtracking line-search aiming merely for a decrease in cost value. % % function [stepsize, newx, newkey, lsstats] = % linesearch_decrease(problem, x, d, f0, df0, options, storedb, key) % % Line-search algorithm based on a simple backtracking method. The search % direction provided has to be a descent direction, but needs not be a % first-order descent, i.e.: this line-search can be used even if x is a % critical point, as long as the cost function is strictly decreasing % along the direction d. % % The line-search merely guarantees a decrease in the cost (unless a % stopping criterion triggers first, such as exceeding a maximal number of % iterations). This is typically useful to escape saddle points (critical % points admitting descent directions at the second order). Escape % directions can be computed using hessianextreme, for example. % % Below, the step is constructed as alpha*d, and the step size is the norm % of that vector, thus: stepsize = alpha*norm_d. The step is executed by % retracting the vector alpha*d from the current point x, giving newx. % An initial stepsize of norm_d thus means the first candidate x is % obtained by retracting d at x, as is. % % Options: % options.ls_max_steps (25): maximum number of cost evaluations. % options.ls_initial_stepsize (norm_d): first stepsize trial. % options.ls_contraction_factor (0.5): stepsize reduction per iteration. % % % Inputs/Outputs : see help for linesearch. % f0 is the cost at x. % df0 is unused. % options, storedb and key are optional.
% Thus, a simplified calling pattern is (with all outputs still % available): linesearch_decrease(problem, x, d, f0) % % See also: steepestdescent linesearch hessianextreme % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, April 8, 2015. % Contributors: % Change log: % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end norm_d = problem.M.norm(x, d); % Backtracking default parameters. These can be overwritten in the % options structure which is passed to the solver. default_options.ls_contraction_factor = .5; default_options.ls_initial_stepsize = norm_d; default_options.ls_max_steps = 25; if ~exist('options', 'var') || isempty(options) options = struct(); end options = mergeOptions(default_options, options); contraction_factor = options.ls_contraction_factor; initial_stepsize = options.ls_initial_stepsize; max_ls_steps = options.ls_max_steps; % Initial step size as a mutliplier of d. alpha = initial_stepsize / norm_d; % Make the chosen step and compute the cost there. newx = problem.M.retr(x, d, alpha); newkey = storedb.getNewKey(); newf = getCost(problem, newx, storedb, newkey); cost_evaluations = 1; % Backtrack while no cost decrease is obtained. while newf >= f0 % Reduce the step size, alpha = contraction_factor * alpha; % and look closer down the line newx = problem.M.retr(x, d, alpha); newkey = storedb.getNewKey(); newf = getCost(problem, newx, storedb, newkey); cost_evaluations = cost_evaluations + 1; % Make sure we don't run out of budget if cost_evaluations >= max_ls_steps break; end end % If we got here without obtaining a decrease, we reject the step. % Equal cost is accepted, since if x is critical, it is important to % move away from x more than it is important to decrease the cost. 
if newf > f0 alpha = 0; newx = x; newkey = key; newf = f0; %#ok end % As seen outside this function, stepsize is the size of the vector we % retract to make the step from x to newx. Since the step is alpha*d: stepsize = alpha * norm_d; % Return some statistics also, for possible analysis. lsstats.costevals = cost_evaluations; lsstats.stepsize = stepsize; lsstats.alpha = alpha; end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/linesearch/linesearch_hint.m ================================================ function [stepsize, newx, newkey, lsstats] = ... linesearch_hint(problem, x, d, f0, df0, options, storedb, key) % Armijo line-search based on the line-search hint in the problem structure. % % function [stepsize, newx, newkey, lsstats] = % linesearch_hint(problem, x, d, f0, df0, options, storedb, key) % % Base line-search algorithm for descent methods, based on a simple % backtracking method. The search direction provided has to be a descent % direction, as indicated by a negative df0 = directional derivative of f % at x along d. % % The algorithm obtains an initial step size candidate from the problem % structure, typically through the problem.linesearch function. If that % step does not fulfill the Armijo sufficient decrease criterion, that step % size is reduced geometrically until a satisfactory step size is obtained % or until a failure criterion triggers. If the problem structure does not % provide an initial alpha, then alpha = 1 is tried first. % % Below, the step is constructed as alpha*d, and the step size is the norm % of that vector, thus: stepsize = alpha*norm_d. The step is executed by % retracting the vector alpha*d from the current point x, giving newx. % % Inputs/Outputs : see help for linesearch % % See also: steepestdescent conjugategradients linesearch % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, July 17, 2014. 
% Contributors: % Change log: % % April 3, 2015 (NB): % Works with the new StoreDB class system. % % April 8, 2015 (NB): % Got rid of lsmem input/output. % % July 20, 2017 (NB): % Now using alpha = 1 by default. % % Aug. 28, 2017 (NB): % Adding two options: ls_backtrack and ls_force_decrease, both true % by default. Setting them to false can disable parts of the line % search that, respectively, execute an Armijo backtracking and % reject a cost increasing step. % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end % Backtracking default parameters. These can be overwritten in the % options structure which is passed to the solver. default_options.ls_contraction_factor = .5; default_options.ls_suff_decr = 1e-4; default_options.ls_max_steps = 25; default_options.ls_backtrack = true; default_options.ls_force_decrease = true; if ~exist('options', 'var') || isempty(options) options = struct(); end options = mergeOptions(default_options, options); contraction_factor = options.ls_contraction_factor; suff_decr = options.ls_suff_decr; max_ls_steps = options.ls_max_steps; % Obtain an initial guess at alpha from the problem structure. It is % assumed that the present line-search is only called when the problem % structure provides enough information for the call here to work. if canGetLinesearch(problem) alpha = getLinesearch(problem, x, d, storedb, key); else alpha = 1; end % Make the chosen step and compute the cost there. 
newx = problem.M.retr(x, d, alpha); newkey = storedb.getNewKey(); newf = getCost(problem, newx, storedb, newkey); cost_evaluations = 1; % Backtrack while the Armijo criterion is not satisfied while options.ls_backtrack && newf > f0 + suff_decr*alpha*df0 % Reduce the step size, alpha = contraction_factor * alpha; % and look closer down the line newx = problem.M.retr(x, d, alpha); newkey = storedb.getNewKey(); newf = getCost(problem, newx, storedb, newkey); cost_evaluations = cost_evaluations + 1; % Make sure we don't run out of budget if cost_evaluations >= max_ls_steps break; end end % If we got here without obtaining a decrease, we reject the step. if options.ls_force_decrease && newf > f0 alpha = 0; newx = x; newkey = key; newf = f0; %#ok end % As seen outside this function, stepsize is the size of the vector we % retract to make the step from x to newx. Since the step is alpha*d: norm_d = problem.M.norm(x, d); stepsize = alpha * norm_d; % Return some statistics also, for possible analysis. lsstats.costevals = cost_evaluations; lsstats.stepsize = stepsize; lsstats.alpha = alpha; end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/neldermead/centroid.m ================================================
function y = centroid(M, x)
% Attempts the computation of a centroid of a set of points on a manifold.
%
% function y = centroid(M, x)
%
% M is a structure representing a manifold.
% x is a cell of points on that manifold.
%
% The output y is obtained by running at most 15 iterations of
% trustregions, started from one of the points in x picked at random, on
% the cost
%
%   f(y) = (1/2) * sum_i M.dist(y, x{i})^2.
%
% This requires M.dist, M.log, M.zerovec and M.lincomb to be available.
%
% The warning 'manopt:getHessian:approx' is silenced while the solver
% runs. Its previous state is restored before returning, whether the
% solver succeeds or throws an error.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:

    % For now, just apply a few steps of gradient descent for Karcher means

    n = numel(x);

    problem.M = M;

    % Cost: half the sum of squared distances from y to the points in x.
    problem.cost = @cost;
    function val = cost(y)
        val = 0;
        for i = 1 : n
            val = val + M.dist(y, x{i})^2;
        end
        val = val/2;
    end

    % Gradient: minus the sum of the logarithms at y of the points in x.
    problem.grad = @grad;
    function g = grad(y)
        g = M.zerovec(y);
        for i = 1 : n
            g = M.lincomb(y, 1, g, -1, M.log(y, x{i}));
        end
    end

    % This line can be uncommented to check that the gradient is indeed
    % correct. This should always be the case if the dist and the log
    % functions in the manifold are correct.
    % checkgradient(problem); pause;

    % No Hessian is supplied in problem; the warning with this identifier
    % is silenced for the duration of the solver call (presumably it is
    % the one raised when the Hessian gets approximated).
    warnid = 'manopt:getHessian:approx';
    query = warning('query', warnid);
    warning('off', warnid);

    options.verbosity = 0;
    options.maxiter = 15;

    % The warning state is global: make sure it is put back as it was even
    % if the solver fails (for example because M.log or M.dist is missing),
    % then let the original error propagate unchanged.
    try
        y = trustregions(problem, x{randi(n)}, options);
    catch err
        warning(query.state, warnid);
        rethrow(err);
    end
    warning(query.state, warnid);

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/neldermead/neldermead.m ================================================ function [x, cost, info, options] = neldermead(problem, x, options) % Nelder Mead optimization algorithm for derivative-free minimization. % % function [x, cost, info, options] = neldermead(problem) % function [x, cost, info, options] = neldermead(problem, x0) % function [x, cost, info, options] = neldermead(problem, x0, options) % function [x, cost, info, options] = neldermead(problem, [], options) % % Apply a Nelder-Mead minimization algorithm to the problem defined in % the problem structure, starting with the population x0 if it is provided % (otherwise, a random population on the manifold is generated). A % population is a cell containing points on the manifold. The number of % elements in the cell must be dim+1, where dim is the dimension of the % manifold: problem.M.dim(). % % To specify options whilst not specifying an initial guess, give x0 as [] % (the empty matrix).
function [x, cost, info, options] = neldermead(problem, x, options)
% Nelder Mead optimization algorithm for derivative-free minimization.
%
% function [x, cost, info, options] = neldermead(problem)
% function [x, cost, info, options] = neldermead(problem, x0)
% function [x, cost, info, options] = neldermead(problem, x0, options)
% function [x, cost, info, options] = neldermead(problem, [], options)
%
% Apply a Nelder-Mead minimization algorithm to the problem defined in
% the problem structure, starting with the population x0 if it is provided
% (otherwise, a random population on the manifold is generated). A
% population is a cell containing points on the manifold. The number of
% elements in the cell must be dim+1, where dim is the dimension of the
% manifold: problem.M.dim().
%
% To specify options whilst not specifying an initial guess, give x0 as []
% (the empty matrix).
%
% This algorithm is a plain adaptation of the Euclidean Nelder-Mead method
% to the Riemannian setting. It comes with no convergence guarantees and
% there is room for improvement. In particular, we compute centroids as
% Karcher means, which seems overly expensive: cheaper forms of
% average-like quantities might work better.
% This solver is useful nonetheless for problems for which no derivatives
% are available, and it may constitute a starting point for the development
% of other Riemannian derivative-free methods.
%
% None of the options are mandatory. See in code for details.
%
% Requires problem.M.pairmean(x, y) to be defined (computes the average
% between two points, x and y). The code also calls problem.M.log and
% problem.M.exp, and the centroid helper.
%
% If options.statsfun is defined, it will receive a cell of points x (the
% current simplex being considered at that iteration), and, if required,
% one store structure corresponding to the best point, x{1}. The points are
% ordered by increasing cost: f(x{1}) <= f(x{2}) <= ... <= f(x{dim+1}),
% where dim = problem.M.dim().
%
% Outputs: x is the best point of the final simplex, cost is its cost, info
% is a struct-array with fields iter, cost, costevals and time (plus any
% field added by options.statsfun), and options is the merged options
% structure actually used.
%
% Based on http://www.optimization-online.org/DB_FILE/2007/08/1742.pdf.
%
% See also: manopt/solvers/pso/pso

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   April 4, 2015 (NB):
%       Working with the new StoreDB class system.
%       Clarified interactions with statsfun and store.
%
%   Nov. 11, 2016 (NB):
%       If options.verbosity is < 2, prints minimal output.

    % Verify that the problem description is sufficient for the solver.
    if ~canGetCost(problem)
        warning('manopt:getCost', ...
                'No cost provided. The algorithm will likely abort.');
    end

    % Dimension of the manifold
    dim = problem.M.dim();

    % Set local defaults here
    localdefaults.storedepth = 0;                     % no need for caching
    localdefaults.maxcostevals = max(1000, 2*dim);
    localdefaults.maxiter = max(2000, 4*dim);

    localdefaults.reflection = 1;
    localdefaults.expansion = 2;
    localdefaults.contraction = .5;
    % forced to .5 to enable using pairmean functions in manifolds.
    % localdefaults.shrinkage = .5;

    % Merge global and local defaults, then merge w/ user options, if any.
    localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);

    % Start timing for initialization.
    timetic = tic();

    % If no initial simplex x is given by the user, generate one at random.
    if ~exist('x', 'var') || isempty(x)
        x = cell(dim+1, 1);
        for i = 1 : dim+1
            x{i} = problem.M.rand();
        end
    end

    % Create a store database and a key for each point.
    storedb = StoreDB(options.storedepth);
    key = cell(size(x));
    for i = 1 : dim+1
        key{i} = storedb.getNewKey();
    end

    % Compute objective-related quantities for x, and setup a
    % function evaluations counter.
    costs = zeros(dim+1, 1);
    for i = 1 : dim+1
        costs(i) = getCost(problem, x{i}, storedb, key{i});
    end
    costevals = dim+1;

    % Sort simplex points by cost.
    [costs, order] = sort(costs);
    x = x(order);
    key = key(order);

    % Iteration counter.
    % At any point, iter is the number of fully executed iterations so far.
    iter = 0;

    % Save stats in a struct array info, and preallocate.
    % savestats will be called twice for the initial iterate (number 0),
    % which is unfortunate, but not problematic.
    stats = savestats();
    info(1) = stats;
    info(min(10000, options.maxiter+1)).iter = [];

    % Start iterating until stopping criterion triggers.
    while true

        % Make sure we don't use too much memory for the store database.
        storedb.purge();

        % Sort simplex points by cost. This must happen before the stats
        % are saved: the previous iteration replaced x{end} (or shrank the
        % simplex), so until the simplex is re-sorted costs(1), x{1} and
        % key{1} need not refer to the best point. savestats and statsfun
        % rely on that ordering.
        [costs, order] = sort(costs);
        x = x(order);
        key = key(order);

        stats = savestats();
        info(iter+1) = stats; %#ok<AGROW>
        iter = iter + 1;

        % Start timing this iteration.
        timetic = tic();

        % Log / display iteration information here.
        if options.verbosity >= 2
            fprintf('Cost evals: %7d\tBest cost: %+.4e\t', ...
                    costevals, costs(1));
        end

        % Run standard stopping criterion checks.
        [stop, reason] = stoppingcriterion(problem, x, options, info, iter);

        if stop
            if options.verbosity >= 1
                fprintf([reason '\n']);
            end
            break;
        end

        % Compute a centroid for the dim best points.
        xbar = centroid(problem.M, x(1:end-1));

        % Compute the direction for moving along the axis xbar - worst x.
        vec = problem.M.log(xbar, x{end});

        % Reflection step
        xr = problem.M.exp(xbar, vec, -options.reflection);
        keyr = storedb.getNewKey();
        costr = getCost(problem, xr, storedb, keyr);
        costevals = costevals + 1;

        % If the reflected point is honorable, drop the worst point,
        % replace it by the reflected point and start new iteration.
        if costr >= costs(1) && costr < costs(end-1)
            if options.verbosity >= 2
                fprintf('Reflection\n');
            end
            costs(end) = costr;
            x{end} = xr;
            key{end} = keyr;
            continue;
        end

        % If the reflected point is better than the best point, expand.
        if costr < costs(1)
            xe = problem.M.exp(xbar, vec, -options.expansion);
            keye = storedb.getNewKey();
            coste = getCost(problem, xe, storedb, keye);
            costevals = costevals + 1;
            if coste < costr
                if options.verbosity >= 2
                    fprintf('Expansion\n');
                end
                costs(end) = coste;
                x{end} = xe;
                key{end} = keye;
                continue;
            else
                if options.verbosity >= 2
                    fprintf('Reflection (failed expansion)\n');
                end
                costs(end) = costr;
                x{end} = xr;
                key{end} = keyr;
                continue;
            end
        end

        % If the reflected point is worse than the second to worst point,
        % contract.
        if costr >= costs(end-1)
            if costr < costs(end)
                % do an outside contraction
                xoc = problem.M.exp(xbar, vec, -options.contraction);
                keyoc = storedb.getNewKey();
                costoc = getCost(problem, xoc, storedb, keyoc);
                costevals = costevals + 1;
                if costoc <= costr
                    if options.verbosity >= 2
                        fprintf('Outside contraction\n');
                    end
                    costs(end) = costoc;
                    x{end} = xoc;
                    key{end} = keyoc;
                    continue;
                end
            else
                % do an inside contraction
                xic = problem.M.exp(xbar, vec, options.contraction);
                keyic = storedb.getNewKey();
                costic = getCost(problem, xic, storedb, keyic);
                costevals = costevals + 1;
                if costic <= costs(end)
                    if options.verbosity >= 2
                        fprintf('Inside contraction\n');
                    end
                    costs(end) = costic;
                    x{end} = xic;
                    key{end} = keyic;
                    continue;
                end
            end
        end

        % If we get here, shrink the simplex around x{1}: every other
        % point is replaced by its pairmean with the best point.
        if options.verbosity >= 2
            fprintf('Shrinkage\n');
        end
        for i = 2 : dim+1
            x{i} = problem.M.pairmean(x{1}, x{i});
            key{i} = storedb.getNewKey();
            costs(i) = getCost(problem, x{i}, storedb, key{i});
        end
        costevals = costevals + dim;

    end

    % Drop the unused preallocated entries.
    info = info(1:iter);

    % Iteration done: return only the best point found. The loop always
    % exits right after a sort, so index 1 is the best point.
    cost = costs(1);
    x = x{1};
    key = key{1}; %#ok<NASGU>

    % Routine in charge of collecting the current iteration stats.
    % Reads iter, costs, costevals, timetic, info, x and key from the
    % enclosing workspace.
    function stats = savestats()
        stats.iter = iter;
        stats.cost = costs(1);
        stats.costevals = costevals;
        if iter == 0
            stats.time = toc(timetic);
        else
            stats.time = info(iter).time + toc(timetic);
        end
        % The statsfun can only possibly receive one store structure. We
        % pass the key to the best point, so that the best point's store
        % will be passed. But the whole cell x of points is passed through.
        stats = applyStatsfun(problem, x, storedb, key{1}, options, stats);
    end

end
function preconfun = preconhessiansolve(problem, options)
% Preconditioner based on the inverse Hessian, by solving linear systems.
%
% function preconfun = preconhessiansolve(problem)
% function preconfun = preconhessiansolve(problem, options)
%
% Input:
%
% A Manopt problem structure (already containing the manifold and enough
% information to compute the Hessian of the cost) and an optional options
% structure. The options are handed to the helper that solves the linear
% systems. Notice that if the Hessian is not positive definite, then its
% inverse is not positive definite either and this preconditioner is not
% suitable.
%
% If neither a Hessian nor an approximate Hessian can be obtained from
% 'problem', a warning is issued and a finite-difference approximation
% (approxhessianFD) is installed as problem.approxhess. The preconditioner
% will then attempt to invert that approximation, although it may not be a
% linear operator. Behavior is unspecified in that case.
%
% Output:
%
% Returns a function handle, encapsulating a generic preconditioner of the
% Hessian based on solving linear systems of the form:
%   Hessian(x)[preconfun(x, xdot)] = xdot,
% where x is the point on the manifold, xdot is the input to the
% preconditioner (a tangent vector) and preconfun(x, xdot) is returned
% (also a tangent vector). The solve may be approximate.
%
% The returned preconfun has this calling pattern:
%
%   function precxdot = preconfun(x, xdot)
%   function precxdot = preconfun(x, xdot, storedb)
%   function precxdot = preconfun(x, xdot, storedb, key)
%
% x is a point on the manifold problem.M, xdot is a tangent vector to that
% manifold at x, storedb is a StoreDB object, and key is the StoreDB key to
% point x.
%
% Usage:
%
% Typically, the user will set problem.M and other fields to define the
% cost, the gradient and the Hessian (typically, problem.cost, problem.grad
% and problem.hess, or problem.egrad and problem.ehess). Then, to use this
% generic purpose Hessian preconditioner:
%
%   problem.precon = preconhessiansolve(problem, options);
%
% Passing that problem structure to the conjugategradient solver
% (which uses preconditioning) configured in steepest descent mode results
% in a type of Riemannian Newton method.
%
% See also: conjugategradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 9, 2015.
% Contributors:
% Change log:

    % Fall back on finite differences when the problem offers neither a
    % Hessian nor an approximation of it. No warning is given when an
    % approximate Hessian is explicitly part of the problem description:
    % in that case the user seems to be aware of the issue.
    if ~(canGetHessian(problem) || canGetApproxHessian(problem))
        warning('manopt:getHessian:approx', ...
               ['No Hessian provided. Using an FD approximation instead.\n' ...
                'To disable this warning: warning(''off'', ''manopt:getHessian:approx'')']);
        problem.approxhess = approxhessianFD(problem);
    end

    % There are no local defaults at this level: merging merely normalizes
    % a missing or empty options input into a structure.
    if nargin < 2 || isempty(options)
        options = struct();
    end
    options = mergeOptions(struct(), options);

    % Going through a nested function (rather than an anonymous one) is
    % what makes storedb and key optional for the caller.
    preconfun = @funhandle;

    function precxdot = funhandle(x, xdot, storedb, key)
        % Allow omission of the key, and even of storedb.
        if nargin < 4
            if nargin < 3
                storedb = StoreDB();
            end
            key = storedb.getNewKey();
        end
        precxdot = hessiansolvehelper(options, problem, x, xdot, ...
                                      storedb, key);
    end

end
function precxdot = hessiansolvehelper(options, problem, x, xdot, storedb, key)
% Solves Hessian(x)[precxdot] = xdot approximately, in the least-squares sense.
%
% This function does the actual work for preconhessiansolve.
%
% Inputs:
%   options  options structure; merged on top of the local defaults
%            (solver = @trustregions, verbosity = 0) and passed to
%            manoptsolve.
%   problem  Manopt problem structure; problem.M is the manifold and the
%            Hessian is obtained through getHessian.
%   x        point on problem.M.
%   xdot     tangent vector at x (right-hand side of the linear system).
%   storedb  StoreDB object, forwarded to getHessian.
%   key      StoreDB key of x, forwarded to getHessian.
%
% Output:
%   precxdot tangent vector at x returned by the solver for the problem
%            of minimizing  f(u) = ||Hessian(x)[u] - xdot||^2 / ||xdot||
%            over the tangent space at x (the zero vector if xdot has
%            norm below eps).
%
% All tangent-vector arithmetic goes through M.lincomb, and the solver is
% started at M.zerovec(x), so that manifolds whose tangent vectors are not
% plain arrays, or whose zerovec needs the base point, are handled too.

    M = problem.M;

    % Exclude the case where xdot is zero
    norm_xdot = M.norm(x, xdot);
    if norm_xdot < eps
        precxdot = M.zerovec(x);
        return;
    end

    % Get a shorthand for the Hessian of the cost on M at x.
    hessian = @(u) getHessian(problem, x, u, storedb, key);

    % Setup an optimization problem on the tangent space to problem.M at x.
    tgtspace = tangentspacefactory(M, x);
    prblm.M = tgtspace;
    prblm.cost = @cost;
    prblm.grad = @grad;
    % Hessian of f: udot -> 2 * H[H[udot]] / ||xdot||.
    prblm.hess = @(u, udot) M.lincomb(x, 2/norm_xdot, hessian(hessian(udot)));

    % f(u) = ||H[u] - xdot||^2 / ||xdot||. The residue H[u] - xdot is
    % cached in the store so that cost and gradient can share it.
    function [f, store] = cost(u, store)
        if ~isfield(store, 'residue')
            Hu = hessian(u);
            store.residue = M.lincomb(x, 1, Hu, -1, xdot);
        end
        f = M.norm(x, store.residue).^2 / norm_xdot;
    end

    % grad f(u) = 2 * H[H[u] - xdot] / ||xdot||.
    function [g, store] = grad(u, store)
        if ~isfield(store, 'residue')
            Hu = hessian(u);
            store.residue = M.lincomb(x, 1, Hu, -1, xdot);
        end
        g = M.lincomb(x, 2/norm_xdot, hessian(store.residue));
    end

    % checkgradient(prblm); pause;
    % checkhessian(prblm); pause;

    localdefaults.solver = @trustregions;
    localdefaults.verbosity = 0;
    % Merge local defaults with user options, if any.
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);

    % Solve the linear system by solving the optimization problem,
    % starting from the zero tangent vector at x.
    precxdot = manoptsolve(prblm, M.zerovec(x), options);

end
function [xbest, fbest, info, options] = pso(problem, x, options)
% Particle swarm optimization (PSO) for derivative-free minimization.
%
% function [x, cost, info, options] = pso(problem)
% function [x, cost, info, options] = pso(problem, x0)
% function [x, cost, info, options] = pso(problem, x0, options)
% function [x, cost, info, options] = pso(problem, [], options)
%
% Apply the Particle Swarm Optimization minimization algorithm to
% the problem defined in the problem structure, starting with the
% population x0 if it is provided (otherwise, a random population on the
% manifold is generated). A population is a cell containing points on the
% manifold. The number of elements in the cell must match the parameter
% options.populationsize (if it does not, populationsize is overwritten
% with the size of x0 and a warning is issued).
%
% To specify options whilst not specifying an initial guess, give x0 as []
% (the empty matrix).
%
% None of the options are mandatory. See in code for details.
%
% The manifold must provide a logarithmic map, problem.M.log (an
% approximate one will do); an error is raised otherwise.
%
% Outputs: the best point found and its cost, a struct-array info with
% fields iter, cost, costevals, x, v, xbest and time (plus one cell per
% field added by options.statsfun, holding the value for each particle),
% and the merged options structure actually used.
%
% Based on the original PSO description in
% http://particleswarm.info/nn951942.ps.
%
% See also: manopt/solvers/neldermead/neldermead

% This file is part of Manopt: www.manopt.org.
% Original author: Pierre Borckmans, Dec. 30, 2012.
% Contributors: Bamdev Mishra, June 18, 2014.
% Change log:
%
%   June 18, 2014 (BM) :
%       Modified for handling product manifolds. Still need overall cleanup
%       to avoid potential issues, in particular wrt logarithms.
%
%   June 23, 2014 (NB) :
%       Added some logic for handling of the populationsize option.
%
%   April 5, 2015 (NB):
%       Working with the new StoreDB class system. The code keeps track of
%       storedb keys for all points, even though it is not strictly
%       necessary. This extra bookkeeping should help maintaining the code.

    % Verify that the problem description is sufficient for the solver.
    if ~canGetCost(problem)
        warning('manopt:getCost', ...
                'No cost provided. The algorithm will likely abort.');
    end

    % Dimension of the manifold
    dim = problem.M.dim();

    % Set local defaults here
    localdefaults.storedepth = 0;                   % no need for caching
    localdefaults.maxcostevals = max(5000, 2*dim);
    localdefaults.maxiter = max(500, 4*dim);

    localdefaults.populationsize = min(40, 10*dim);
    localdefaults.nostalgia = 1.4;
    localdefaults.social = 1.4;

    % Merge global and local defaults, then merge w/ user options, if any.
    localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);

    if ~isfield(problem.M, 'log') % BM
        error(['The manifold problem.M must provide a logarithmic map, ' ...
               'M.log(x, y). An approximate logarithm will do too.']);
    end

    % Start timing for initialization
    timetic = tic();

    % If no initial population x is given by the user,
    % generate one at random.
    if ~exist('x', 'var') || isempty(x)
        x = cell(options.populationsize, 1);
        for i = 1 : options.populationsize
            x{i} = problem.M.rand();
        end
    else
        if ~iscell(x)
            error('The initial guess x0 must be a cell (a population).');
        end
        if length(x) ~= options.populationsize
            options.populationsize = length(x);
            warning('manopt:pso:size', ...
                    ['The option populationsize was forced to the size' ...
                     ' of the given initial population x0.']);
        end
    end

    % Create a store database and a key for each point x{i}
    storedb = StoreDB(options.storedepth);
    xkey = cell(size(x));
    for i = 1 : numel(x)
        xkey{i} = storedb.getNewKey();
    end

    % Initialize personal best positions to the initial population
    y = x;
    ykey = xkey;

    % Save a copy of the swarm at the previous iteration
    xprev = x;
    xprevkey = xkey; %#ok

    % Initialize velocities for each particle
    v = cell(size(x));
    for i = 1 : numel(x)
        % random velocity to improve initial exploration
        v{i} = problem.M.randvec(x{i});
        % or null velocity
        % v{i} = problem.M.zerovec();
    end

    % Compute cost for each particle xi,
    % initialize personal best costs,
    % and setup a function evaluations counter.
    costs = zeros(size(x));
    for i = 1 : numel(x)
        costs(i) = getCost(problem, x{i}, storedb, xkey{i});
    end
    fy = costs;
    costevals = options.populationsize;

    % Identify the best particle and store its cost/position
    [fbest, imin] = min(costs);
    xbest = x{imin};
    xbestkey = xkey{imin}; %#ok

    % Iteration counter (at any point, iter is the number of fully executed
    % iterations so far)
    iter = 0;

    % Save stats in a struct array info, and preallocate.
    % savestats will be called twice for the initial iterate (number 0),
    % which is unfortunate, but not problematic.
    stats = savestats();
    info(1) = stats;
    info(min(10000, options.maxiter+1)).iter = [];

    % Start iterating until stopping criterion triggers
    while true

        stats = savestats();
        info(iter+1) = stats; %#ok
        iter = iter + 1;

        % Make sure we don't use too much memory for the store database
        storedb.purge();

        % Log / display iteration information here.
        if options.verbosity >= 2
            fprintf('Cost evals: %7d\tBest cost: %+.8e\n', costevals, fbest);
        end

        % Start timing this iteration
        timetic = tic();

        % BM: Run standard stopping criterion checks.
        % BM: Stop if any particle triggers a stopping criterion.
        % The loop must range over the whole swarm (1 : numel(x)); looping
        % over numel(x) alone would only ever test the last particle.
        for i = 1 : numel(x)
            [stop, reason] = stoppingcriterion(problem, x{i}, options, info, iter);
            if stop
                break;
            end
        end

        if stop
            if options.verbosity >= 1
                fprintf([reason '\n']);
            end
            break;
        end

        % Compute the inertia factor
        % (linearly decreasing from .9 to .4, from iter=0 to maxiter)
        w = 0.4 + 0.5*(1-iter/options.maxiter);

        % Compute velocities
        for i = 1 : numel(x)

            % Get the position and past best position of particle i
            xi = x{i};
            yi = y{i};

            % Get the previous position and velocity of particle i
            xiprev = xprev{i};
            vi = v{i};

            % Compute new velocity of particle i,
            % composed of 3 contributions
            inertia = problem.M.lincomb(xi, w , problem.M.transp(xiprev, xi, vi));
            nostalgia = problem.M.lincomb(xi, rand(1)*options.nostalgia, problem.M.log(xi, yi) );
            social = problem.M.lincomb(xi, rand(1) * options.social, problem.M.log(xi, xbest));

            v{i} = problem.M.lincomb(xi, 1, inertia, 1, problem.M.lincomb(xi, 1, nostalgia, 1, social));

        end

        % Backup the current swarm positions
        xprev = x;
        xprevkey = xkey; %#ok

        % Update positions, personal bests and global best
        for i = 1 : numel(x)

            % compute new position of particle i
            x{i} = problem.M.retr(x{i}, v{i});
            xkey{i} = storedb.getNewKey();

            % compute new cost of particle i
            fxi = getCost(problem, x{i}, storedb, xkey{i});
            costevals = costevals + 1;

            % update costs of the swarm
            costs(i) = fxi;

            % update self-best if necessary
            if fxi < fy(i)

                % update self-best cost and position
                fy(i) = fxi;
                y{i} = x{i};
                ykey{i} = xkey{i};

                % update global-best if necessary
                if fy(i) < fbest
                    fbest = fy(i);
                    xbest = y{i};
                    xbestkey = ykey{i}; %#ok
                end

            end
        end
    end

    % Drop the unused preallocated entries.
    info = info(1:iter);

    % Routine in charge of collecting the current iteration stats.
    % Reads iter, fbest, costevals, x, v, xbest, xkey, timetic and info
    % from the enclosing workspace.
    function stats = savestats()
        stats.iter = iter;
        stats.cost = fbest;
        stats.costevals = costevals;
        stats.x = x;
        stats.v = v;
        stats.xbest = xbest;
        if iter == 0
            stats.time = toc(timetic);
        else
            stats.time = info(iter).time + toc(timetic);
        end

        % BM: Begin storing user defined stats for the entire population.
        % A trial call on the first particle reveals which fields the
        % user's statsfun adds; each such field becomes a cell with one
        % entry per particle.
        num_old_fields = size(fieldnames(stats), 1);
        trialstats = applyStatsfun(problem, x{1}, storedb, xkey{1}, options, stats);% BM
        new_fields = fieldnames(trialstats);
        num_new_fields = size(fieldnames(trialstats), 1);
        num_additional_fields = num_new_fields - num_old_fields; % User has defined new fields
        for jj = 1 : num_additional_fields % New fields added
            tempfield = new_fields(num_old_fields + jj);
            stats.(char(tempfield)) = cell(options.populationsize, 1);
        end
        for ii = 1 : options.populationsize % Adding information for each element of the population
            tempstats = applyStatsfun(problem, x{ii}, storedb, xkey{ii}, options, stats);
            for jj = 1 : num_additional_fields
                tempfield = new_fields(num_old_fields + jj);
                tempfield_value = tempstats.(char(tempfield));
                stats.(char(tempfield)){ii} = tempfield_value;
            end
        end
        % BM: End storing

    end

end
function [x, cost, info, options] = steepestdescent(problem, x, options)
% Steepest descent (gradient descent) minimization algorithm for Manopt.
%
% function [x, cost, info, options] = steepestdescent(problem)
% function [x, cost, info, options] = steepestdescent(problem, x0)
% function [x, cost, info, options] = steepestdescent(problem, x0, options)
% function [x, cost, info, options] = steepestdescent(problem, [], options)
%
% Apply the steepest descent minimization algorithm to the problem defined
% in the problem structure, starting at x0 if it is provided (otherwise, at
% a random point on the manifold). To specify options whilst not specifying
% an initial guess, give x0 as [] (the empty matrix).
%
% In most of the examples bundled with the toolbox (see link below), the
% solver can be replaced by the present one if need be.
%
% The outputs x and cost are the last reached point on the manifold and its
% cost. The struct-array info contains information about the iterations:
%   iter : the iteration number (0 for the initial guess)
%   cost : cost value
%   time : elapsed time in seconds
%   gradnorm : Riemannian norm of the gradient
%   stepsize : norm of the last tangent vector retracted
%              (NaN for the initial guess)
%   linesearch : information logged by options.linesearch
%                (empty for the initial guess)
%   And possibly additional information logged by options.statsfun.
% For example, type [info.gradnorm] to obtain a vector of the successive
% gradient norms reached.
%
% The options structure is used to overwrite the default values. All
% options have a default value and are hence optional. To force an option
% value, pass an options structure with a field options.optionname, where
% optionname is one of the following and the default value is indicated
% between parentheses:
%
%   tolgradnorm (1e-6)
%       The algorithm terminates if the norm of the gradient drops below this.
%   maxiter (1000)
%       The algorithm terminates if maxiter iterations have been executed.
%   maxtime (Inf)
%       The algorithm terminates if maxtime seconds elapsed.
%   minstepsize (1e-10)
%       The algorithm terminates if the linesearch returns a displacement
%       vector (to be retracted) smaller in norm than this value.
%   linesearch (@linesearch or @linesearch_hint)
%       Function handle to a line search function. The options structure is
%       passed to the line search too, so you can pass it parameters. See
%       each line search's documentation for info. Another available line
%       search in manopt is @linesearch_adaptive, in
%       /manopt/linesearch/linesearch_adaptive.m
%       If the problem structure includes a line search hint, then the
%       default line search used is @linesearch_hint.
%   statsfun (none)
%       Function handle to a function that will be called after each
%       iteration to provide the opportunity to log additional statistics.
%       They will be returned in the info struct. See the generic Manopt
%       documentation about solvers for further information.
%   stopfun (none)
%       Function handle to a function that will be called at each iteration
%       to provide the opportunity to specify additional stopping criteria.
%       See the generic Manopt documentation about solvers for further
%       information.
%   verbosity (3)
%       Integer number used to tune the amount of output the algorithm
%       generates during execution (mostly as text in the command window).
%       The higher, the more output. 0 means silent.
%   storedepth (2)
%       Maximum number of different points x of the manifold for which a
%       store structure will be kept in memory in the storedb. If the
%       caching features of Manopt are not used, this is irrelevant. For
%       the SD algorithm, a store depth of 2 should always be sufficient.
%
%
% See also: conjugategradient trustregions manopt/solvers/linesearch manopt/examples

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % Verify that the problem description is sufficient for the solver.
    if ~canGetCost(problem)
        warning('manopt:getCost', ...
                'No cost provided. The algorithm will likely abort.');
    end
    if ~canGetGradient(problem) && ~canGetApproxGradient(problem)
        % Note: we do not give a warning if an approximate gradient is
        % explicitly given in the problem description, as in that case the
        % user seems to be aware of the issue.
        warning('manopt:getGradient:approx', ...
               ['No gradient provided. Using an FD approximation instead (slow).\n' ...
                'It may be necessary to increase options.tolgradnorm.\n' ...
                'To disable this warning: warning(''off'', ''manopt:getGradient:approx'')']);
        problem.approxgrad = approxgradientFD(problem);
    end

    % Set local defaults here
    localdefaults.minstepsize = 1e-10;
    localdefaults.maxiter = 1000;
    localdefaults.tolgradnorm = 1e-6;

    % Depending on whether the problem structure specifies a hint for
    % line-search algorithms, choose a default line-search that works on
    % its own (typical) or that uses the hint.
    if ~canGetLinesearch(problem)
        localdefaults.linesearch = @linesearch;
    else
        localdefaults.linesearch = @linesearch_hint;
    end

    % Merge global and local defaults, then merge w/ user options, if any.
    localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);

    % Start timing the initialization; savestats reads this timer.
    timetic = tic();

    % If no initial point x is given by the user, generate one at random.
    if ~exist('x', 'var') || isempty(x)
        x = problem.M.rand();
    end

    % Create a store database and get a key for the current x
    storedb = StoreDB(options.storedepth);
    key = storedb.getNewKey();

    % Compute objective-related quantities for x
    [cost, grad] = getCostGrad(problem, x, storedb, key);
    gradnorm = problem.M.norm(x, grad);

    % Iteration counter.
    % At any point, iter is the number of fully executed iterations so far.
    iter = 0;

    % Save stats in a struct array info, and preallocate.
    stats = savestats();
    info(1) = stats;
    info(min(10000, options.maxiter+1)).iter = [];

    if options.verbosity >= 2
        fprintf(' iter\t cost val\t grad. norm\n');
    end

    % Start iterating until stopping criterion triggers
    while true

        % Display iteration information
        if options.verbosity >= 2
            fprintf('%5d\t%+.16e\t%.8e\n', iter, cost, gradnorm);
        end

        % Start timing this iteration
        timetic = tic();

        % Run standard stopping criterion checks
        [stop, reason] = stoppingcriterion(problem, x, options, ...
                                                             info, iter+1);

        % If none triggered, run specific stopping criterion check.
        % At iter == 0, stats.stepsize is NaN, so this comparison is false
        % and the check cannot trigger before a first step was taken.
        if ~stop && stats.stepsize < options.minstepsize
            stop = true;
            reason = sprintf(['Last stepsize smaller than minimum '  ...
                              'allowed; options.minstepsize = %g.'], ...
                              options.minstepsize);
        end

        if stop
            if options.verbosity >= 1
                fprintf([reason '\n']);
            end
            break;
        end

        % Pick the descent direction as minus the gradient
        desc_dir = problem.M.lincomb(x, -1, grad);

        % Execute the line search. The fifth argument, -gradnorm^2, is the
        % directional derivative of the cost at x along desc_dir.
        [stepsize, newx, newkey, lsstats] = options.linesearch( ...
                             problem, x, desc_dir, cost, -gradnorm^2, ...
                             options, storedb, key);

        % Compute the new cost-related quantities for x
        [newcost, newgrad] = getCostGrad(problem, newx, storedb, newkey);
        newgradnorm = problem.M.norm(newx, newgrad);

        % Make sure we don't use too much memory for the store database
        storedb.purge();

        % Transfer iterate info
        x = newx;
        key = newkey;
        cost = newcost;
        grad = newgrad;
        gradnorm = newgradnorm;

        % iter is the number of iterations we have accomplished.
        iter = iter + 1;

        % Log statistics for freshly executed iteration
        stats = savestats();
        info(iter+1) = stats;

    end

    % Drop the unused preallocated entries.
    info = info(1:iter+1);

    if options.verbosity >= 1
        fprintf('Total time is %f [s] (excludes statsfun)\n', ...
                info(end).time);
    end

    % Routine in charge of collecting the current iteration stats.
    % Reads iter, cost, gradnorm, stepsize, lsstats, timetic, info, x and
    % key from the enclosing workspace; stepsize and lsstats only exist
    % once a line search has run, hence the iter == 0 special case.
    function stats = savestats()
        stats.iter = iter;
        stats.cost = cost;
        stats.gradnorm = gradnorm;
        if iter == 0
            stats.stepsize = NaN;
            stats.time = toc(timetic);
            stats.linesearch = [];
        else
            stats.stepsize = stepsize;
            stats.time = info(iter).time + toc(timetic);
            stats.linesearch = lsstats;
        end
        stats = applyStatsfun(problem, x, storedb, key, options, stats);
    end

end
function [stepsize, newx, newkey, ssstats] = ...
                  stepsize_sg(problem, x, d, iter, options, storedb, key) %#ok
% Standard step size selection algorithm for the stochastic gradient method
%
% Given a problem structure, a point x on the manifold problem.M and a
% tangent vector d at x, produces a stepsize (a positive real number) and a
% new point newx obtained by retracting -stepsize*d at x. Additional inputs
% include iter (the iteration number of x, where 0 marks the initial
% guess), an options structure, a storedb database and the key of point x
% in that database. Additional outputs include the key of newx in the
% database, newkey, as well as a structure ssstats collecting statistics
% about the work done during the call to this function (currently, the
% field ssstats.stepsize).
%
% Available options (defaults between parentheses):
%   options.stepsize_type ('decay')
%       'decay'          : stepsize = init / (1 + init*lambda*iter)
%       'fix' or 'fixed' : stepsize = init
%       'hybrid'         : as 'decay' while iter < decaysteps, then frozen
%                          at init / (1 + init*lambda*decaysteps)
%       The comparison is case insensitive.
%   options.stepsize_init (0.1)
%   options.stepsize_lambda (0.1)
%   options.stepsize_decaysteps (100)
%
% This function may create and maintain a structure called sssgmem inside
% storedb.internal. This gives the function the opportunity to remember
% what happened in previous calls.
%
% See also: stochasticgradient

% This file is part of Manopt: www.manopt.org.
% Original authors: Bamdev Mishra and Nicolas Boumal, March 30, 2017.
% Contributors: Hiroyuki Kasai and Hiroyuki Sato.
% Change log:

    % The key may be omitted, and so may storedb.
    if nargin < 7
        if nargin < 6
            storedb = StoreDB();
        end
        key = storedb.getNewKey(); %#ok
    end

    % Defaults: initial stepsize guess, evolution type, weighting factor
    % (used by 'decay' and 'hybrid') and number of decaying iterations
    % before the step becomes constant (used by 'hybrid').
    defaults.stepsize_init = 0.1;
    defaults.stepsize_type = 'decay';
    defaults.stepsize_lambda = 0.1;
    defaults.stepsize_decaysteps = 100;

    if nargin < 5 || isempty(options)
        options = struct();
    end
    merged = mergeOptions(defaults, options);

    s0 = merged.stepsize_init;
    weight = merged.stepsize_lambda;
    horizon = merged.stepsize_decaysteps;

    switch lower(merged.stepsize_type)

        % Step size decays only for the few initial iterations.
        case 'hybrid'
            if iter < horizon
                stepsize = s0 / (1 + s0*weight*iter);
            else
                stepsize = s0 / (1 + s0*weight*horizon);
            end

        % Step size is fixed.
        case {'fix', 'fixed'}
            stepsize = s0;

        % Step size decays as O(1/iter).
        case 'decay'
            stepsize = s0 / (1 + s0*weight*iter);

        otherwise
            error(['Unknown options.stepsize_type. ' ...
                   'Should be ''fix'', ''decay'' or ''hybrid''.']);
    end

    % Store some information.
    ssstats = struct();
    ssstats.stepsize = stepsize;

    % Step against d: compute the new point, then give it a key.
    newx = problem.M.retr(x, d, -stepsize);
    newkey = storedb.getNewKey();

end
% % problem.partialgrad or problem.partialegrad (or equivalent) % Describes the partial gradients of the cost function. If the cost % function is of the form f(x) = sum_{k=1}^N f_k(x), % then partialegrad(x, K) = sum_{k \in K} grad f_k(x). % As usual, partialgrad must define the Riemannian gradient, whereas % partialegrad defines a Euclidean (classical) gradient which will be % converted automatically to a Riemannian gradient. Use the tool % checkgradient(problem) to check it. % % problem.ncostterms % An integer specifying how many terms are in the cost function (in % the example above, that would be N.) % % Importantly, the cost function itself needs not be specified. % % Some of the options of the solver are specific to this file. Please have % a look inside the code. % % To record the value of the cost function or the norm of the gradient for % example (which are statistics the algorithm does not require and hence % does not compute by default), one can set the following options: % % metrics.cost = @(problem, x) getCost(problem, x); % metrics.gradnorm = @(problem, x) problem.M.norm(x, getGradient(problem, x)); % options.statsfun = statsfunhelper(metrics); % % Important caveat: stochastic algorithms usually return an average of the % last few iterates. Computing averages on manifolds can be expensive. % Currently, this solver does not compute averages and simply returns the % last iterate. Using options.statsfun, it is possible for the user to % compute averages manually. If you have ideas on how to do this % generically, we welcome feedback. In particular, approximate means could % be computed with M.pairmean which is available in many geometries. % % See also: steepestdescent % This file is part of Manopt: www.manopt.org. % Original authors: Bamdev Mishra , % Hiroyuki Kasai , and % Hiroyuki Sato , 22 April 2016. % Contributors: Nicolas Boumal % Change log: % Verify that the problem description is sufficient for the solver. 
if ~canGetPartialGradient(problem) warning('manopt:getPartialGradient', ... 'No partial gradient provided. The algorithm will likely abort.'); end % Set local default localdefaults.maxiter = 1000; % Maximum number of iterations localdefaults.batchsize = 1; % Batchsize (# cost terms per iter) localdefaults.verbosity = 2; % Output verbosity (0, 1 or 2) localdefaults.storedepth = 20; % Limit amount of caching % Check stopping criteria and save stats every checkperiod iterations. localdefaults.checkperiod = 100; % stepsizefun is a function implementing a step size selection % algorithm. See that function for help with options, which can be % specified in the options structure passed to the solver directly. localdefaults.stepsizefun = @stepsize_sg; % Merge global and local defaults, then merge w/ user options, if any. localdefaults = mergeOptions(getGlobalDefaults(), localdefaults); if ~exist('options', 'var') || isempty(options) options = struct(); end options = mergeOptions(localdefaults, options); assert(options.checkperiod >= 1, ... 'options.checkperiod must be a positive integer (>= 1).'); % If no initial point x is given by the user, generate one at random. if ~exist('x', 'var') || isempty(x) x = problem.M.rand(); end % Create a store database and get a key for the current x storedb = StoreDB(options.storedepth); key = storedb.getNewKey(); % Elapsed time for the current set of iterations, where a set of % iterations comprises options.checkperiod iterations. We do not % count time spent for such things as logging statistics, as these are % not relevant to the actual optimization process. elapsed_time = 0; % Total number of completed steps iter = 0; % Total number of saved stats at this point. savedstats = 0; % Collect and save stats in a struct array info, and preallocate. stats = savestats(); info(1) = stats; savedstats = savedstats + 1; if isinf(options.maxiter) % We trust that if the user set maxiter = inf, then they defined % another stopping criterion. 
preallocate = 1e5; else preallocate = ceil(options.maxiter / options.checkperiod) + 1; end info(preallocate).iter = []; % Display information header for the user. if options.verbosity >= 2 fprintf(' iter time [s] step size\n'); end % Main loop. stop = false; while iter < options.maxiter % Record start time. start_time = tic(); % Draw the samples with replacement. idx_batch = randi(problem.ncostterms, options.batchsize, 1); % Compute partial gradient on this batch. pgrad = getPartialGradient(problem, x, idx_batch, storedb, key); % Compute a step size and the corresponding new point x. [stepsize, newx, newkey, ssstats] = ... options.stepsizefun(problem, x, pgrad, iter, ... options, storedb, key); % Make the step. x = newx; key = newkey; % Total number of completed steps. iter = iter + 1; % Make sure we do not use too much memory for the store database. storedb.purge(); % Elapsed time doing actual optimization work so far in this % set of options.checkperiod iterations. elapsed_time = elapsed_time + toc(start_time); % Check stopping criteria and save stats every checkperiod iters. if mod(iter, options.checkperiod) == 0 % Log statistics for freshly executed iteration. stats = savestats(); info(savedstats+1) = stats; savedstats = savedstats + 1; % Reset timer. elapsed_time = 0; % Print output. if options.verbosity >= 2 fprintf('%8d %10.2f %.3e\n', ... iter, stats.time, stepsize); end % Run standard stopping criterion checks. [stop, reason] = stoppingcriterion(problem, x, ... options, info, savedstats); if stop if options.verbosity >= 1 fprintf([reason '\n']); end break; end end end % Keep only the relevant portion of the info struct-array. info = info(1:savedstats); % Display a final information message. if options.verbosity >= 1 if ~stop % We stopped not because of stoppingcriterion but because the % loop came to an end, which means maxiter triggered. 
msg = 'Max iteration count reached; options.maxiter = %g.\n'; fprintf(msg, options.maxiter); end fprintf('Total time is %f [s] (excludes statsfun)\n', ... info(end).time + elapsed_time); end % Helper function to collect statistics to be saved at % index checkperiodcount+1 in info. function stats = savestats() stats.iter = iter; if savedstats == 0 stats.time = 0; stats.stepsize = NaN; stats.stepsize_stats = []; else stats.time = info(savedstats).time + elapsed_time; stats.stepsize = stepsize; stats.stepsize_stats = ssstats; end stats = applyStatsfun(problem, x, storedb, key, options, stats); end end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/trustregions/license for original GenRTR code.txt ================================================ Copyright (c) 2007,2012 Christopher G. Baker, Pierre-Antoine Absil, Kyle A. Gallivan All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of the contributors nor of their affiliated institutions may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

For questions, please contact Chris Baker (chris@cgbaker.net)

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/trustregions/tCG.m
================================================
function [eta, Heta, inner_it, stop_tCG] ...
                 = tCG(problem, x, grad, eta, Delta, options, storedb, key)
% tCG - Truncated (Steihaug-Toint) Conjugate-Gradient method
% minimize <eta,grad> + .5*<eta,Hess(eta)>
% subject to <eta,eta>_[inverse precon] <= Delta^2
%
% Inputs:
%   problem  Manopt problem structure. This function uses problem.M.inner,
%            problem.M.lincomb and problem.M.zerovec, and passes problem to
%            getHessian and getPrecon.
%   x        Current point on the manifold.
%   grad     Gradient at x.
%   eta      Initial tangent vector at x. If options.useRand is false, it
%            is assumed to be the zero vector (it is not checked).
%   Delta    Trust-region radius.
%   options  Structure with the fields theta, kappa, useRand, mininner,
%            maxinner and debug.
%   storedb, key  Store database and key of x, passed on to getHessian and
%            getPrecon.
%
% Outputs:
%   eta       Approximate solution of the trust-region subproblem.
%   Heta      Hessian (or its approximation) at x along the returned eta,
%             obtained by linear combinations of the Hessian calls made.
%   inner_it  Number of inner iterations started (0 if options.maxinner is
%             smaller than 1).
%   stop_tCG  Reason for stopping:
%               1: negative curvature,
%               2: exceeded trust region,
%               3: reached target residual-kappa (linear),
%               4: reached target residual-theta (superlinear),
%               5: maximum inner iterations,
%               6: model increased.
%
% See also: trustregions

% This file is part of Manopt: www.manopt.org.
% This code is an adaptation to Manopt of the original GenRTR code:
% RTR - Riemannian Trust-Region
% (c) 2004-2007, P.-A. Absil, C. G. Baker, K. A. Gallivan
% Florida State University
% School of Computational Science
% (http://www.math.fsu.edu/~cbaker/GenRTR/?page=download)
% See accompanying license file.
% The adaptation was executed by Nicolas Boumal.
%
% Change log:
%
%   NB Feb. 12, 2013:
%       We do not project r back to the tangent space anymore: it was not
%       necessary, and as of Manopt 1.0.1, the proj operator does not
%       coincide with this notion anymore.
%
%   NB April 3, 2013:
%       tCG now also returns Heta, the Hessian at x along eta. Additional
%       esthetic modifications.
%
%   NB Dec. 2, 2013:
%       If options.useRand is activated, we now make sure the preconditio-
%       ner is not used, as was originally intended in GenRTR. In time, we
%       may want to investigate whether useRand can be modified to work well
%       with preconditioning too.
%
%   NB Jan. 9, 2014:
%       Now checking explicitly for model decrease at each iteration. The
%       first iteration is a Cauchy point, which necessarily realizes a
%       decrease of the model cost. If a model increase is witnessed
%       (which is theoretically impossible if a linear operator is used for
%       the Hessian approximation), then we return the previous eta. This
%       ensures we always achieve at least the Cauchy decrease, which
%       should be sufficient for convergence.
%
%   NB Feb. 17, 2015:
%       The previous update was in effect verifying that the current eta
%       performed at least as well as the first eta (the Cauchy step) with
%       respect to the model cost. While this is an acceptable strategy,
%       the documentation (and the original intent) was to ensure a
%       monotonic decrease of the model cost at each new eta. This is now
%       the case, with the added line: "model_value = new_model_value;".
%
%   NB April 3, 2015:
%       Works with the new StoreDB class system.


    % All terms involving the trust-region radius will use an inner product
    % w.r.t. the preconditioner; this is because the iterates grow in
    % length w.r.t. the preconditioner, guaranteeing that we will not
    % re-enter the trust-region.
    %
    % The following recurrences for Prec-based norms and inner
    % products come from [CGT2000], pg. 205, first edition.
    % Below, P is the preconditioner.
    %
    % <eta_k,P*delta_k> =
    %      beta_k-1 * ( <eta_k-1,P*delta_k-1> + alpha_k-1 |delta_k-1|^2_P )
    % |delta_k|^2_P = <r_k,z_k> + beta_k-1^2 |delta_k-1|^2_P
    %
    % therefore, we need to keep track of
    % 1)   |delta_k|^2_P                              (d_Pd in the code)
    % 2)   <eta_k,P*delta_k> = <eta_k,delta_k>_P      (e_Pd in the code)
    % 3)   |eta_k  |^2_P                              (e_Pe in the code)
    %
    % initial values are given by:
    %    |delta_0|_P = <r,z>
    %    |eta_0|_P   = 0
    %    <eta_0,delta_0>_P = 0
    % because we take eta_0 = 0 (if useRand = false).
    %
    % [CGT2000] Conn, Gould and Toint: Trust-region methods, 2000.

    inner   = problem.M.inner;
    lincomb = problem.M.lincomb;

    theta = options.theta;
    kappa = options.kappa;

    if ~options.useRand % and therefore, eta == 0
        Heta = problem.M.zerovec(x);
        r = grad;
        e_Pe = 0;
    else % and therefore, no preconditioner
        % eta (presumably) ~= 0 was provided by the caller.
        Heta = getHessian(problem, x, eta, storedb, key);
        r = lincomb(x, 1, grad, 1, Heta);
        e_Pe = inner(x, eta, eta);
    end
    r_r = inner(x, r, r);
    norm_r = sqrt(r_r);
    norm_r0 = norm_r;

    % Precondition the residual.
    if ~options.useRand
        z = getPrecon(problem, x, r, storedb, key);
    else
        z = r;
    end

    % Compute z'*r.
    z_r = inner(x, z, r);
    d_Pd = z_r;

    % Initial search direction.
    delta = lincomb(x, -1, z);
    if ~options.useRand % and therefore, eta == 0
        e_Pd = 0;
    else % and therefore, no preconditioner
        e_Pd = inner(x, eta, delta);
    end

    % If the Hessian or a linear Hessian approximation is in use, it is
    % theoretically guaranteed that the model value decreases strictly
    % with each iteration of tCG. Hence, there is no need to monitor the
    % model value. But, when a nonlinear Hessian approximation is used
    % (such as the built-in finite-difference approximation for example),
    % the model may increase. It is then important to terminate the tCG
    % iterations and return the previous (the best-so-far) iterate. The
    % variable below will hold the model value.
    model_fun = @(eta, Heta) inner(x, eta, grad) + .5*inner(x, eta, Heta);
    if ~options.useRand
        model_value = 0;
    else
        model_value = model_fun(eta, Heta);
    end

    % Pre-assume termination because the loop ran to completion.
    stop_tCG = 5;

    % Number of inner iterations started so far. It is tracked separately
    % from the loop variable because, when options.maxinner < 1, the range
    % 1 : options.maxinner is empty and the loop variable is then left
    % empty rather than at a previously assigned value.
    inner_it = 0;

    % Begin inner/tCG loop.
    for j = 1 : options.maxinner

        inner_it = j;

        % This call is the computationally expensive step.
        Hdelta = getHessian(problem, x, delta, storedb, key);

        % Compute curvature (often called kappa).
        d_Hd = inner(x, delta, Hdelta);


        % Note that if d_Hd == 0, we will exit at the next "if" anyway.
        alpha = z_r/d_Hd;
        % <neweta,neweta>_P =
        % <eta,eta>_P + 2*alpha*<eta,delta>_P + alpha*alpha*<delta,delta>_P
        e_Pe_new = e_Pe + 2.0*alpha*e_Pd + alpha*alpha*d_Pd;

        if options.debug > 2
            fprintf('DBG: (r,r) : %e\n', r_r);
            fprintf('DBG: (d,Hd) : %e\n', d_Hd);
            fprintf('DBG: alpha : %e\n', alpha);
        end

        % Check against negative curvature and trust-region radius
        % violation. If either condition triggers, we bail out.
        if d_Hd <= 0 || e_Pe_new >= Delta^2
            % want
            %  ee = <eta,eta>_prec,x
            %  ed = <eta,delta>_prec,x
            %  dd = <delta,delta>_prec,x
            % tau is the nonnegative root of
            %  dd*tau^2 + 2*ed*tau + (ee - Delta^2) = 0,
            % that is, the step along delta that reaches the boundary.
            tau = (-e_Pd + sqrt(e_Pd*e_Pd + d_Pd*(Delta^2-e_Pe))) / d_Pd;
            if options.debug > 2
                fprintf('DBG: tau : %e\n', tau);
            end
            eta = lincomb(x, 1, eta, tau, delta);

            % If only a nonlinear Hessian approximation is available, this
            % is only approximately correct, but saves an additional
            % Hessian call.
            Heta = lincomb(x, 1, Heta, tau, Hdelta);

            % Technically, we may want to verify that this new eta is
            % indeed better than the previous eta before returning it (this
            % is always the case if the Hessian approximation is linear,
            % but I am unsure whether it is the case or not for nonlinear
            % approximations.) At any rate, the impact should be limited,
            % so in the interest of code conciseness (if we can still hope
            % for that), we omit this.

            if d_Hd <= 0
                stop_tCG = 1;     % negative curvature
            else
                stop_tCG = 2;     % exceeded trust region
            end
            break;
        end

        % No negative curvature and eta_prop inside TR: accept it.
        e_Pe = e_Pe_new;
        new_eta = lincomb(x, 1, eta, alpha, delta);

        % If only a nonlinear Hessian approximation is available, this is
        % only approximately correct, but saves an additional Hessian call.
        new_Heta = lincomb(x, 1, Heta, alpha, Hdelta);

        % Verify that the model cost decreased in going from eta to
        % new_eta. If it did not (which can only occur if the Hessian
        % approximation is nonlinear or because of numerical errors), then
        % we return the previous eta (which necessarily is the best reached
        % so far, according to the model cost). Otherwise, we accept the
        % new eta and go on.
        new_model_value = model_fun(new_eta, new_Heta);
        if new_model_value >= model_value
            stop_tCG = 6;
            break;
        end

        eta = new_eta;
        Heta = new_Heta;
        model_value = new_model_value; %% added Feb. 17, 2015

        % Update the residual.
        r = lincomb(x, 1, r, alpha, Hdelta);

        % Compute new norm of r.
        r_r = inner(x, r, r);
        norm_r = sqrt(r_r);

        % Check kappa/theta stopping criterion.
        % Note that it is somewhat arbitrary whether to check this stopping
        % criterion on the r's (the gradients) or on the z's (the
        % preconditioned gradients). [CGT2000], page 206, mentions both as
        % acceptable criteria.
        if j >= options.mininner && ...
           norm_r <= norm_r0*min(norm_r0^theta, kappa)
            % Residual is small enough to quit
            if kappa < norm_r0^theta
                stop_tCG = 3;  % linear convergence
            else
                stop_tCG = 4;  % superlinear convergence
            end
            break;
        end

        % Precondition the residual.
        if ~options.useRand
            z = getPrecon(problem, x, r, storedb, key);
        else
            z = r;
        end

        % Save the old z'*r.
        zold_rold = z_r;
        % Compute new z'*r.
        z_r = inner(x, z, r);

        % Compute new search direction.
        beta = z_r/zold_rold;
        delta = lincomb(x, -1, z, beta, delta);

        % Update new P-norms and P-dots [CGT2000, eq. 7.5.6 & 7.5.7].
        e_Pd = beta*(e_Pd + alpha*d_Pd);
        d_Pd = z_r + beta*beta*d_Pd;

    end  % of tCG loop

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/trustregions/trustregions.m
================================================
function [x, cost, info, options] = trustregions(problem, x, options)
% Riemannian trust-regions solver for optimization on manifolds.
%
% function [x, cost, info, options] = trustregions(problem)
% function [x, cost, info, options] = trustregions(problem, x0)
% function [x, cost, info, options] = trustregions(problem, x0, options)
% function [x, cost, info, options] = trustregions(problem, [], options)
%
% This is the Riemannian Trust-Region solver (with tCG inner solve), named
% RTR. This solver will attempt to minimize the cost function described in
% the problem structure. It requires the availability of the cost function
% and of its gradient. It will issue calls for the Hessian. If no Hessian
% nor approximate Hessian is provided, a standard approximation of the
% Hessian based on the gradient will be computed. If a preconditioner for
% the Hessian is provided, it will be used.
% % If no gradient is provided, an approximation of the gradient is computed, % but this can be slow for manifolds of high dimension. % % For a description of the algorithm and theorems offering convergence % guarantees, see the references below. Documentation for this solver is % available online at: % % http://www.manopt.org/solver_documentation_trustregions.html % % % The initial iterate is x0 if it is provided. Otherwise, a random point on % the manifold is picked. To specify options whilst not specifying an % initial iterate, give x0 as [] (the empty matrix). % % The two outputs 'x' and 'cost' are the last reached point on the manifold % and its cost. Notice that x is not necessarily the best reached point, % because this solver is not forced to be a descent method. In particular, % very close to convergence, it is sometimes preferable to accept very % slight increases in the cost value (on the order of the machine epsilon) % in the process of reaching fine convergence. % % The output 'info' is a struct-array which contains information about the % iterations: % iter (integer) % The (outer) iteration number, or number of steps considered % (whether accepted or rejected). The initial guess is 0. % cost (double) % The corresponding cost value. % gradnorm (double) % The (Riemannian) norm of the gradient. % numinner (integer) % The number of inner iterations executed to compute this iterate. % Inner iterations are truncated-CG steps. Each one requires a % Hessian (or approximate Hessian) evaluation. % time (double) % The total elapsed time in seconds to reach the corresponding cost. % rho (double) % The performance ratio for the iterate. % rhonum, rhoden (double) % Regularized numerator and denominator of the performance ratio: % rho = rhonum/rhoden. See options.rho_regularization. % accepted (boolean) % Whether the proposed iterate was accepted or not. 
% stepsize (double) % The (Riemannian) norm of the vector returned by the inner solver % tCG and which is retracted to obtain the proposed next iterate. If % accepted = true for the corresponding iterate, this is the size of % the step from the previous to the new iterate. If accepted is % false, the step was not executed and this is the size of the % rejected step. % Delta (double) % The trust-region radius at the outer iteration. % cauchy (boolean) % Whether the Cauchy point was used or not (if useRand is true). % And possibly additional information logged by options.statsfun. % For example, type [info.gradnorm] to obtain a vector of the successive % gradient norms reached at each (outer) iteration. % % The options structure is used to overwrite the default values. All % options have a default value and are hence optional. To force an option % value, pass an options structure with a field options.optionname, where % optionname is one of the following and the default value is indicated % between parentheses: % % tolgradnorm (1e-6) % The algorithm terminates if the norm of the gradient drops below % this. For well-scaled problems, a rule of thumb is that you can % expect to reduce the gradient norm by 8 orders of magnitude % (sqrt(eps)) compared to the gradient norm at a "typical" point (a % rough initial iterate for example). Further decrease is sometimes % possible, but inexact floating point arithmetic will eventually % limit the final accuracy. If tolgradnorm is set too low, the % algorithm may end up iterating forever (or at least until another % stopping criterion triggers). % maxiter (1000) % The algorithm terminates if maxiter (outer) iterations were executed. % maxtime (Inf) % The algorithm terminates if maxtime seconds elapsed. % miniter (3) % Minimum number of outer iterations (used only if useRand is true). % mininner (1) % Minimum number of inner iterations (for tCG). 
% maxinner (problem.M.dim() : the manifold's dimension) % Maximum number of inner iterations (for tCG). % Delta_bar (problem.M.typicaldist() or sqrt(problem.M.dim())) % Maximum trust-region radius. If you specify this parameter but not % Delta0, then Delta0 will be set to 1/8 times this parameter. % Delta0 (Delta_bar/8) % Initial trust-region radius. If you observe a long plateau at the % beginning of the convergence plot (gradient norm VS iteration), it % may pay off to try to tune this parameter to shorten the plateau. % You should not set this parameter without setting Delta_bar too (at % a larger value). % useRand (false) % Set to true if the trust-region solve is to be initiated with a % random tangent vector. If set to true, no preconditioner will be % used. This option is set to true in some scenarios to escape saddle % points, but is otherwise seldom activated. % kappa (0.1) % tCG inner kappa convergence tolerance. % kappa > 0 is the linear convergence target rate: tCG will terminate % early if the residual was reduced by a factor of kappa. % theta (1.0) % tCG inner theta convergence tolerance. % 1+theta (theta between 0 and 1) is the superlinear convergence % target rate. tCG will terminate early if the residual was reduced % by a power of 1+theta. % rho_prime (0.1) % Accept/reject threshold : if rho is at least rho_prime, the outer % iteration is accepted. Otherwise, it is rejected. In case it is % rejected, the trust-region radius will have been decreased. % To ensure this, rho_prime >= 0 must be strictly smaller than 1/4. % If rho_prime is negative, the algorithm is not guaranteed to % produce monotonically decreasing cost values. It is strongly % recommended to set rho_prime > 0, to aid convergence. % rho_regularization (1e3) % Close to convergence, evaluating the performance ratio rho is % numerically challenging. Meanwhile, close to convergence, the % quadratic model should be a good fit and the steps should be % accepted. 
Regularization lets rho go to 1 as the model decrease and % the actual decrease go to zero. Set this option to zero to disable % regularization (not recommended). See in-code for the specifics. % When this is not zero, it may happen that the iterates produced are % not monotonically improving the cost when very close to % convergence. This is because the corrected cost improvement could % change sign if it is negative but very small. % statsfun (none) % Function handle to a function that will be called after each % iteration to provide the opportunity to log additional statistics. % They will be returned in the info struct. See the generic Manopt % documentation about solvers for further information. statsfun is % called with the point x that was reached last, after the % accept/reject decision. See comment below. % stopfun (none) % Function handle to a function that will be called at each iteration % to provide the opportunity to specify additional stopping criteria. % See the generic Manopt documentation about solvers for further % information. % verbosity (2) % Integer number used to tune the amount of output the algorithm % generates during execution (mostly as text in the command window). % The higher, the more output. 0 means silent. 3 and above includes a % display of the options structure at the beginning of the execution. % debug (false) % Set to true to allow the algorithm to perform additional % computations for debugging purposes. If a debugging test fails, you % will be informed of it, usually via the command window. Be aware % that these additional computations appear in the algorithm timings % too, and may interfere with operations such as counting the number % of cost evaluations, etc. (the debug calls get storedb too). % storedepth (20) % Maximum number of different points x of the manifold for which a % store structure will be kept in memory in the storedb. If the % caching features of Manopt are not used, this is irrelevant. 
If % memory usage is an issue, you may try to lower this number. % Profiling may then help to investigate if a performance hit was % incurred as a result. % % Notice that statsfun is called with the point x that was reached last, % after the accept/reject decision. Hence: if the step was accepted, we get % that new x, with a store which only saw the call for the cost and for the % gradient. If the step was rejected, we get the same x as previously, with % the store structure containing everything that was computed at that point % (possibly including previous rejects at that same point). Hence, statsfun % should not be used in conjunction with the store to count operations for % example. Instead, you should use storedb's shared memory for such % purposes (either via storedb.shared, or via store.shared, see % online documentation). It is however possible to use statsfun with the % store to compute, for example, other merit functions on the point x % (other than the actual cost function, that is). % % % Please cite the Manopt paper as well as the research paper: % @Article{genrtr, % Title = {Trust-region methods on {Riemannian} manifolds}, % Author = {Absil, P.-A. and Baker, C. G. and Gallivan, K. A.}, % Journal = {Foundations of Computational Mathematics}, % Year = {2007}, % Number = {3}, % Pages = {303--330}, % Volume = {7}, % Doi = {10.1007/s10208-005-0179-9} % } % % See also: steepestdescent conjugategradient manopt/examples % An explicit, general listing of this algorithm, with preconditioning, % can be found in the following paper: % @Article{boumal2015lowrank, % Title = {Low-rank matrix completion via preconditioned optimization on the {G}rassmann manifold}, % Author = {Boumal, N. and Absil, P.-A.}, % Journal = {Linear Algebra and its Applications}, % Year = {2015}, % Pages = {200--239}, % Volume = {475}, % Doi = {10.1016/j.laa.2015.02.027}, % } % When the Hessian is not specified, it is approximated with % finite-differences of the gradient. 
The resulting method is called % RTR-FD. Some convergence theory for it is available in this paper: % @incollection{boumal2015rtrfd, % author={Boumal, N.}, % title={Riemannian trust regions with finite-difference Hessian approximations are globally convergent}, % year={2015}, % booktitle={Geometric Science of Information} % } % This file is part of Manopt: www.manopt.org. % This code is an adaptation to Manopt of the original GenRTR code: % RTR - Riemannian Trust-Region % (c) 2004-2007, P.-A. Absil, C. G. Baker, K. A. Gallivan % Florida State University % School of Computational Science % (http://www.math.fsu.edu/~cbaker/GenRTR/?page=download) % See accompanying license file. % The adaptation was executed by Nicolas Boumal. % % % Change log: % % NB April 3, 2013: % tCG now returns the Hessian along the returned direction eta, so % that we do not compute that Hessian redundantly: some savings at % each iteration. Similarly, if the useRand flag is on, we spare an % extra Hessian computation at each outer iteration too, owing to % some modifications in the Cauchy point section of the code specific % to useRand = true. % % NB Aug. 22, 2013: % This function is now Octave compatible. The transition called for % two changes which would otherwise not be advisable. (1) tic/toc is % now used as is, as opposed to the safer way: % t = tic(); elapsed = toc(t); % And (2), the (formerly inner) function savestats was moved outside % the main function to not be nested anymore. This is arguably less % elegant, but Octave does not (and likely will not) support nested % functions. % % NB Dec. 2, 2013: % The in-code documentation was largely revised and expanded. % % NB Dec. 2, 2013: % The former heuristic which triggered when rhonum was very small and % forced rho = 1 has been replaced by a smoother heuristic which % consists in regularizing rhonum and rhoden before computing their % ratio. It is tunable via options.rho_regularization.
Furthermore, % the solver now detects if tCG did not obtain a model decrease % (which is theoretically impossible but may happen because of % numerical errors and/or because of a nonlinear/nonsymmetric Hessian % operator, which is the case for finite difference approximations). % When such an anomaly is detected, the step is rejected and the % trust region radius is decreased. % Feb. 18, 2015 note: this is less useful now, as tCG now guarantees % model decrease even for the finite difference approximation of the % Hessian. It is still useful in case of numerical errors, but this % is less stringent. % % NB Dec. 3, 2013: % The stepsize is now registered at each iteration, at a small % additional cost. The defaults for Delta_bar and Delta0 are better % defined. Setting Delta_bar in the options will automatically set % Delta0 accordingly. In Manopt 1.0.4, the defaults for these options % were not treated appropriately because of an incorrect use of the % isfield() built-in function. % % NB Feb. 18, 2015: % Added some comments. Also, Octave now supports safe tic/toc usage, % so we reverted the changes to use that again (see Aug. 22, 2013 log % entry). % % NB April 3, 2015: % Works with the new StoreDB class system. % % NB April 8, 2015: % No Hessian warning if approximate Hessian explicitly available. % % NB Nov. 1, 2016: % Now uses approximate gradient via finite differences if need be. % Verify that the problem description is sufficient for the solver. if ~canGetCost(problem) warning('manopt:getCost', ... 'No cost provided. The algorithm will likely abort.'); end if ~canGetGradient(problem) && ~canGetApproxGradient(problem) % Note: we do not give a warning if an approximate gradient is % explicitly given in the problem description, as in that case the user % seems to be aware of the issue. warning('manopt:getGradient:approx', ... ['No gradient provided. Using an FD approximation instead (slow).\n' ... 'It may be necessary to increase options.tolgradnorm.\n' ... 
'To disable this warning: warning(''off'', ''manopt:getGradient:approx'')']); problem.approxgrad = approxgradientFD(problem); end if ~canGetHessian(problem) && ~canGetApproxHessian(problem) % Note: we do not give a warning if an approximate Hessian is % explicitly given in the problem description, as in that case the user % seems to be aware of the issue. warning('manopt:getHessian:approx', ... ['No Hessian provided. Using an FD approximation instead.\n' ... 'To disable this warning: warning(''off'', ''manopt:getHessian:approx'')']); problem.approxhess = approxhessianFD(problem); end % Define some strings for display tcg_stop_reason = {'negative curvature',... 'exceeded trust region',... 'reached target residual-kappa (linear)',... 'reached target residual-theta (superlinear)',... 'maximum inner iterations',... 'model increased'}; % Set local defaults here localdefaults.verbosity = 2; localdefaults.maxtime = inf; localdefaults.miniter = 3; localdefaults.maxiter = 1000; localdefaults.mininner = 1; localdefaults.maxinner = problem.M.dim(); localdefaults.tolgradnorm = 1e-6; localdefaults.kappa = 0.1; localdefaults.theta = 1.0; localdefaults.rho_prime = 0.1; localdefaults.useRand = false; localdefaults.rho_regularization = 1e3; % Merge global and local defaults, then merge w/ user options, if any. localdefaults = mergeOptions(getGlobalDefaults(), localdefaults); if ~exist('options', 'var') || isempty(options) options = struct(); end options = mergeOptions(localdefaults, options); % Set default Delta_bar and Delta0 separately to deal with additional % logic: if Delta_bar is provided but not Delta0, let Delta0 automatically % be some fraction of the provided Delta_bar. 
if ~isfield(options, 'Delta_bar') if isfield(problem.M, 'typicaldist') options.Delta_bar = problem.M.typicaldist(); else options.Delta_bar = sqrt(problem.M.dim()); end end if ~isfield(options,'Delta0') options.Delta0 = options.Delta_bar / 8; end % Check some option values assert(options.rho_prime < 1/4, ... 'options.rho_prime must be strictly smaller than 1/4.'); assert(options.Delta_bar > 0, ... 'options.Delta_bar must be positive.'); assert(options.Delta0 > 0 && options.Delta0 < options.Delta_bar, ... 'options.Delta0 must be positive and smaller than Delta_bar.'); % It is sometimes useful to check what the actual option values are. if options.verbosity >= 3 disp(options); end ticstart = tic(); % If no initial point x is given by the user, generate one at random. if ~exist('x', 'var') || isempty(x) x = problem.M.rand(); end % Create a store database and get a key for the current x storedb = StoreDB(options.storedepth); key = storedb.getNewKey(); %% Initializations % k counts the outer (TR) iterations. The semantic is that k counts the % number of iterations fully executed so far. k = 0; % Initialize solution and companion measures: f(x), fgrad(x) [fx, fgradx] = getCostGrad(problem, x, storedb, key); norm_grad = problem.M.norm(x, fgradx); % Initialize trust-region radius Delta = options.Delta0; % Save stats in a struct array info, and preallocate. if ~exist('used_cauchy', 'var') used_cauchy = []; end stats = savestats(problem, x, storedb, key, options, k, fx, norm_grad, Delta, ticstart); info(1) = stats; info(min(10000, options.maxiter+1)).iter = []; % ** Display: if options.verbosity == 2 fprintf(['%3s %3s %5s %5s ',... 'f: %+e |grad|: %e\n'],... ' ',' ',' ',' ', fx, norm_grad); elseif options.verbosity > 2 fprintf('************************************************************************\n'); fprintf('%3s %3s k: %5s num_inner: %5s %s\n',... 
'','','______','______',''); fprintf(' f(x) : %+e |grad| : %e\n', fx, norm_grad); fprintf(' Delta : %f\n', Delta); end % To keep track of consecutive radius changes, so that we can warn the % user if it appears necessary. consecutive_TRplus = 0; consecutive_TRminus = 0; % ********************** % ** Start of TR loop ** % ********************** while true % Start clock for this outer iteration ticstart = tic(); % Run standard stopping criterion checks [stop, reason] = stoppingcriterion(problem, x, options, info, k+1); % If the stopping criterion that triggered is the tolerance on the % gradient norm but we are using randomization, make sure we make at % least miniter iterations to give randomization a chance at escaping % saddle points. if stop == 2 && options.useRand && k < options.miniter stop = 0; end if stop if options.verbosity >= 1 fprintf([reason '\n']); end break; end if options.verbosity > 2 || options.debug > 0 fprintf('************************************************************************\n'); end % ************************* % ** Begin TR Subproblem ** % ************************* % Determine eta0 if ~options.useRand % Pick the zero vector eta = problem.M.zerovec(x); else % Random vector in T_x M (this has to be very small) eta = problem.M.lincomb(x, 1e-6, problem.M.randvec(x)); % Must be inside trust-region while problem.M.norm(x, eta) > Delta eta = problem.M.lincomb(x, sqrt(sqrt(eps)), eta); end end % Solve TR subproblem approximately [eta, Heta, numit, stop_inner] = ... tCG(problem, x, fgradx, eta, Delta, options, storedb, key); srstr = tcg_stop_reason{stop_inner}; % If using randomized approach, compare result with the Cauchy point. % Convergence proofs assume that we achieve at least (a fraction of) % the reduction of the Cauchy point. After this if-block, either all % eta-related quantities have been changed consistently, or none of % them have changed. 
if options.useRand used_cauchy = false; % Check the curvature, Hg = getHessian(problem, x, fgradx, storedb, key); g_Hg = problem.M.inner(x, fgradx, Hg); if g_Hg <= 0 tau_c = 1; else tau_c = min( norm_grad^3/(Delta*g_Hg) , 1); end % and generate the Cauchy point. eta_c = problem.M.lincomb(x, -tau_c * Delta / norm_grad, fgradx); Heta_c = problem.M.lincomb(x, -tau_c * Delta / norm_grad, Hg); % Now that we have computed the Cauchy point in addition to the % returned eta, we might as well keep the best of them. mdle = fx + problem.M.inner(x, fgradx, eta) ... + .5*problem.M.inner(x, Heta, eta); mdlec = fx + problem.M.inner(x, fgradx, eta_c) ... + .5*problem.M.inner(x, Heta_c, eta_c); if mdlec < mdle eta = eta_c; Heta = Heta_c; % added April 11, 2012 used_cauchy = true; end end % This is only computed for logging purposes, because it may be useful % for some user-defined stopping criteria. If this is not cheap for % specific applications (compared to evaluating the cost), we should % reconsider this. norm_eta = problem.M.norm(x, eta); if options.debug > 0 testangle = problem.M.inner(x, eta, fgradx) / (norm_eta*norm_grad); end % Compute the tentative next iterate (the proposal) x_prop = problem.M.retr(x, eta); key_prop = storedb.getNewKey(); % Compute the function value of the proposal fx_prop = getCost(problem, x_prop, storedb, key_prop); % Will we accept the proposal or not? % Check the performance of the quadratic model against the actual cost. rhonum = fx - fx_prop; rhoden = -problem.M.inner(x, fgradx, eta) ... -.5*problem.M.inner(x, eta, Heta); % rhonum could be anything. % rhoden should be nonnegative, as guaranteed by tCG, baring numerical % errors. % Heuristic -- added Dec. 2, 2013 (NB) to replace the former heuristic. % This heuristic is documented in the book by Conn Gould and Toint on % trust-region methods, section 17.4.2. % rhonum measures the difference between two numbers. 
Close to % convergence, these two numbers are very close to each other, so % that computing their difference is numerically challenging: there may % be a significant loss in accuracy. Since the acceptance or rejection % of the step is conditioned on the ratio between rhonum and rhoden, % large errors in rhonum result in a very large error in rho, hence in % erratic acceptance / rejection. Meanwhile, close to convergence, % steps are usually trustworthy and we should transition to a Newton- % like method, with rho=1 consistently. The heuristic thus shifts both % rhonum and rhoden by a small amount such that far from convergence, % the shift is irrelevant and close to convergence, the ratio rho goes % to 1, effectively promoting acceptance of the step. % The rationale is that close to convergence, both rhonum and rhoden % are quadratic in the distance between x and x_prop. Thus, when this % distance is on the order of sqrt(eps), the value of rhonum and rhoden % is on the order of eps, which is indistinguishable from the numerical % error, resulting in badly estimated rho's. % For abs(fx) < 1, this heuristic is invariant under offsets of f but % not under scaling of f. For abs(fx) > 1, the opposite holds. This % should not alarm us, as this heuristic only triggers at the very last % iterations if very fine convergence is demanded. rho_reg = max(1, abs(fx)) * eps * options.rho_regularization; rhonum = rhonum + rho_reg; rhoden = rhoden + rho_reg; if options.debug > 0 fprintf('DBG: rhonum : %e\n', rhonum); fprintf('DBG: rhoden : %e\n', rhoden); end % This is always true if a linear, symmetric operator is used for the % Hessian (approximation) and if we had infinite numerical precision. % In practice, nonlinear approximations of the Hessian such as the % built-in finite difference approximation and finite numerical % accuracy can cause the model to increase. In such scenarios, we % decide to force a rejection of the step and a reduction of the % trust-region radius. 
We test the sign of the regularized rhoden since % the regularization is supposed to capture the accuracy to which % rhoden is computed: if rhoden were negative before regularization but % not after, that should not be (and is not) detected as a failure. % % Note (Feb. 17, 2015, NB): the most recent version of tCG already % includes a mechanism to ensure model decrease if the Cauchy step % attained a decrease (which is theoretically the case under very lax % assumptions). This being said, it is always possible that numerical % errors will prevent this, so that it is good to keep a safeguard. % % The current strategy is that, if this should happen, then we reject % the step and reduce the trust region radius. This also ensures that % the actual cost values are monotonically decreasing. model_decreased = (rhoden >= 0); if ~model_decreased srstr = [srstr ', model did not decrease']; %#ok end rho = rhonum / rhoden; % Added June 30, 2015 following observation by BM. % With this modification, it is guaranteed that a step rejection is % always accompanied by a TR reduction. This prevents stagnation in % this "corner case" (NaN's really aren't supposed to occur, but it's % nice if we can handle them nonetheless). if isnan(rho) fprintf('rho is NaN! Forcing a radius decrease. This should not happen.\n'); if isnan(fx_prop) fprintf('The cost function returned NaN (perhaps the retraction returned a bad point?)\n'); else fprintf('The cost function did not return a NaN value.'); end end if options.debug > 0 m = @(x, eta) ... getCost(problem, x, storedb, key) + ... getDirectionalDerivative(problem, x, eta, storedb, key) + ... 
.5*problem.M.inner(x, getHessian(problem, x, eta, storedb, key), eta); zerovec = problem.M.zerovec(x); actrho = (fx - fx_prop) / (m(x, zerovec) - m(x, eta)); fprintf('DBG: new f(x) : %+e\n', fx_prop); fprintf('DBG: actual rho : %e\n', actrho); fprintf('DBG: used rho : %e\n', rho); end % Choose the new TR radius based on the model performance trstr = ' '; % If the actual decrease is smaller than 1/4 of the predicted decrease, % then reduce the TR radius. if rho < 1/4 || ~model_decreased || isnan(rho) trstr = 'TR-'; Delta = Delta/4; consecutive_TRplus = 0; consecutive_TRminus = consecutive_TRminus + 1; if consecutive_TRminus >= 5 && options.verbosity >= 2 consecutive_TRminus = -inf; fprintf(' +++ Detected many consecutive TR- (radius decreases).\n'); fprintf(' +++ Consider decreasing options.Delta_bar by an order of magnitude.\n'); fprintf(' +++ Current values: options.Delta_bar = %g and options.Delta0 = %g.\n', options.Delta_bar, options.Delta0); end % If the actual decrease is at least 3/4 of the precicted decrease and % the tCG (inner solve) hit the TR boundary, increase the TR radius. % We also keep track of the number of consecutive trust-region radius % increases. If there are many, this may indicate the need to adapt the % initial and maximum radii. elseif rho > 3/4 && (stop_inner == 1 || stop_inner == 2) trstr = 'TR+'; Delta = min(2*Delta, options.Delta_bar); consecutive_TRminus = 0; consecutive_TRplus = consecutive_TRplus + 1; if consecutive_TRplus >= 5 && options.verbosity >= 1 consecutive_TRplus = -inf; fprintf(' +++ Detected many consecutive TR+ (radius increases).\n'); fprintf(' +++ Consider increasing options.Delta_bar by an order of magnitude.\n'); fprintf(' +++ Current values: options.Delta_bar = %g and options.Delta0 = %g.\n', options.Delta_bar, options.Delta0); end else % Otherwise, keep the TR radius constant. consecutive_TRplus = 0; consecutive_TRminus = 0; end % Choose to accept or reject the proposed step based on the model % performance. 
Note the strict inequality. if model_decreased && rho > options.rho_prime accept = true; accstr = 'acc'; x = x_prop; key = key_prop; fx = fx_prop; fgradx = getGradient(problem, x, storedb, key); norm_grad = problem.M.norm(x, fgradx); else accept = false; accstr = 'REJ'; end % Make sure we don't use too much memory for the store database storedb.purge(); % k is the number of iterations we have accomplished. k = k + 1; % Log statistics for freshly executed iteration. % Everything after this in the loop is not accounted for in the timing. stats = savestats(problem, x, storedb, key, options, k, fx, ... norm_grad, Delta, ticstart, info, rho, rhonum, ... rhoden, accept, numit, norm_eta, used_cauchy); info(k+1) = stats; %#ok % ** Display: if options.verbosity == 2, fprintf(['%3s %3s k: %5d num_inner: %5d ', ... 'f: %+e |grad|: %e %s\n'], ... accstr,trstr,k,numit,fx,norm_grad,srstr); elseif options.verbosity > 2, if options.useRand && used_cauchy, fprintf('USED CAUCHY POINT\n'); end fprintf('%3s %3s k: %5d num_inner: %5d %s\n', ... accstr, trstr, k, numit, srstr); fprintf(' f(x) : %+e |grad| : %e\n',fx,norm_grad); if options.debug > 0 fprintf(' Delta : %f |eta| : %e\n',Delta,norm_eta); end fprintf(' rho : %e\n',rho); end if options.debug > 0, fprintf('DBG: cos ang(eta,gradf): %d\n',testangle); if rho == 0 fprintf('DBG: rho = 0, this will likely hinder further convergence.\n'); end end end % of TR loop (counter: k) % Restrict info struct-array to useful part info = info(1:k+1); if (options.verbosity > 2) || (options.debug > 0), fprintf('************************************************************************\n'); end if (options.verbosity > 0) || (options.debug > 0) fprintf('Total time is %f [s] (excludes statsfun)\n', info(end).time); end % Return the best cost reached cost = fx; end % Routine in charge of collecting the current iteration stats function stats = savestats(problem, x, storedb, key, options, k, fx, ... norm_grad, Delta, ticstart, info, rho, rhonum, ... 
rhoden, accept, numit, norm_eta, used_cauchy)
    % Fields recorded at every iteration, including the initial one.
    stats.iter = k;
    stats.cost = fx;
    stats.gradnorm = norm_grad;
    stats.Delta = Delta;
    if k == 0
        % Initial record: no step has been attempted yet, so the
        % step-related fields receive placeholder values.
        stats.time = toc(ticstart);
        stats.rho = inf;
        stats.rhonum = NaN;
        stats.rhoden = NaN;
        stats.accepted = true;
        stats.numinner = NaN;
        stats.stepsize = NaN;
        if options.useRand
            stats.cauchy = false;
        end
    else
        % Time is cumulative: add this iteration's elapsed time to the
        % time logged for the previous record (info(k) is iteration k-1).
        stats.time = info(k).time + toc(ticstart);
        stats.rho = rho;
        stats.rhonum = rhonum;
        stats.rhoden = rhoden;
        stats.accepted = accept;
        stats.numinner = numit;
        stats.stepsize = norm_eta;
        if options.useRand,
            stats.cauchy = used_cauchy;
        end
    end

    % See comment about statsfun above: the x and store passed to statsfun
    % are that of the most recently accepted point after the iteration
    % fully executed.
    stats = applyStatsfun(problem, x, storedb, key, options, stats);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/checkdiff.m
================================================
function checkdiff(problem, x, d, force_gradient)
% Checks the consistency of the cost function and directional derivatives.
%
% function checkdiff(problem)
% function checkdiff(problem, x)
% function checkdiff(problem, x, d)
%
% checkdiff performs a numerical test to check that the directional
% derivatives defined in the problem structure agree up to first order with
% the cost function at some point x, along some direction d. The test is
% based on a truncated Taylor series (see online Manopt documentation).
%
% Both x and d are optional and will be sampled at random if omitted.
%
% See also: checkgradient checkhessian

% If force_gradient = true (hidden parameter), then the function will call
% getGradient and infer the directional derivative, rather than call
% getDirectionalDerivative directly. This is used by checkgradient.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   March 26, 2017 (JB):
%       Detects if the approximated linear model is exact
%       and provides the user with the corresponding feedback.
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % The hidden fourth input defaults to false when omitted.
    if ~exist('force_gradient', 'var')
        force_gradient = false;
    end

    % Verify that the problem description is sufficient.
    if ~canGetCost(problem)
        error('It seems no cost was provided.');
    end
    if ~(force_gradient || canGetDirectionalDerivative(problem))
        error('It seems no directional derivatives were provided.');
    end
    if force_gradient && ~canGetGradient(problem)
        % Would normally issue a warning, but this function should only be
        % called with force_gradient on by checkgradient, which will
        % already have issued a warning.
    end

    has_x = exist('x', 'var') && ~isempty(x);
    has_d = exist('d', 'var') && ~isempty(d);

    % A direction cannot be given without the point it is tangent at.
    if has_d && ~has_x
        error('If d is provided, x must be too, since d is tangent at x.');
    end

    % Sample whatever was not supplied; x comes first because d is drawn
    % in the tangent space at x.
    if ~has_x
        x = problem.M.rand();
    end
    if ~has_d
        d = problem.M.randvec(x);
    end

    % Evaluate the cost f0 at x and the directional derivative df0 of the
    % cost at x along d.
    storedb = StoreDB();
    xkey = storedb.getNewKey();
    f0 = getCost(problem, x, storedb, xkey);

    if force_gradient
        % Infer the directional derivative from the gradient.
        grad = getGradient(problem, x, storedb, xkey);
        df0 = problem.M.inner(x, grad, d);
    else
        df0 = getDirectionalDerivative(problem, x, d, storedb, xkey);
    end

    % Evaluate the cost at points on the geodesic (or approximation of it)
    % originating from x, along direction d, for 51 stepsizes spaced
    % logarithmically between 1e-8 and 1.
    h = logspace(-8, 0, 51);
    nsteps = numel(h);
    fvals = zeros(size(h));
    for k = 1 : nsteps
        y = problem.M.exp(x, d, h(k));
        ykey = storedb.getNewKey();
        fvals(k) = getCost(problem, y, storedb, ykey);
    end

    % First-order model of the cost built from f0 and df0, evaluated at
    % the same stepsizes, and its absolute deviation from the true cost.
    linear_coeffs = [df0 f0];
    model = polyval(linear_coeffs, h);
    err = abs(model - fvals);

    % Plot the error together with a dashed reference line of slope 2.
    loglog(h, err);
    title(sprintf(['Directional derivative check.\nThe slope of the '...
                   'continuous line should match that of the dashed\n'...
                   '(reference) line over at least a few orders of '...
                   'magnitude for h.']));
    xlabel('h');
    ylabel('Approximation error');

    line('xdata', [1e-8 1e0], 'ydata', [1e-8 1e8], ...
         'color', 'k', 'LineStyle', '--', ...
         'YLimInclude', 'off', 'XLimInclude', 'off');

    isModelExact = all( err < 1e-12 );
    if isModelExact
        % The 1st order model is exact: all errors are (numerically) zero.
        % Fit a line through all points, using log scale only in h.
        fit_range = 1:numel(h);
        fit_poly = polyfit(log10(h), err, 1);
        % Set mean error in log scale for plot.
        fit_poly(end) = log10(fit_poly(end));
        % Change title to something more descriptive for this special case.
        title(sprintf(...
              ['Directional derivative check.\n'...
               'It seems the linear model is exact:\n'...
               'Model error is numerically zero for all h.']));
    else
        % In a numerically reasonable neighborhood, the error should
        % decrease as the square of the stepsize, i.e., in loglog scale,
        % the error should have a slope of 2.
        window_len = 10;
        [fit_range, fit_poly] = identify_linear_piece(log10(h), log10(err), window_len);
    end

    % Overlay the fitted linear piece on the error curve.
    hold all;
    loglog(h(fit_range), 10.^polyval(fit_poly, log10(h(fit_range))), 'LineWidth', 3);
    hold off;

    if isModelExact
        fprintf(['The linear model appears to be exact ' ...
                 '(within numerical precision),\n'...
                 'hence the slope computation is irrelevant.\n']);
    else
        fprintf('The slope should be 2. It appears to be: %g.\n', fit_poly(1));
        fprintf(['If it is far from 2, then directional derivatives ' ...
                 'might be erroneous.\n']);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/checkgradient.m
================================================
function checkgradient(problem, x, d)
% Checks the consistency of the cost function and the gradient.
%
% function checkgradient(problem)
% function checkgradient(problem, x)
% function checkgradient(problem, x, d)
%
% checkgradient performs a numerical test to check that the gradient
% defined in the problem structure agrees up to first order with the cost
% function at some point x, along some direction d. The test is based on a
% truncated Taylor series (see online Manopt documentation).
%
% It is also tested that the gradient is indeed a tangent vector.
%
% Both x and d are optional and will be sampled at random if omitted.
%
% See also: checkdiff checkhessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   Nov. 1, 2016 (NB):
%       Now calls checkdiff with force_gradient = true, instead of doing an
%       rmfield of problem.diff. This became necessary after getGradient
%       was updated to know how to compute the gradient from directional
%       derivatives.

    % Verify that the problem description is sufficient.
    % A missing cost is an error; the message depends on whether the
    % problem appears to provide partial gradients (stochastic setting).
    if ~canGetCost(problem)
        % The call to canGetPartialGradient will readily issue a warning if
        % problem.ncostterms is not defined even though it is expected.
        if ~canGetPartialGradient(problem)
            error('getCost:checkgradient', 'It seems no cost was provided.');
        else
            error('getCost:stochastic', ['It seems no cost was provided.\n' ...
                  'If you intend to use a stochastic solver, you still\n' ...
                  'need to define problem.cost to use checkgradient.']);
        end
    end
    % A missing gradient only triggers a warning: execution continues.
    if ~canGetGradient(problem)
        warning('manopt:checkgradient:nograd', ...
                'It seems no gradient was provided.');
    end

    % An input counts as provided only if it exists and is nonempty, so
    % passing [] is equivalent to omitting it.
    x_isprovided = exist('x', 'var') && ~isempty(x);
    d_isprovided = exist('d', 'var') && ~isempty(d);

    if ~x_isprovided && d_isprovided
        error('If d is provided, x must be too, since d is tangent at x.');
    end

    % If x and / or d are not specified, pick them at random.
if ~x_isprovided
    x = problem.M.rand();
end
    if ~d_isprovided
        d = problem.M.randvec(x);
    end

    %% Check that the gradient yields a first order model of the cost.

    % Call checkdiff with force_gradient set to true, to force that
    % function to make a gradient call.
    checkdiff(problem, x, d, true);
    title(sprintf(['Gradient check.\nThe slope of the continuous line ' ...
                   'should match that of the dashed\n(reference) line ' ...
                   'over at least a few orders of magnitude for h.']));
    xlabel('h');
    ylabel('Approximation error');

    %% Try to check that the gradient is a tangent vector.
    if isfield(problem.M, 'tangent')
        storedb = StoreDB();
        key = storedb.getNewKey();
        grad = getGradient(problem, x, storedb, key);
        % Compare the gradient with its image under M.tangent: the
        % difference should vanish if the gradient is already tangent.
        pgrad = problem.M.tangent(x, grad);
        residual = problem.M.lincomb(x, 1, grad, -1, pgrad);
        err = problem.M.norm(x, residual);
        fprintf('The residual should be 0, or very close. Residual: %g.\n', err);
        fprintf('If it is far from 0, then the gradient is not in the tangent space.\n');
    else
        % The message ends with a newline so that whatever is printed next
        % starts on a fresh line.
        fprintf(['Unfortunately, Manopt was unable to verify that the '...
                 'gradient is indeed a tangent vector.\nPlease verify ' ...
                 'this manually or implement the ''tangent'' function ' ...
                 'in your manifold structure.\n']);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/checkhessian.m
================================================
function checkhessian(problem, x, d)
% Checks the consistency of the cost function and the Hessian.
%
% function checkhessian(problem)
% function checkhessian(problem, x)
% function checkhessian(problem, x, d)
%
% checkhessian performs a numerical test to check that the directional
% derivatives and Hessian defined in the problem structure agree up to
% second order with the cost function at some point x, along some direction
% d. The test is based on a truncated Taylor series (see online Manopt
% documentation).
%
% It is also tested that the result of applying the Hessian along that
% direction is indeed a tangent vector, and that the Hessian operator is
% symmetric w.r.t. the Riemannian metric.
%
% Both x and d are optional and will be sampled at random if omitted.
%
% See also: checkdiff checkgradient checkretraction

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   March 26, 2017 (JB):
%       Detects if the approximated quadratic model is exact
%       and provides the user with the corresponding feedback.
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   Nov. 1, 2016 (NB):
%       Issues a call to getGradient rather than getDirectionalDerivative.

    % Verify that the problem description is sufficient.
    % A missing cost is fatal; a missing gradient or Hessian only warns.
    if ~canGetCost(problem)
        error('It seems no cost was provided.');
    end
    if ~canGetGradient(problem)
        warning('manopt:checkhessian:nograd', ...
                'It seems no gradient was provided.');
    end
    if ~canGetHessian(problem)
        warning('manopt:checkhessian:nohess', ...
                'It seems no Hessian was provided.');
    end

    x_isprovided = exist('x', 'var') && ~isempty(x);
    d_isprovided = exist('d', 'var') && ~isempty(d);

    if ~x_isprovided && d_isprovided
        error('If d is provided, x must be too, since d is tangent at x.');
    end

    % If x and / or d are not specified, pick them at random.
    if ~x_isprovided
        x = problem.M.rand();
    end
    if ~d_isprovided
        d = problem.M.randvec(x);
    end

    %% Check that the directional derivative and the Hessian at x along d
    %% yield a second order model of the cost function.

    % Compute the value f0 of the cost at x, the directional derivative
    % df0 at x along d, and the Hessian along [d, d].
    storedb = StoreDB();
    xkey = storedb.getNewKey();
    f0 = getCost(problem, x, storedb, xkey);
    df0 = problem.M.inner(x, d, getGradient(problem, x, storedb, xkey));
    d2f0 = problem.M.inner(x, d, getHessian(problem, x, d, storedb, xkey));

    % Compute the value of f at points on the geodesic (or approximation
    % of it) originating from x, along direction d, for stepsizes in a
    % large range given by h.
    h = logspace(-8, 0, 51);
    value = zeros(size(h));
    for i = 1 : length(h)
        y = problem.M.exp(x, d, h(i));
        ykey = storedb.getNewKey();
        value(i) = getCost(problem, y, storedb, ykey);
    end

    % Compute the quadratic approximation of the cost function using f0,
    % df0 and d2f0 at the same points.
    model = polyval([.5*d2f0 df0 f0], h);

    % Compute the approximation error
    err = abs(model - value);

    % And plot it, together with a dashed reference line of slope 3.
    loglog(h, err);
    title(sprintf(['Hessian check.\nThe slope of the continuous line ' ...
                   'should match that of the dashed\n(reference) line ' ...
                   'over at least a few orders of magnitude for h.']));
    xlabel('h');
    ylabel('Approximation error');

    line('xdata', [1e-8 1e0], 'ydata', [1e-16 1e8], ...
         'color', 'k', 'LineStyle', '--', ...
         'YLimInclude', 'off', 'XLimInclude', 'off');

    if ~all( err < 1e-12 )
        % In a numerically reasonable neighborhood, the error should
        % decrease as the cube of the stepsize, i.e., in loglog scale, the
        % error should have a slope of 3.
        isModelExact = false;
        window_len = 10;
        [range, poly] = identify_linear_piece(log10(h), log10(err), window_len);
    else
        % The 2nd order model is exact: all errors are (numerically) zero
        % Fit line from all points, use log scale only in h.
        isModelExact = true;
        range = 1:numel(h);
        poly = polyfit(log10(h), err, 1);
        % Set mean error in log scale for plot
        poly(end) = log10(poly(end));
        % Change title to something more descriptive for this special case.
        title(sprintf(...
              ['Hessian check.\n'...
               'It seems the quadratic model is exact:\n'...
               'Model error is numerically zero for all h.']));
    end
    hold all;
    loglog(h(range), 10.^polyval(poly, log10(h(range))), 'LineWidth', 3);
    hold off;

    if ~isModelExact
        fprintf('The slope should be 3. It appears to be: %g.\n', poly(1));
        fprintf(['If it is far from 3, then directional derivatives or ' ...
                 'the Hessian might be erroneous.\n']);
        fprintf(['Note: if the exponential map is only approximate, and it '...
                 'is not a second-order approximation,\nthen it is normal ' ...
                 'for the slope test to reach 2 instead of 3. Check the ' ...
                 'factory for this.\n' ...
                 'If tested at a critical point, then even for a first-order '...
                 'retraction the slope test should yield 3.\n']);
    else
        fprintf(['The quadratic model appears to be exact ' ...
                 '(within numerical precision),\n'...
                 'hence the slope computation is irrelevant.\n']);
    end

    %% Check that the Hessian at x along direction d is a tangent vector.
    if isfield(problem.M, 'tangent')
        hess = getHessian(problem, x, d, storedb, xkey);
        phess = problem.M.tangent(x, hess);
        residual = problem.M.lincomb(x, 1, hess, -1, phess);
        err = problem.M.norm(x, residual);
        fprintf('The residual should be zero, or very close. ');
        fprintf('Residual: %g.\n', err);
        fprintf(['If it is far from 0, then the Hessian is not in the ' ...
                 'tangent plane.\n']);
    else
        % Terminate the message with a newline: the symmetry report below
        % is printed right after it and must start on its own line.
        fprintf(['Unfortunately, Manopt was unable to verify that the '...
                 'Hessian is indeed a tangent vector.\nPlease verify ' ...
                 'this manually.\n']);
    end

    %% Check that the Hessian at x is symmetric.
    % For two random tangent vectors d1 and d2, a symmetric operator H
    % satisfies inner(d1, H[d2]) == inner(H[d1], d2).
    d1 = problem.M.randvec(x);
    d2 = problem.M.randvec(x);
    h1 = getHessian(problem, x, d1, storedb, xkey);
    h2 = getHessian(problem, x, d2, storedb, xkey);
    v1 = problem.M.inner(x, d1, h2);
    v2 = problem.M.inner(x, h1, d2);
    value = v1-v2;
    % The message names the two inner products that are being compared
    % (v1 and v2, in that order).
    fprintf(['<d1, H[d2]> - <H[d1], d2> should be zero, or very close.' ...
             '\n\tValue: %g - %g = %g.\n'], v1, v2, value);
    fprintf('If it is far from 0, then the Hessian is not symmetric.\n');

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/checkretraction.m
================================================
function checkretraction(M, x, v)
% Check the order of agreement of a retraction with an exponential.
%
% function checkretraction(M)
% function checkretraction(M, x)
% function checkretraction(M, x, v)
%
% checkretraction performs a numerical test to check the order of agreement
% between the retraction and the exponential map in a given Manopt
% manifold structure M. The test is performed at the point x if it is
% provided (otherwise, the point is picked at random) and along the tangent
% vector v at x if one is provided (otherwise, a tangent vector at x is
% picked at random.)
%
% See also: checkdiff checkgradient checkhessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Oct. 21, 2016.
% Contributors:
% Change log:

    % If no point is given, draw one at random; any supplied v is then
    % replaced too, since it would not be tangent at the new point.
    if ~exist('x', 'var') || isempty(x)
        x = M.rand();
        v = M.randvec(x);
    end

    if ~exist('v', 'var') || isempty(v)
        v = M.randvec(x);
    end

    % Compare the retraction and the exponential over steps of varying
    % length, on a wide log-scale.
    tt = logspace(-12, 0, 251);
    ee = zeros(size(tt));
    for k = 1 : numel(tt)
        t = tt(k);
        ee(k) = M.dist(M.exp(x, v, t), M.retr(x, v, t));
    end

    % Plot the difference between the exponential and the retraction over
    % that span of steps, in log-log scale.
    loglog(tt, ee);

    % We hope to see a slope of 3, to confirm a second-order retraction. If
    % the slope is only 2, we have a first-order retraction. If the slope is
    % less than 2, this is not a retraction.
    % Slope 3
    line('xdata', [1e-12 1e0], 'ydata', [1e-30 1e6], ...
         'color', 'k', 'LineStyle', '--', ...
         'YLimInclude', 'off', 'XLimInclude', 'off');
    % Slope 2
    line('xdata', [1e-14 1e0], 'ydata', [1e-20 1e8], ...
         'color', 'k', 'LineStyle', ':', ...
'YLimInclude', 'off', 'XLimInclude', 'off'); % Figure out the slope of the error in log-log, by identifying a piece % of the error curve which is mostly linear. window_len = 10; [range, poly] = identify_linear_piece(log10(tt), log10(ee), window_len); hold all; loglog(tt(range), 10.^polyval(poly, log10(tt(range))), 'LineWidth', 3); hold off; xlabel('Step size multiplier t'); ylabel('Distance between Exp(x, v, t) and Retr(x, v, t)'); title(sprintf('Retraction check.\nA slope of 2 is required, 3 is desired.')); fprintf('Check agreement between M.exp and M.retr. Please check the\n'); fprintf('factory file of M to ensure M.exp is a proper exponential.\n'); fprintf('The slope must be at least 2 to have a proper retraction.\n'); fprintf('For the retraction to be second order, the slope should be 3.\n'); fprintf('It appears the slope is: %g.\n', poly(1)); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/criticalpointfinder.m ================================================ function problem_critpt = criticalpointfinder(problem) % Creates a Manopt problem whose optima are the critical points of another. % % problem_critpt = criticalpointfinder(problem) % % Given a Manopt problem structure 'problem', this tool returns a new % problem structure, 'problem_critpt', such that the global optima of the % new problem coincide with the critical points of the original problem. % This can be useful notably in empirical studies of the properties of % saddle points of a problem. % % Concretely, if f is the cost function of the given problem, grad f % denotes its (Riemannian) gradient and Hess f denotes its (Riemannian) % Hessian, then the new problem has a cost function g defined by: % % g(x) = (1/2)*norm(grad f(x))^2, % % where x is a point on the manifold problem.M (the new problem lives on % the same manifold), and norm(.) = problem.M.norm(x, .) is the Riemannian % norm on the tangent space at x. 
The Riemannian gradient of g is elegantly % obtained from knowledge of f: % % grad g(x) = Hess f(x)[grad f(x)] % % If the Hessian of f is not available in the given problem, Manopt will % approximate it automatically to compute an approximate gradient of g. % If the Hessian of f is available, then an approximate Hessian of g is % defined in the returned problem as % % approxhess g(x)[u] = Hess f(x)[ Hess f(x)[u] ]. % % This approximation is exact if x is a critical point of f, which is % enough to ensure superlinear local convergence to critical points of f % using the trustregions algorithm, for example. % % Once problem_critpt is obtained, it can be passed to any of the solvers % of Manopt to compute critical points of the original problem. Supplying % an initial point to the solver allows to aim for a critical point in a % specific neighborhood of the search space. % % % Usage example: % % The code below creates a problem whose optima are dominant eigenvectors % of a matrix A and whose critical points are any eigenvectors of A, then % compute critical points using the present tool: % % n = 100; A = randn(n); A = .5*(A+A'); % problem.M = spherefactory(n); % problem.cost = @(x) -x'*(A*x); % problem.egrad = @(x) -2*A*x; % problem.ehess = @(x, xdot) -2*A*xdot; % problem_critpt = criticalpointfinder(problem); % opts.tolcost = .5*(1e-5)^2; % aim for a gradient smaller than 1e-5 % [x, fx] = trustregions(problem_critpt, [], opts); % random initial guess % fprintf('Norm of the gradient at x: %g\n', sqrt(2*fx)); % fprintf('This is small if x is close to being an eigenvector: %g\n',... % norm((x'*A*x)*x - A*x)); % % The two displayed numbers are equal up to a factor 2. % % % See also: trustregions % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Jan. 25, 2017. % Contributors: % Change log: % TODO: Determine a safe way of using the caching functionalities of Manopt % with this tool. 
function D = dexpm(X, H)
% Fréchet derivative of the matrix exponential.
%
% function D = dexpm(X, H)
%
% Returns the directional derivative (Fréchet derivative) of expm at the
% square matrix X along the square matrix H, that is,
%
%   D = lim_(t -> 0) (expm(X + tH) - expm(X)) / t.
%
% The computation is delegated to dfunm with @expm as the matrix function.
%
% Note: the adjoint of dexpm(X, .) is dexpm(X', .), with respect to the
% inner product inner = @(A, B) real(trace(A'*B)). This is often useful to
% derive gradients of matrix functions involving expm(X).
%
% See also: dfunm dlogm dsqrtm

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 3, 2015.
% Contributors:
% Change log:

    matrix_function = @expm;
    D = dfunm(matrix_function, X, H);

end
function [tracedtensor] = diagsum(tensor1, d1, d2)
% C = DIAGSUM(A, d1, d2) Performs the trace
% C(i[1],...,i[d1-1],i[d1+1],...,i[d2-1],i[d2+1],...i[n]) =
% A(i[1],...,i[d1-1],k,i[d1+1],...,i[d2-1],k,i[d2+1],...,i[n])
% (Sum on k).
%
% C = DIAGSUM(A, d1, d2) traces A along the diagonal formed by dimensions d1
% and d2. If the lengths of these dimensions are not equal, DIAGSUM traces
% until the end of the shortest of dimensions d1 and d2 is reached. This is
% an analogue of the built in TRACE function.
%
% Special cases handled before the general algorithm:
%   - d1 == d2:          A is simply summed along that dimension;
%   - A is 2-D:          the built-in trace(A) is returned;
%   - size(A, d1) and size(A, d2) are both 1: A is only squeezed.
%
% In the general case, a 0/1 "selector" array of the same size as A is
% built, with ones exactly where the indices along d1 and d2 coincide; A is
% multiplied entrywise by it and summed along d1 and d2. For arrays with
% more than 3 dimensions this relies on the external helper OUTER.
%
% Wynton Moore, January 2006
%
% Change log:
%   Shape correction fixed for d1 == d2: only one dimension is removed from
%   the list of non-traced dimensions in that case. Previously two entries
%   were removed, which raised an index-out-of-range error when d1 was the
%   last dimension of A.

dim1=size(tensor1);
numdims=length(dim1);

%check inputs
if d1==d2
    tracedtensor=squeeze(sum(tensor1,d1));
elseif numdims==2
    tracedtensor=trace(tensor1);
elseif dim1(d1)==1 && dim1(d2)==1
    tracedtensor=squeeze(tensor1);
else

    %determine correct permutation
    %(swapd1, swapd2: positions among the last two dimensions that d1 and
    % d2 are exchanged with, so that the traced pair comes last)
    swapd1=d1;swapd2=d2;

    if d1~=numdims-1 && d1~=numdims && d2~=numdims-1
        swapd1=numdims-1;
    elseif d1~=numdims-1 && d1~=numdims && d2~=numdims
        swapd1=numdims;
    end
    if d2~=numdims-1 && d2~=numdims && swapd1~=numdims-1
        swapd2=numdims-1;
    elseif d2~=numdims-1 && d2~=numdims && swapd1~=numdims
        swapd2=numdims;
    end

    %prepare for construction of selector tensor
    %(permmatrix is the identity with columns d1<->swapd1 and d2<->swapd2
    % exchanged; it reorders both the size vector and the dimension list)
    temp1=eye(numdims);
    permmatrix=temp1;
    permmatrix(:,d1)=temp1(:,swapd1);
    permmatrix(:,swapd1)=temp1(:,d1);
    permmatrix(:,d2)=temp1(:,swapd2);
    permmatrix(:,swapd2)=temp1(:,d2);

    selectordim=dim1*permmatrix;
    permvector=(1:numdims)*permmatrix;

    %construct selector tensor
    if numdims>3
        selector = ipermute(outer(ones(selectordim(1:numdims-2)), ...
                                  eye(selectordim(numdims-1), ...
                                      selectordim(numdims)), ...
                                  0), ...
                            permvector);
    else
        %when numdims=3, the above line gives ndims(selector)=4. This
        %routine avoids that error. When used with GMDMP, numdims will be
        %at least 4, so this routine will be unnecessary.
        selector2=eye(selectordim(numdims-1), selectordim(numdims));
        selector=zeros(selectordim);
        for j=1:selectordim(1)
            selector(j, :, :)=selector2;
        end
        selector=ipermute(selector, permvector);
    end

    %perform trace, discard resulting singleton dimensions
    tracedtensor=sum(sum(tensor1.*selector, d1), d2);
    tracedtensor=squeeze(tracedtensor);

end


%correction for aberration in squeeze function:
%size(squeeze(rand(1,1,2)))=[2 1]
%nontracedimensions lists the sizes of the dimensions that survive the
%trace, in their original order.
nontracedimensions=dim1;
nontracedimensions(d1)=[];
if d2>d1
    nontracedimensions(d2-1)=[];
elseif d2<d1
    nontracedimensions(d2)=[];
end
%(when d1==d2, a single dimension was summed out: nothing more to remove)

tracedsize=size(tracedtensor);
% Next line modified, Nicolas Boumal, April 30, 2012, such that
% diagsum(A, 1, 2) would compute the trace of A, a 2D matrix.
% A column result is flipped to a row when its length does not match the
% first surviving dimension (or when no dimension survives at all).
if length(tracedsize)==2 && tracedsize(2)==1 && ...
      (isempty(nontracedimensions) || tracedsize(1)~=nontracedimensions(1))
    tracedtensor=tracedtensor.';
end
function h = hashmd5(inp)
% Computes the MD5 hash of input data.
%
% function h = hashmd5(inp)
%
% Returns a string containing the MD5 hash of the input variable, written
% as lowercase hexadecimal digits. The input variable may be of any class
% that can be typecast to uint8 format, which is fairly non-restrictive.
% Characters and logicals are converted with uint8(); everything else is
% reinterpreted byte-wise with typecast(), without loss of data.
%
% In Matlab, the hash is computed through java.security.MessageDigest.
% In Octave, the built-in hash('md5', ...) is used when it exists; older
% Octave releases that only provide md5sum fall back to that function.

% This file is part of Manopt: www.manopt.org.
% This code is a stripped version of more general hashing code by
% Michael Kleder, Nov 2005.
% Change log:
%
%   Aug. 8, 2013 (NB):
%       Made x a static (persistent) variable, in the hope it will speed
%       it up. Furthermore, the function is now Octave compatible.
%
%   Octave: prefer hash('md5', ...) over md5sum when it is available.
%   Matlab: hexadecimal conversion now always uses two digits per byte
%       (dec2hex(h, 2)) instead of padding by hand afterwards.

    is_octave = exist('OCTAVE_VERSION', 'builtin');

    % The Java digest object is created once and reused across calls.
    persistent x;
    if isempty(x) && ~is_octave
        x = java.security.MessageDigest.getInstance('MD5');
    end

    inp = inp(:);
    % Convert strings and logicals into uint8 format
    if ischar(inp) || islogical(inp)
        inp = uint8(inp);
    else % Convert everything else into uint8 format without loss of data
        inp = typecast(inp, 'uint8');
    end

    % Create hash
    if ~is_octave
        x.update(inp);
        h = typecast(x.digest, 'uint8');
        % Force two hexadecimal digits per byte: without the explicit
        % width, dec2hex would emit a single digit per byte in the (remote)
        % event that all 16 hash bytes are below 16.
        h = dec2hex(h, 2)';
        h = lower(h(:)');
    else
        if exist('hash') %#ok<EXIST>
            h = hash('md5', char(inp'));
        else
            % Legacy Octave releases without hash().
            h = md5sum(char(inp'), true);
        end
    end

end
The value attained is returned as lambda, and % is the minimal or maximal eigenvalue of the Hessian (actually, the last % value attained when the solver stopped). This is a real number since the % Hessian is a symmetric operator. % % If u0 is specified, it should be a unit-norm tangent vector at x. It is % then used as initial guess to solve the above problem. Pass [] to omit. % % The options structure, if provided, will be passed along to manoptsolve. % As such, you may choose which solver to use to solve the above % optimization problem by setting options.solver. See manoptsolve's help. % The other options will be passed along to the chosen solver too. % Pass [] to omit. % % Often times, it is only necessary to compute a vector u such that the % quadratic form is negative, if that is at all possible. To do so, set the % following stopping criterion: options.tolcost = -1e-10; (for example) % and side = 'min'. The solver will return as soon as the quadratic cost % defined above drops below the set value (or sooner if another stopping % criterion triggers first.) % % storedb is a StoreDB object, key is the StoreDB key to point x. % % info is the info struct-array returned by the solver. % % See also: hessianspectrum manoptsolve tangentspherefactory % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, Aug. 13, 2014. % Contributors: % Change log: % % April 3, 2015 (NB): % Works with the new StoreDB class system. % % May 7, 2015 (NB): % Default solver options: verbosity = 0 and defaults to trustregions. % % Nov 27, 2015 (NB): % The function now also returns the info struct-array. % By default, minimize if ~exist('side', 'var') || isempty(side) side = 'min'; end % If no initial guess was specified, prepare the empty one. if ~exist('y0', 'var') y0 = []; end % Merge default solver options with potential user-specified options. 
% Set local defaults here localdefaults.verbosity = 0; localdefaults.solver = @trustregions; if ~exist('options', 'var') || isempty(options) options = struct(); end options = mergeOptions(localdefaults, options); % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end % Convert the side into a sign. % Since Manopt minimizes, 'min' asks for no sign change. switch lower(side) case 'min' sign = +1; case 'max' sign = -1; otherwise error('The side should be either ''min'' or ''max''.'); end % We define a manifold that is actually the unit sphere on the tangent % space to problem.M at x. A generalization would be to consider % Stiefel or Grassmann on the tangent space, but this would require % manipulating collections of tangent vectors, which in full generality % may be more complex (from a programming point of view). % Points are represented as tangent vectors of unit norm. % Tangent vectors are represented as tangent vectors orthogonal to the % root point, with respect to the Riemannian metric on the tangent % space. % M is the original manifold. x is a point on M. M = problem.M; % N is the manifold we build. y will be a point on N, thus also a % tangent vector to M at x. This is a typical Riemannian submanifold of % a Euclidean space, hence it is easy to describe in terms of the tools % available for M. N = tangentspherefactory(M, x); % It is usually a good idea to force a gradient computation to make % sure precomputable things are precomputed. if canGetGradient(problem) [unused1, unused2] = getCostGrad(problem, x, storedb, key); %#ok end % This is the star operator of this party. hessian = @(y) getHessian(problem, x, y, storedb, key); % Start a Manopt problem structure for the quadratic optimization % problem on the sphere N. new_problem.M = N; % Define the cost function, its gradient and its Hessian. 
function [H, basis] = hessianmatrix(problem, x, basis)
% Matrix representation of the Hessian at x in a basis of tangent vectors.
%
% [H, basis] = hessianmatrix(problem, x)
% [H, basis] = hessianmatrix(problem, x, basis)
%
% problem is a Manopt problem structure with a manifold and cost function.
% x is a point on the manifold problem.M.
% basis (optional) is a cell of tangent vectors at x forming an orthonormal
% basis of the tangent space. If it is omitted or empty, a basis with
% problem.M.dim() vectors is generated by tangentorthobasis. If the basis
% only spans a subspace of the tangent space at x, the returned matrix
% represents the Hessian restricted to that subspace.
%
% H is an n-by-n matrix, with n the number of vectors in the basis, where
% H(i, j) is the inner product (problem.M.inner at x) between basis{i} and
% the Hessian at x applied to basis{j}. Only the entries with j >= i are
% computed; the lower triangle is filled in by symmetry.
%
% For optimization, it is usually not useful to compute the Hessian matrix,
% as this quickly becomes expensive. This tool is meant for exploration and
% debugging rather than for algorithmic use in solvers. To access the
% spectrum of the Hessian, it may be more practical to call hessianextreme
% or hessianspectrum. This should coincide with eig(H).
%
%
% Example of equivalence:
%
%   Hu = getHessian(problem, x, u)
%
% is equivalent to (but much faster than):
%
%   B = tangentorthobasis(M, x);
%   H = hessianmatrix(problem, x, B);
%   u_vec = tangent2vec(M, x, B, u);
%   Hu_vec = H*u_vec;
%   Hu = lincomb(M, x, B, Hu_vec);
%
% Note that there will be some error due to numerical round-off.
%
%
% See also: hessianspectrum hessianextreme tangentorthobasis orthogonalize tangent2vec

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 14, 2016.
% Contributors:
% Change log:

    % Stay silent if at least an approximate Hessian is available: the
    % user is then presumably aware of what they are doing.
    if ~(canGetHessian(problem) || canGetApproxHessian(problem))
        warning('manopt:hessianmatrix:nohessian', ...
            ['The Hessian appears to be unavailable.\n' ...
             'Will try to use an approximate Hessian instead.\n'...
             'Since this approximation may not be linear or '...
             'symmetric,\nthe computation might fail and the '...
             'results (if any)\nmight make no sense.']);
    end

    M = problem.M;

    % Without a user-supplied basis, generate one for the whole tangent
    % space at x.
    if nargin < 3 || isempty(basis)
        n = M.dim();
        basis = tangentorthobasis(M, x, n);
    else
        n = numel(basis);
    end

    % Fresh store database and key for x, shared by all Hessian calls.
    storedb = StoreDB(1);
    key = storedb.getNewKey();

    % Hessian at x applied to each basis vector.
    Hbasis = cell(n, 1);
    for k = 1 : n
        Hbasis{k} = getHessian(problem, x, basis{k}, storedb, key);
    end

    % Inner products of the basis vectors with their images under the
    % Hessian; each upper-triangular entry is mirrored below the diagonal.
    H = zeros(n);
    for row = 1 : n
        for col = row : n
            entry = M.inner(x, basis{row}, Hbasis{col});
            H(row, col) = entry;
            H(col, row) = entry;
        end
    end

end
Ideally, the preconditioned Hessian % is better conditioned (smaller ratio of largest to smallest eigenvalue in % magnitude) than the non-preconditioned spectrum. The present tool can % help assess that. % % The typical ways to define a preconditioner are via problem.precon or % problem.sqrtprecon (see comment below). These should be function handles % with the same input/output system as problem.hess for the Hessian. % % If the Hessian is not available from the problem structure, an % approximate Hessian will be used. There are no guarantees of % interpretability, but this may nevertheless be useful at times. % % Even though the Hessian and the preconditioner are both symmetric, their % composition is not symmetric. This can slow down the call to 'eigs' % substantially. If possible, you may specify the square root of the % preconditioner in the problem structure, as sqrtprecon. This operator on % the tangent space at x must also be symmetric, positive definite, and % such that SqrtPrecon o SqrtPrecon = Precon. Then, the spectrum of the % symmetric operator SqrtPrecon o Hessian o SqrtPrecon is computed: it is % the same as the spectrum of Precon o Hessian, but is usually faster to % compute. If both Precon and SqrtPrecon are provided, only SqrtPrecon will % be used. % % The input and the output of the Hessian and of the preconditioner are % projected on the tangent space to avoid undesired contributions of the % ambient space. % % storedb is a StoreDB object, key is the StoreDB key to point x. % % Requires the manifold description in problem.M to have these functions: % % u_vec = vec(x, u_mat) : % Returns a column vector representation of the normal (usually % matrix) representation of the tangent vector u_mat. vec must be an % isometry between the tangent space (with its Riemannian metric) and % a subspace of R^n where n = length(u_vec), with the 2-norm on R^n. % In other words: it is an orthogonal projector. 
% % u_mat = mat(x, u_vec) : % The inverse of vec (its adjoint). % % u_mat_clean = tangent(x, u_mat) : % Subtracts from the tangent vector u_mat any component that would % make it "not really tangent", by projection. % % answer = vecmatareisometries() : % Returns true if the linear maps encoded by vec and mat are % isometries, false otherwise. It is better if the answer is yes. % % See also: hessianextreme canGetPrecon canGetSqrtPrecon % This file is part of Manopt: www.manopt.org. % Original author: Nicolas Boumal, July 3, 2013. % Contributors: % Change log: % % Dec. 18, 2014 (NB): % The lambdas are now sorted when they are returned. % % April 3, 2015 (NB): % Works with the new StoreDB class system. % Does no longer accept sqrtprecon as an input: the square root of % the preconditioner may now be specified directly in the problem % structure, following the same syntax as the preconditioner precon. % % April 4, 2015 (NB): % By default, the spectrum is computed without the preconditioner's % effect, even if it is available. A new input option allows to % switch this behavior without the need to change the problem % structure. % Allow omission of the key, and even of storedb. if ~exist('key', 'var') if ~exist('storedb', 'var') storedb = StoreDB(); end key = storedb.getNewKey(); end % Manage the option to use or not use a preconditioner. % The input is a string. It is here transformed into a Boolean. if ~exist('usepreconstr', 'var') || isempty(usepreconstr) usepreconstr = 'noprecon'; end switch lower(usepreconstr) case 'noprecon' useprecon = false; case 'precon' useprecon = true; otherwise % A bit of legacy code heads up. if isa(usepreconstr, 'function_handle') warning('manopt:hessianspectrum:oldsyntax', ... ['This function no longer expects sqrtprecon ' ... 'as input. 
Place it in the problem structure.']); end error('Input useprecon must be either ''precon'' or ''noprecon''.'); end % No warning if an approximate Hessian is available, as then the user % is presumably aware of what they are doing. if ~canGetHessian(problem) && ~canGetApproxHessian(problem) warning('manopt:hessianspectrum:nohessian', ... ['The Hessian appears to be unavailable.\n' ... 'Will try to use an approximate Hessian instead.\n'... 'Since this approximation may not be linear or '... 'symmetric,\nthe computation might fail and the '... 'results (if any)\nmight make no sense.']); end vec = @(u_mat) problem.M.vec(x, u_mat); mat = @(u_vec) problem.M.mat(x, u_vec); tgt = @(u_mat) problem.M.tangent(x, u_mat); % n: size of a vectorized tangent vector % dim: dimension of the tangent space % necessarily, n >= dim. % The vectorized operators we build below will have at least n - dim % zero eigenvalues. n = length(vec(problem.M.zerovec(x))); dim = problem.M.dim(); % It is usually a good idea to force a gradient computation to make % sure precomputable things are precomputed. if canGetGradient(problem) [unused1, unused2] = getCostGrad(problem, x, storedb, key); %#ok end hess = @(u_mat) tgt(getHessian(problem, x, tgt(u_mat), storedb, key)); hess_vec = @(u_vec) vec(hess(mat(u_vec))); % Regardless of preconditioning, we can only have a symmetric % eigenvalue problem if the vec/mat pair of the manifold is an % isometry: vec_mat_are_isometries = problem.M.vecmatareisometries(); if ~useprecon % No preconditioner to use: simply use the Hessian as is. eigs_opts.issym = vec_mat_are_isometries; eigs_opts.isreal = true; lambdas = eigs(hess_vec, n, dim, 'LM', eigs_opts); elseif canGetSqrtPrecon(problem) % There is a preconditioner, and we have its square root: deal with % the symmetric composition SqrtPrecon o Hessian o SqrtPrecon. 
function [range, poly] = identify_linear_piece(x, y, window_length)
% Locate the stretch of the curve (x, y) that looks the most linear.
%
% function [range poly] = identify_linear_piece(x, y, window_length)
%
% A first-order polynomial is fitted (with polyfit) through the data on
% every window of consecutive indices i : i + window_length, for i from 1
% to length(x) - window_length. The window whose fit has the smallest
% residual norm (field normr of polyfit's second output) is retained.
%
% range is the vector of indices of the retained window, so that x(range)
% is the portion over which (x, y) is the most linear.
% poly is the corresponding fitted polynomial, following the usual Matlab
% convention for polynomials (highest degree coefficient first); hence
% poly(1) is the slope.
%
% See also: checkdiff checkgradient checkhessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 8, 2013.
% Contributors:
% Change log:

    % One residual and one pair of coefficients per candidate window.
    fit_errors = zeros(length(x) - window_length, 1);
    coeffs = zeros(2, length(fit_errors));

    offsets = 0 : window_length;
    for first = 1 : length(fit_errors)
        window = first + offsets;
        [line_fit, fit_info] = polyfit(x(window), y(window), 1);
        fit_errors(first) = fit_info.normr;
        coeffs(:, first) = line_fit';
    end

    % Keep the window with the smallest residual.
    [smallest_error, best] = min(fit_errors); %#ok<ASGLU>
    range = best : (best + window_length);
    poly = coeffs(:, best)';

end
%
% function [x, cost, info, options] = manoptsolve(problem)
% function [x, cost, info, options] = manoptsolve(problem, x0)
% function [x, cost, info, options] = manoptsolve(problem, x0, options)
% function [x, cost, info, options] = manoptsolve(problem, [], options)
%
% A default solver is picked according to what the Manopt problem
% structure makes available:
%
%    no gradient ................... neldermead
%    gradient but no Hessian ....... conjugategradient
%    gradient and Hessian .......... trustregions
%
% The solver is called and its outputs are passed along. The choice of
% solver can be forced by specifying a function handle to a Manopt solver
% in the options structure. For example:
%
%    options.solver = @trustregions;
%
% See also: trustregions conjugategradient steepestdescent

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Aug. 13, 2014.
% Contributors:
% Change log:

    % A cost function is the bare minimum.
    if ~canGetCost(problem)
        error('The problem structure must specify a cost function.');
    end

    % Default solver, chosen from the differentials that are available.
    if canGetGradient(problem)
        if canGetHessian(problem)
            localdefaults.solver = @trustregions;
        else
            localdefaults.solver = @conjugategradient;
        end
    else
        localdefaults.solver = @neldermead;
    end

    % Combine the local defaults with the user options, if any.
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);

    % Without an initial guess, hand the solver an empty one.
    if ~exist('x0', 'var')
        x0 = [];
    end

    % Delegate to the selected solver.
    solver = options.solver;
    [x, cost, info, options] = solver(problem, x0, options);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/matrixlincomb.m
================================================
function v = matrixlincomb(x, a1, d1, a2, d2) %#ok
% Linear combination of tangent vectors that are represented as matrices.
%
% function v = matrixlincomb(x, a1, d1)
% function v = matrixlincomb(x, a1, d1, a2, d2)
%
% x is a point, d1 and d2 are tangent vectors at x, and a1 and a2 are real
% coefficients. The output is a tangent vector at x representing
% a1*d1 + a2*d2, assuming d1 and d2 are represented as matrices (or more
% generally as arrays in Matlab).
%
% With only three inputs, the returned tangent vector is a1*d1.
%
% The input x is actually unused.
%
% This function is a helper to define manifolds in Manopt.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 2, 2015.
% Contributors:
% Change log:

    switch nargin
        case 3
            v = a1*d1;
        case 5
            v = a1*d1 + a2*d2;
        otherwise
            error('matrixlincomb takes either 3 or 5 inputs.');
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multihconj.m
================================================
function b = multihconj(a, dim)
%MULTIHCONJ  Hermitian conjugating arrays of matrices.
%    B = MULTIHCONJ(A) is equivalent to B = MULTIHCONJ(A, DIM), where
%    DIM = 1.
%
%    B = MULTIHCONJ(A, DIM) is equivalent to
%    B = CONJ(PERMUTE(A, [1:DIM-1, DIM+1, DIM, DIM+2:NDIMS(A)])), where A
%    is an array containing N P-by-Q matrices along its dimensions DIM and
%    DIM+1, and B is an array containing the Q-by-P Hermitian conjugate (')
%    of those N matrices along the same dimensions. N = NUMEL(A) / (P*Q),
%    i.e. N is equal to the number of elements in A divided by the number
%    of elements in each matrix.
%
%
%    Example:
%       A 5-by-9-by-3-by-2 array may be considered to be a block array
%       containing ten 9-by-3 matrices along dimensions 2 and 3. In this
%       case, its size is so indicated:  5-by-(9-by-3)-by-2 or 5x(9x3)x2.
%       If A is ................ a 5x(9x3)x2 array of 9x3 matrices,
%       C = MULTIHCONJ(A, 2) is a 5x(3x9)x2 array of 3x9 matrices.
%
%    See also MULTITRANSP MULTIHERM.

% This file is part of Manopt: www.manopt.org.
% Original author: Hiroyuki Sato, April 27, 2015.
% Contributors:
% Change log:

    % Setting DIM if not supplied.
    if nargin == 1, dim = 1; end

    % Transposing
    b = multitransp(a, dim);

    % Conjugating
    b = conj(b);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multiherm.m
================================================
function Y = multiherm(X)
% Returns the Hermitian parts of the matrices in the 3D matrix X
%
% function Y = multiherm(X)
%
% Y is a 3D matrix the same size as X. Each slice Y(:, :, i) is the
% Hermitian part of the slice X(:, :, i).
%
% See also: multiprod multitransp multihconj multiscale multiskew

% This file is part of Manopt: www.manopt.org.
% Original author: Hiroyuki Sato, April 27, 2015.
% Contributors:
% Change log:

    Y = .5*(X + multihconj(X));

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multiprod.m
================================================
function c = multiprod(a, b, idA, idB)
% Multiplying 1-D or 2-D subarrays contained in two N-D arrays.
%
%   C = MULTIPROD(A,B) is equivalent  to C = MULTIPROD(A,B,[1 2],[1 2])
%   C = MULTIPROD(A,B,[D1 D2]) is eq. to C = MULTIPROD(A,B,[D1 D2],[D1 D2])
%   C = MULTIPROD(A,B,D1) is equival. to C = MULTIPROD(A,B,D1,D1)
%
%   MULTIPROD performs multiple matrix products, with array expansion (AX)
%   enabled. Its first two arguments A and B are "block arrays" of any
%   size, containing one or more 1-D or 2-D subarrays, called "blocks" (*).
%   For instance, a 5x6x3 array may be viewed as an array containing five
%   6x3 blocks. In this case, its size is denoted by 5x(6x3). The 1 or 2
%   adjacent dimensions along which the blocks are contained are called the
%   "internal dimensions" (IDs) of the array (**).
%
%   1) 2-D by 2-D BLOCK(S) (*)
%         C = MULTIPROD(A, B, [DA1 DA2], [DB1 DB2]) contains the products
%         of the PxQ matrices in A by the RxS matrices in B. [DA1 DA2] are
%         the IDs of A; [DB1 DB2] are the IDs of B.
%
%   2) 2-D by 1-D BLOCK(S) (*)
%         C = MULTIPROD(A, B, [DA1 DA2], DB1) contains the products of the
%         PxQ matrices in A by the R-element vectors in B. The latter are
%         considered to be Rx1 matrices. [DA1 DA2] are the IDs of A; DB1 is
%         the ID of B.
%
%   3) 1-D by 2-D BLOCK(S) (*)
%         C = MULTIPROD(A, B, DA1, [DB1 DB2]) contains the products of the
%         Q-element vectors in A by the RxS matrices in B. The vectors in A
%         are considered to be 1xQ matrices. DA1 is the ID of A; [DB1 DB2]
%         are the IDs of B.
%
%   4) 1-D BY 1-D BLOCK(S) (*)
%      (a) If either SIZE(A, DA1) == 1 or SIZE(B, DB1) == 1, or both,
%             C = MULTIPROD(A, B, DA1, DB1) returns products of scalars by
%             vectors, or vectors by scalars or scalars by scalars.
%      (b) If SIZE(A, DA1) == SIZE(B, DB1),
%             C = MULTIPROD(A, B, [0 DA1], [DB1 0]) or
%             C = MULTIPROD(A, B, DA1, DB1) virtually turns the vectors
%             contained in A and B into 1xP and Px1 matrices, respectively,
%             then returns their products, similar to scalar products.
%             Namely, C = DOT2(A, B, DA1, DB1) is equivalent to
%             C = MULTIPROD(CONJ(A), B, [0 DA1], [DB1 0]).
%      (c) Without limitations on the length of the vectors in A and B,
%             C = MULTIPROD(A, B, [DA1 0], [0 DB1]) turns the vectors
%             contained in A and B into Px1 and 1xQ matrices, respectively,
%             then returns their products, similar to outer products.
%             Namely, C = OUTER(A, B, DA1, DB1) is equivalent to
%             C = MULTIPROD(CONJ(A), B, [DA1 0], [0 DB1]).
%
%   Common constraints for all syntaxes:
%      The external dimensions of A and B must either be identical or
%      compatible with AX rules. The internal dimensions of each block
%      array must be adjacent (DA2 == DA1 + 1 and DB2 == DB1 + 1 are
%      required). DA1 and DB1 are allowed to be larger than NDIMS(A) and
%      NDIMS(B). In syntaxes 1, 2, and 3, Q == R is required, unless the
%      blocks in A or B are scalars.
%
%   Array expansion (AX):
%      AX is a powerful generalization to N-D of the concept of scalar
%      expansion. Indeed, A and B may be scalars, vectors, matrices or
%      multi-dimensional arrays. Scalar expansion is the virtual
%      replication or annihilation of a scalar which allows you to combine
%      it, element by element, with an array X of any size (e.g. X+10,
%      X*10, or []-10). Similarly, in MULTIPROD, the purpose of AX is to
%      automatically match the size of the external dimensions (EDs) of A
%      and B, so that block-by-block products can be performed. ED matching
%      is achieved by means of a dimension shift followed by a singleton
%      expansion:
%      1) Dimension shift (see SHIFTDIM).
%            Whenever DA1 ~= DB1, a shift is applied to impose DA1 == DB1.
%            If DA1 > DB1, B is shifted to the right by DA1 - DB1 steps.
%            If DB1 > DA1, A is shifted to the right by DB1 - DA1 steps.
%      2) Singleton expansion (SX).
%            Whenever an ED of either A or B is singleton and the
%            corresponding ED of the other array is not, the mismatch is
%            fixed by virtually replicating the array (or diminishing it to
%            length 0) along that dimension.
%
%   MULTIPROD is a generalization for N-D arrays of the matrix
%   multiplication function MTIMES, with AX enabled. Vector inner, outer,
%   and cross products generalized for N-D arrays and with AX enabled are
%   performed by DOT2, OUTER, and CROSS2 (MATLAB Central, file #8782).
%   Elementwise multiplications (see TIMES) and other elementwise binary
%   operations with AX enabled are performed by BAXFUN (MATLAB Central,
%   file #23084). Together, these functions make up the "ARRAYLAB toolbox".
%
%   Input and output format:
%      The size of the EDs of C is determined by AX. Block size is
%      determined as follows, for each of the above-listed syntaxes:
%      1) C contains PxS matrices along IDs MAX([DA1 DA2], [DB1 DB2]).
%      2) Array     Block size     ID(s)
%         ----------------------------------------------------
%         A         PxQ  (2-D)     [DA1 DA2]
%         B         R    (1-D)     DB1
%         C (a)     P    (1-D)     MAX(DA1, DB1)
%         C (b)     PxQ  (2-D)     MAX([DA1 DA2], [DB1 DB1+1])
%         ----------------------------------------------------
%         (a) The 1-D blocks in B are not scalars (R > 1).
%         (b) The 1-D blocks in B are scalars (R = 1).
%      3) Array     Block size     ID(s)
%         ----------------------------------------------------
%         A         Q    (1-D)     DA1
%         B         RxS  (2-D)     [DB1 DB2]
%         C (a)     S    (1-D)     MAX(DA1, DB1)
%         C (b)     RxS  (2-D)     MAX([DA1 DA1+1], [DB1 DB2])
%         ----------------------------------------------------
%         (a) The 1-D blocks in A are not scalars (Q > 1).
%         (b) The 1-D blocks in A are scalars (Q = 1).
%      4)     Array     Block size         ID(s)
%         --------------------------------------------------------------
%         (a) A         P        (1-D)     DA1
%             B         Q        (1-D)     DB1
%             C         MAX(P,Q) (1-D)     MAX(DA1, DB1)
%         --------------------------------------------------------------
%         (b) A         P        (1-D)     DA1
%             B         P        (1-D)     DB1
%             C         1        (1-D)     MAX(DA1, DB1)
%         --------------------------------------------------------------
%         (c) A         P        (1-D)     DA1
%             B         Q        (1-D)     DB1
%             C         PxQ      (2-D)     MAX([DA1 DA1+1], [DB1 DB1+1])
%         --------------------------------------------------------------
%
%   Terminological notes:
%   (*)  1-D and 2-D blocks are generically referred to as "vectors" and
%        "matrices", respectively. However, both may be also called
%        "scalars" if they have a single element. Moreover, matrices with a
%        single row or column (e.g. 1x3 or 3x1) may be also called "row
%        vectors" or "column vectors".
%   (**) Not to be confused with the "inner dimensions" of the two matrices
%        involved in a product X * Y, defined as the 2nd dimension of X and
%        the 1st of Y (DA2 and DB1 in syntaxes 1, 2, 3).
%
%   Examples:
%    1) If  A is .................... a 5x(6x3)x2 array,
%       and B is .................... a 5x(3x4)x2 array,
%       C = MULTIPROD(A, B, [2 3]) is a 5x(6x4)x2 array.
%
%       A single matrix A pre-multiplies each matrix in B
%       If  A is ........................... a (1x3)    single matrix,
%       and B is ........................... a 10x(3x4)     3-D array,
%       C = MULTIPROD(A, B, [1 2], [2 3]) is a 10x(1x4)     3-D array.
%
%       Each matrix in A pre-multiplies each matrix in B (all possible
%       combinations)
%       If  A is .................... a (6x3)x5   array,
%       and B is .................... a (3x4)x1x2 array,
%       C = MULTIPROD(A, B, [1 2]) is a (6x4)x5x2 array.
%
%   2a) If  A is ........................... a 5x(6x3)x2 4-D array,
%       and B is ........................... a 5x(3)x2   3-D array,
%       C = MULTIPROD(A, B, [2 3], [2]) is   a 5x(6)x2   3-D array.
%
%   2b) If  A is ........................... a 5x(6x3)x2 4-D array,
%       and B is ........................... a 5x(1)x2   3-D array,
%       C = MULTIPROD(A, B, [2 3], [2]) is   a 5x(6x3)x2 4-D array.
%
%   4b) If both A and B are .................. 5x(6)x2   3-D arrays,
%       C = MULTIPROD(A, B, 2) is .......... a 5x(1)x2   3-D array, while
%   4c) C = MULTIPROD(A, B, [2 0], [0 2]) is a 5x(6x6)x2 4-D array
%
%   See also DOT2, OUTER, CROSS2, BAXFUN, MULTITRANSP, MULTITRACE, MULTISCALE.

% $ Version: 2.1 $
% CODE      by:            Paolo de Leva
%                          (Univ. of Rome, Foro Italico, IT)    2009 Jan 24
%           optimized by:  Paolo de Leva
%                          Jinhui Bai (Georgetown Univ., D.C.)  2009 Jan 24
% COMMENTS  by:            Paolo de Leva                        2009 Feb 24
% OUTPUT    tested by:     Paolo de Leva                        2009 Feb 24
% -------------------------------------------------------------------------

assert(nargin >= 2 && nargin <= 4, 'Takes from 2 to 4 inputs.');
switch nargin % Setting IDA and/or IDB
    case 2, idA = [1 2]; idB = [1 2];
    case 3, idB = idA;
end

% ESC 1 - Special simple case (both A and B are 2D), solved using C = A * B

    if ndims(a)==2 && ndims(b)==2 && ...
            isequal(idA,[1 2]) && isequal(idB,[1 2])
        c = a * b;
        return
    end

% MAIN 0 - Checking and evaluating array size, block size, and IDs

    sizeA0 = size(a);
    sizeB0 = size(b);
    [sizeA, sizeB, shiftC, delC, sizeisnew, idA, idB, ...
     squashOK, sxtimesOK, timesOK, mtimesOK, sumOK] = ...
                                          sizeval(idA,idB, sizeA0,sizeB0);

% MAIN 1 - Applying dimension shift (first step of AX) and
%          turning both A and B into arrays of either 1-D or 2-D blocks

    if sizeisnew(1), a = reshape(a, sizeA); end
    if sizeisnew(2), b = reshape(b, sizeB); end

% MAIN 2 - Performing products with or without SX (second step of AX)

    if squashOK % SQUASH + MTIMES (fastest engine)
        c = squash2D_mtimes(a,b, idA,idB, sizeA,sizeB, squashOK);
    elseif timesOK % TIMES (preferred w.r. to SX + TIMES)
        if sumOK, c = sum(a .* b, sumOK);
        else      c =     a .* b; end
    elseif sxtimesOK % SX + TIMES
        if sumOK, c = sum(bsxfun(@times, a, b), sumOK);
        else      c =     bsxfun(@times, a, b); end
    elseif mtimesOK % MTIMES (rarely used)
        c = a * b;
    end

% MAIN 3 - Reshaping C (by inserting or removing singleton dimensions)

    [sizeC, sizeCisnew] = adjustsize(size(c), shiftC, false, delC, false);
    if sizeCisnew, c = reshape(c, sizeC); end


function c = squash2D_mtimes(a, b, idA, idB, sizeA, sizeB, squashOK)
% SQUASH2D_MTIMES  Multiproduct with single-block expansion (SBX).
%    Actually, no expansion is performed. The multi-block array is
%    rearranged from N-D to 2-D, then MTIMES is applied, and eventually the
%    result is rearranged back to N-D. No additional memory is required.
%    One and only one of the two arrays must be single-block, and its IDs
%    must be [1 2] (MAIN 1 removes leading singletons). Both arrays
%    must contain 2-D blocks (MAIN 1 expands 1-D blocks to 2-D).

    if squashOK == 1 % A is multi-block, B is single-block (squashing A)

        % STEP 1 - Moving IDA(2) to last dimension
        nd = length(sizeA);
        d2 = idA(2);
        order = [1:(d2-1), (d2+1):nd, d2]; % Partial shifting
        a = permute(a, order); % ...xQ

        % STEP 2 - Squashing A from N-D to 2-D
        q = sizeB(1);
        s = sizeB(2);
        lengthorder = length(order);
        collapsedsize = sizeA(order(1:lengthorder-1));
        n = prod(collapsedsize);
        a = reshape(a, [n, q]); % NxQ
        fullsize = [collapsedsize s]; % Size to reshape C back to N-D

    else % B is multi-block, A is single-block (squashing B)

        % STEP 1 - Moving IDB(1) to first dimension
        nd = length(sizeB);
        d1 = idB(1);
        order = [d1, 1:(d1-1), (d1+1):nd]; % Partial shifting
        b = permute(b, order); % Qx...

        % STEP 2 - Squashing B from N-D to 2-D
        p = sizeA(1);
        q = sizeA(2);
        lengthorder = length(order);
        collapsedsize = sizeB(order(2:lengthorder));
        n = prod(collapsedsize);
        b = reshape(b, [q, n]); % QxN
        fullsize = [p collapsedsize]; % Size to reshape C back to N-D

    end

    % FINAL STEPS - Multiplication, reshape to N-D, inverse permutation
    invorder(order) = 1 : lengthorder;
    c = permute(reshape(a*b, fullsize), invorder);


function [sizeA, sizeB, shiftC, delC, sizeisnew, idA, idB, ...
          squashOK, sxtimesOK, timesOK, mtimesOK, sumOK] = ...
                                          sizeval(idA0,idB0, sizeA0,sizeB0)
%SIZEVAL   Evaluation of array size, block size, and IDs
%    Possible values for IDA and IDB:
%        [DA1 DA2], [DB1 DB2]
%        [DA1 DA2], [DB1]
%        [DA1],     [DB1 DB2]
%        [DA1],     [DB1]
%        [DA1 0],   [0 DB1]
%        [0 DA1],   [DB1 0]
%
%    sizeA/B     Equal to sizeA0/B0 if RESHAPE is not needed in MAIN 1
%    shiftC, delC    Variables controlling MAIN 3.
%    sizeisnew   1x2 logical array; activates reshaping of A and B.
%    idA/B       May change only if squashOK ~= 0
%    squashOK    If only A or B is a multi-block array (M-B) and the other
%                is single-block (1-B), it will be rearranged from N-D to
%                2-D. If both A and B are 1-B or M-B arrays, squashOK = 0.
%                If only A (or B) is a M-B array, squashOK = 1 (or 2).
%    sxtimesOK, timesOK, mtimesOK    Flags controlling MAIN 2 (TRUE/FALSE).
%    sumOK       Dimension along which SUM is performed. If SUM is not
%                needed, sumOK = 0.

% Initializing output arguments

    idA = idA0;
    idB = idB0;
    squashOK = 0;
    sxtimesOK = false;
    timesOK = false;
    mtimesOK = false;
    sumOK = 0;
    shiftC = 0;
    delC = 0;

% Checking for gross input errors

    NidA = numel(idA);
    NidB = numel(idB);
    % The element counts are tested before idA(1) and idB(1) are read, so
    % that an empty ID argument triggers the error below rather than an
    % out-of-range indexing error. When badcount is true, the short-circuit
    % evaluation of || never reaches idA1 / idB1.
    badcount = NidA>2 || NidB>2 || NidA==0 || NidB==0;
    if ~badcount
        idA1 = idA(1);
        idB1 = idB(1);
    end
    if  badcount || ...
           ~isreal(idA1) ||    ~isreal(idB1)   || ...
        ~isnumeric(idA1) || ~isnumeric(idB1)   || ...
                 0>idA1  ||          0>idB1    || ... % negative
         idA1~=fix(idA1) ||  idB1~=fix(idB1)   || ... % non-integer
         ~isfinite(idA1) ||  ~isfinite(idB1)          % Inf or NaN
        error('MULTIPROD:InvalidDimensionArgument', ...
        ['Internal-dimension arguments (e.g., [IDA1 IDA2]) must\n', ...
         'contain only one or two non-negative finite integers']);
    end

% Checking Syntaxes containing zeros (4b/c)

    declared_outer = false;
    idA2 = idA(NidA); % It may be IDA1 = IDA2 (1-D block)
    idB2 = idB(NidB);

    if any(idA==0) || any(idB==0)

        % "Inner products": C = MULTIPROD(A, B, [0 DA1], [DB1 0])
        if idA1==0 && idA2>0 && idB1>0 && idB2==0
            idA1 = idA2;
            idB2 = idB1;
        % "Outer products": C = MULTIPROD(A, B, [DA1 0], [0 DB1])
        elseif idA1>0 && idA2==0 && idB1==0 && idB2>0
            declared_outer = true;
            idA2 = idA1;
            idB1 = idB2;
        else
            error('MULTIPROD:InvalidDimensionArgument', ...
            ['Misused zeros in the internal-dimension arguments\n', ...
            '(see help heads 4b and 4c)']);
        end
        NidA = 1;
        NidB = 1;
        idA = idA1;
        idB = idB1;

    elseif (NidA==2 && idA2~=idA1+1) || ...  % Non-adjacent IDs
           (NidB==2 && idB2~=idB1+1)
        % The first string ends with a space so that the two concatenated
        % pieces do not run together in the message.
        error('MULTIPROD:InvalidDimensionArgument', ...
        ['If an array contains 2-D blocks, its two internal dimensions ', ...
        'must be adjacent (e.g. IDA2 == IDA1+1)']);
    end

% ESC - Case for which no reshaping is needed (both A and B are scalars)

    scalarA = isequal(sizeA0, [1 1]);
    scalarB = isequal(sizeB0, [1 1]);
    if scalarA && scalarB
        sizeA = sizeA0;
        sizeB = sizeB0;
        sizeisnew = [false false];
        timesOK = true;
        return
    end

% Computing and checking adjusted sizes
% The lengths of ADJSIZEA and ADJSIZEB must be >= IDA(END) and IDB(END)

    NsA = idA2 - length(sizeA0); % Number of added trailing singletons
    NsB = idB2 - length(sizeB0);
    adjsizeA = [sizeA0 ones(1,NsA)];
    adjsizeB = [sizeB0 ones(1,NsB)];
    extsizeA = adjsizeA([1:idA1-1, idA2+1:end]); % Size of EDs
    extsizeB = adjsizeB([1:idB1-1, idB2+1:end]);
    p = adjsizeA(idA1);
    q = adjsizeA(idA2);
    r = adjsizeB(idB1);
    s = adjsizeB(idB2);
    scalarsinA = (p==1 && q==1);
    scalarsinB = (r==1 && s==1);
    singleA = all(extsizeA==1);
    singleB = all(extsizeB==1);
    if q~=r && ~scalarsinA && ~scalarsinB && ~declared_outer
        error('MULTIPROD:InnerDimensionsMismatch', ...
              'Inner matrix dimensions must agree.');
    end

% STEP 1/3 - DIMENSION SHIFTING (FIRST STEP OF AX)
%   Pipeline 1 (using TIMES) never needs left, and may need right shifting.
%   Pipeline 2 (using MTIMES) may need left shifting of A and right of B.

    shiftA = 0;
    shiftB = 0;
    diffBA = idB1 - idA1;
    if scalarA % Do nothing
    elseif singleA && ~scalarsinB, shiftA = -idA1 + 1; %  Left shifting A
    elseif idB1 > idA1,            shiftA = diffBA;    % Right shifting A
    end
    if scalarB % Do nothing
    elseif singleB && ~scalarsinA, shiftB = -idB1 + 1; %  Left shifting B
    elseif idA1 > idB1,            shiftB = -diffBA;   % Right shifting B
    end

% STEP 2/3 - SELECTION OF PROPER ENGINE AND BLOCK SIZE ADJUSTMENTS

    addA  = 0; addB  = 0;
    delA  = 0; delB  = 0;
    swapA = 0; swapB = 0;
    idC1 = max(idA1, idB1);
    idC2 = idC1 + 1;
    checktimes = false;

    if (singleA||singleB) &&~scalarsinA &&~scalarsinB % Engine using MTIMES

        if singleA && singleB
            mtimesOK = true;
            shiftC=idC1-1; % Right shifting C
            idC1=1; idC2=2;
        elseif singleA
            squashOK = 2;
            idB = [idB1, idB1+1] + shiftB;
        else % singleB
            squashOK = 1;
            idA = [idA1, idA1+1] + shiftA;
        end

        if NidA==2 && NidB==2 % 1) 2-D BLOCKS BY 2-D BLOCKS
            % OK
        elseif NidA==2        % 2) 2-D BLOCKS BY 1-D BLOCKS
            addB=idB1+1; delC=idC2;
        elseif NidB==2        % 3) 1-D BLOCKS BY 2-D BLOCKS
            addA=idA1; delC=idC1;
        else                  % 4) 1-D BLOCKS BY 1-D BLOCKS
            if declared_outer
                addA=idA1+1; addB=idB1;
            else
                addA=idA1; addB=idB1+1; delC=idC2;
            end
        end

    else % Engine using TIMES (also used if SCALARA || SCALARB)

        sxtimesOK = true;

        if NidA==2 && NidB==2 % 1) 2-D BLOCKS BY 2-D BLOCKS

            if scalarA || scalarB
                timesOK=true;
            elseif scalarsinA && scalarsinB % scal-by-scal
                checktimes=true;
            elseif scalarsinA || scalarsinB || ... % scal-by-mat
                (q==1 && r==1)  % vec-by-vec ("outer")
            elseif p==1 && s==1 % vec-by-vec ("inner")
                swapA=idA1; sumOK=idC1; checktimes=true;
            elseif s==1 % mat-by-vec
                swapB=idB1; sumOK=idC2;
            elseif p==1 % vec-by-mat
                swapA=idA1; sumOK=idC1;
            else % mat-by-mat
                addA=idA2+1; addB=idB1; sumOK=idC2; delC=idC2;
            end

        elseif NidA==2 % 2) 2-D BLOCKS BY 1-D BLOCKS

            if scalarA || scalarB
                timesOK=true;
            elseif scalarsinA && scalarsinB % scal-by-scal
                addB=idB1; checktimes=true;
            elseif scalarsinA % scal-by-vec
                delA=idA1;
            elseif scalarsinB % mat-by-scal
                addB=idB1;
            elseif p==1 % vec-by-vec ("inner")
                delA=idA1; sumOK=idC1; checktimes=true;
            else % mat-by-vec
                addB=idB1; sumOK=idC2; delC=idC2;
            end

        elseif NidB==2 % 3) 1-D BLOCKS BY 2-D BLOCKS

            if scalarA || scalarB
                timesOK=true;
            elseif scalarsinA && scalarsinB % scal-by-scal
                addA=idA1+1; checktimes=true;
            elseif scalarsinB % vec-by-scal
                delB=idB2;
            elseif scalarsinA % scal-by-mat
                addA=idA1+1;
            elseif s==1 % vec-by-vec ("inner")
                delB=idB2; sumOK=idC1; checktimes=true;
            else % vec-by-mat
                addA=idA1+1; sumOK=idC1; delC=idC1;
            end

        else % 4) 1-D BLOCKS BY 1-D BLOCKS

            if scalarA || scalarB
                timesOK=true;
            elseif declared_outer % vec-by-vec ("outer")
                addA=idA1+1; addB=idB1;
            elseif scalarsinA && scalarsinB % scal-by-scal
                checktimes=true;
            elseif scalarsinA || scalarsinB % vec-by-scal
            else % vec-by-vec
                sumOK=idC1; checktimes=true;
            end
        end
    end

% STEP 3/3 - Adjusting the size of A and B. The size of C is adjusted
%            later, because it is not known yet.

    [sizeA, sizeisnew(1)] = adjustsize(sizeA0, shiftA, addA, delA, swapA);
    [sizeB, sizeisnew(2)] = adjustsize(sizeB0, shiftB, addB, delB, swapB);

    if checktimes % Faster than calling BSXFUN
        % Plain TIMES is usable when the two sizes agree once the shorter
        % one is padded with trailing singletons.
        lendiff = length(sizeB) - length(sizeA);
        if isequal([sizeA ones(1,lendiff)], [sizeB ones(1,-lendiff)])
            timesOK = true;
        end
    end


function [sizeA, sizeisnew] = adjustsize(sizeA0, shiftA, addA, delA, swapA)
% ADJUSTSIZE  Adjusting size of a block array.

    % Dimension shifting (by adding or deleting leading singleton dim.)
    if     shiftA>0, [sizeA,newA1] = addsing(sizeA0, 1, shiftA);
    elseif shiftA<0, [sizeA,newA1] = delsing(sizeA0, 1,-shiftA);
    else   sizeA = sizeA0;  newA1  = false;
    end
    % Modifying block size (by adding, deleting, or moving singleton dim.)
    if      addA, [sizeA,newA2] = addsing(sizeA, addA+shiftA, 1); % 1D-->2D
    elseif  delA, [sizeA,newA2] = delsing(sizeA, delA+shiftA, 1); % 2D-->1D
    elseif swapA, [sizeA,newA2] = swapdim(sizeA,swapA+shiftA); % ID Swapping
    else                 newA2  = false;
    end
    sizeisnew = newA1 || newA2;


function [newsize, flag] = addsing(size0, dim, ns)
%ADDSING   Adding NS singleton dimensions to the size of an array.
%   Warning: NS is assumed to be a positive integer.
%   Example: If the size of A is ..... SIZE0 = [5 9 3]
%            NEWSIZE = ADDSING(SIZE0, 3, 2) is [5 9 1 1 3]

    if dim > length(size0)
        newsize = size0;
        flag = false;
    else
        newsize = [size0(1:dim-1), ones(1,ns), size0(dim:end)];
        flag = true;
    end


function [newsize, flag] = delsing(size0, dim, ns)
%DELSING   Removing NS singleton dimensions from the size of an array.
%   Warning: Trailing singletons are not removed
%   Example: If the size of A is SIZE0 = [1 1 1 5 9 3]
%            NEWSIZE = DELSING(SIZE0, 1, 3) is  [5 9 3]

    if dim > length(size0)-ns % Trailing singletons are not removed
        newsize = size0;
        flag = false;
    else % Trailing singl. added, so NEWSIZE is guaranteed to be 2D or more
        newsize = size0([1:dim-1, dim+ns:end, dim]);
        flag = true;
    end


function [newsize, flag] = swapdim(size0, dim)
%SWAPDIM   Swapping two adjacent dimensions of an array (DIM and DIM+1).
%   Used only when both A and B are multi-block arrays with 2-D blocks.
%   Example: If the size of A is .......... 5x(6x3)
%            NEWSIZE = SWAPDIM(SIZE0, 2) is 5x(3x6)

    newsize = [size0 1]; % Guarantees that dimension DIM+1 exists.
    newsize = newsize([1:dim-1, dim+1, dim, dim+2:end]);
    flag = true;

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multiprodmultitransp_license.txt
================================================
Copyright (c) 2009, Paolo de Leva
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the distribution

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multiscale.m
================================================
function A = multiscale(scale, A)
% Multiplies the 2D slices in a 3D matrix by individual scalars.
%
% function A = multiscale(scale, A)
%
% Given a vector scale of length N and a 3-dimensional matrix A of size
% n-by-m-by-N, returns a matrix A of same size such that
% A(:, :, k) := scale(k) * A(:, :, k);
%
% See also: multiprod multitransp multitrace

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:

    assert(ndims(A) <= 3, ...
           ['multiscale is only well defined for matrix arrays of 3 ' ...
            'or less dimensions.']);
    [n, m, N] = size(A);
    assert(numel(scale) == N, ...
           ['scale must be a vector whose length equals the third ' ...
            'dimension of A, that is, the number of 2D matrix slices ' ...
            'in the 3D matrix A.']);

    scale = scale(:);
    % Each slice is flattened into a column, so that column k is multiplied
    % by scale(k). The non-conjugate transpose (.') is used on purpose: a
    % plain ' would conjugate complex scales and yield
    % conj(scale(k)) * A(:, :, k) instead.
    A = reshape(bsxfun(@times, reshape(A, n*m, N), scale.'), n, m, N);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multiskew.m
================================================
function Y = multiskew(X)
% Returns the skew-symmetric parts of the matrices in the 3D matrix X.
%
% function Y = multiskew(X)
%
% Y is a 3D matrix the same size as X. Each slice Y(:, :, i) is the
% skew-symmetric part of the slice X(:, :, i).
%
% See also: multiprod multitransp multiscale multisym

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Jan. 31, 2013.
% Contributors:
% Change log:

    Y = .5*(X - multitransp(X));

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multisqnorm.m
================================================
function sqnorm = multisqnorm(A)
% Returns the squared Frobenius norms of the slices of a 3D matrix.
%
% function sqnorm = multisqnorm(A)
%
% Given a 3-dimensional matrix A of size n-by-m-by-N, returns a column
% vector of length N such that sqnorm(i) = norm(A(:, :, i), 'fro')^2.
% The entries of A may be real or complex; the output is real.
%
% See also: multiprod multitransp multitrace norms

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 17, 2015.
% Contributors:
% Change log:

    assert(ndims(A) <= 3, ...
           ['multisqnorm is only well defined for matrix arrays of 3 ' ...
            'or less dimensions.']);
    [n, m, N] = size(A);

    % This is equivalent to squeeze(sum(norms(A, 2, 1).^2)), but faster.
    % The squared modulus abs(.)^2 is summed, so that complex entries
    % contribute |a|^2 (as in the Frobenius norm) rather than a^2.
    sqnorm = sum(abs(reshape(A, n*m, N)).^2, 1).';

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multisym.m
================================================
function Y = multisym(X)
% Returns the symmetric parts of the matrices in the 3D matrix X
%
% function Y = multisym(X)
%
% Y is a 3D matrix the same size as X. Each slice Y(:, :, i) is the
% symmetric part of the slice X(:, :, i).
%
% See also: multiprod multitransp multiscale multiskew

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Jan. 31, 2013.
% Contributors:
% Change log:

    Y = .5*(X + multitransp(X));

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multitrace.m
================================================
function tr = multitrace(A)
% Computes the traces of the 2D slices in a 3D matrix.
%
% function tr = multitrace(A)
%
% For a 3-dimensional matrix A of size n-by-n-by-N, returns a column vector
% tr of length N such that tr(k) = trace(A(:, :, k));
%
% See also: multiprod multitransp multiscale

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:

    assert(ndims(A) <= 3, ...
           ['multitrace is only well defined for matrix arrays of 3 ' ...
            'or less dimensions.']);

    tr = diagsum(A, 1, 2);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multitransp.m
================================================
function b = multitransp(a, dim)
% Transposing arrays of matrices.
% % B = MULTITRANSP(A) is equivalent to B = MULTITRANSP(A, DIM), where % DIM = 1. % % B = MULTITRANSP(A, DIM) is equivalent to % B = PERMUTE(A, [1:DIM-1, DIM+1, DIM, DIM+2:NDIMS(A)]), where A is an % array containing N P-by-Q matrices along its dimensions DIM and DIM+1, % and B is an array containing the Q-by-P transpose (.') of those N % matrices along the same dimensions. N = NUMEL(A) / (P*Q), i.e. N is % equal to the number of elements in A divided by the number of elements % in each matrix. % % MULTITRANSP, PERMUTE and IPERMUTE are a generalization of TRANSPOSE % (.') for N-D arrays. % % Example: % A 5-by-9-by-3-by-2 array may be considered to be a block array % containing ten 9-by-3 matrices along dimensions 2 and 3. In this % case, its size is so indicated: 5-by-(9-by-3)-by-2 or 5x(9x3)x2. % If A is ................ a 5x(9x3)x2 array of 9x3 matrices, % C = MULTITRANSP(A, 2) is a 5x(3x9)x2 array of 3x9 matrices. % % See also PERMUTE, IPERMUTE, MULTIPROD, MULTITRACE, MULTISCALE. % $ Version: 1.0 $ % CODE by: Paolo de Leva (IUSM, Rome, IT) 2005 Sep 9 % COMMENTS by: Code author 2006 Nov 21 % OUTPUT tested by: Code author 2005 Sep 13 % ------------------------------------------------------------------------- % Setting DIM if not supplied. if nargin == 1, dim = 1; end % Transposing order = [1:dim-1, dim+1, dim, dim+2:ndims(a)]; b = permute(a, order); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/orthogonalize.m ================================================ function [orthobasis, L] = orthogonalize(M, x, basis) % Orthonormalizes a basis of tangent vectors in the Manopt framework. % % function [orthobasis, L] = orthogonalize(M, x, basis) % % M is a Manopt manifold structure obtained from a factory. % x is a point on the manifold M. % basis is a cell containing n linearly independent tangent vectors at x. 
%
% orthobasis is a cell of same size as basis which contains an orthonormal
% basis for the same subspace as that spanned by basis. Orthonormality is
% assessed with respect to the metric on the tangent space to M at x.
% L is upper triangular of size n x n if basis has n vectors, such that,
% basis{k} = sum_j=1^k orthobasis{j} * L(j, k) (akin to R in a QR
% factorization.)
%
% See also: grammatrix tangentorthobasis

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 28, 2016.
% Contributors: 
% Change log: 

    % Gram matrix of the input vectors for the metric at x.
    G = grammatrix(M, x, basis);

    % Rationale: if the vectors in 'basis' were the columns of a matrix V
    % and the inner product were the usual dot product, then G = V'*V. We
    % want an invertible R such that V*R has orthonormal columns, that is,
    % R'*V'*V*R = eye(n), or equivalently G = inv(R)'*inv(R).
    % A Cholesky factorization gives an upper triangular L with G = L'*L,
    % hence R = inv(L). Column k of R lists the coefficients of the linear
    % combination of the input vectors which yields the k-th orthonormal
    % vector.
    %
    % A QR factorization of V would do the same job, but V is not available
    % in this setting: G is the only simple object we have access to.
    %
    % Should this prove numerically unstable, a modified Gram-Schmidt
    % procedure (possibly applied twice through a helper) would be a good
    % alternative to consider.
    L = chol(G);
    coeffs = inv(L);

    % coeffs is upper triangular, so the k-th output vector only involves
    % the first k input vectors.
    nvecs = numel(basis);
    orthobasis = cell(size(basis));
    for k = 1 : nvecs
        orthobasis{k} = lincomb(M, x, basis(1:k), coeffs(1:k, k));
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/plotprofile.m
================================================
function cost = plotprofile(problem, x, d, t)
% Plot the cost function along a geodesic or a retraction path.
%
% function plotprofile(problem)
% function plotprofile(problem, x)
% function plotprofile(problem, x, d)
% function plotprofile(problem, x, d, t)
% function plotprofile(problem, x, [], t)
% function plotprofile(problem, [], [], t)
%
% function costs = plotprofile(problem, x, d, t)
%
% Plot profile evaluates the cost function along a geodesic gamma(t) such
% that gamma(0) = x and the derivative of gamma at 0 is the direction d.
% The input t is a vector specifying for which values of t we must evaluate
% f(gamma(t)) (it may include negative values).
%
% If the function is called with an output, the plot is not drawn and the
% values of the cost are returned for the instants t.
%
% If x is omitted, a random point is picked. If d is omitted, a random
% tangent vector at x is picked. If t is omitted, it is generated as a
% linspace over [-1, 1].

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Jan. 9, 2013.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   Nov. 12, 2016 (NB):
%       Making more inputs optional.

    % Verify that the problem description is sufficient.
% Without a cost function there is nothing to plot.
    if ~canGetCost(problem)
        error('It seems no cost was provided.');
    end

    % Record which optional inputs were actually supplied (non-empty).
    have_x = exist('x', 'var') && ~isempty(x);
    have_d = exist('d', 'var') && ~isempty(d);
    have_t = exist('t', 'var') && ~isempty(t);

    % Fill in the missing inputs with random / default values. A direction
    % d only makes sense relative to a given x.
    if ~have_x
        x = problem.M.rand();
        if have_d
            error('If x is omitted, d should not be specified.');
        end
    end
    if ~have_d
        d = problem.M.randvec(x);
    end
    if ~have_t
        t = linspace(-1, 1, 101);
    end

    % Move along the exponential map if the manifold provides one, and
    % along the retraction otherwise. The label is only used for the plot.
    if isfield(problem.M, 'exp')
        move = problem.M.exp;
        label = 'Exp';
    else
        move = problem.M.retr;
        label = 'Retr';
    end

    % Evaluate the cost at each requested step, sharing one cache.
    storedb = StoreDB();
    cost = zeros(size(t));
    for k = 1 : numel(t)
        cost(k) = getCost(problem, move(x, d, t(k)), storedb);
    end

    % Only draw when the caller does not collect the values.
    if nargout == 0
        plot(t, cost);
        xlabel('t');
        ylabel(['f(' label '_x(t*d))']);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/powermanifold.m
================================================
function Mn = powermanifold(M, n)
% Returns a structure describing a power manifold M^n = M x M x ... x M.
%
% function Mn = powermanifold(M, n)
%
% Input: a manifold structure M and an integer n >= 1.
% 
% Output: a manifold structure Mn representing M x ... x M (n copies of M)
% with the metric of M extended element-wise. Points and vectors are stored
% as cells of size nx1.
%
% This code is for prototyping uses. The structures returned are often
% inefficient representations of power manifolds owing to their use of
% for-loops, but they should allow to rapidly try out an idea.
%
% Example (an inefficient representation of the oblique manifold (3, 10)):
% Mn = powermanifold(spherefactory(3), 10)
% disp(Mn.name());
% x = Mn.rand()
%
% See also: productmanifold

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%   NB, July 4, 2013: Added support for vec, mat, tangent.
%                     Added support for egrad2rgrad and ehess2rhess.
% At least one copy of M is required.
    assert(n >= 1, 'n must be an integer larger than or equal to 1.');

    Mn.name = @() sprintf('[%s]^%d', M.name(), n);

    Mn.dim = @() n*M.dim();

    % Inner product: sum of the inner products on each copy of M.
    Mn.inner = @inner;
    function val = inner(x, u, v)
        val = 0;
        for k = 1 : n
            val = val + M.inner(x{k}, u{k}, v{k});
        end
    end

    Mn.norm = @(x, d) sqrt(Mn.inner(x, d, d));

    % Distance: square root of the sum of squared distances on each copy.
    Mn.dist = @dist;
    function d = dist(x, y)
        sqd = 0;
        for k = 1 : n
            sqd = sqd + M.dist(x{k}, y{k})^2;
        end
        d = sqrt(sqd);
    end

    Mn.typicaldist = @typicaldist;
    function d = typicaldist()
        sqd = 0;
        for k = 1 : n
            sqd = sqd + M.typicaldist()^2;
        end
        d = sqrt(sqd);
    end

    % The operations below simply apply the operation of M cell by cell.
    Mn.proj = @proj;
    function u = proj(x, u)
        for k = 1 : n
            u{k} = M.proj(x{k}, u{k});
        end
    end

    Mn.tangent = @tangent;
    function u = tangent(x, u)
        for k = 1 : n
            u{k} = M.tangent(x{k}, u{k});
        end
    end

    % If M has no tangent2ambient, tangent vectors are returned unchanged.
    if isfield(M, 'tangent2ambient')
        Mn.tangent2ambient = @tangent2ambient;
    else
        Mn.tangent2ambient = @(x, u) u;
    end
    function u = tangent2ambient(x, u)
        for k = 1 : n
            u{k} = M.tangent2ambient(x{k}, u{k});
        end
    end

    Mn.egrad2rgrad = @egrad2rgrad;
    function g = egrad2rgrad(x, g)
        for k = 1 : n
            g{k} = M.egrad2rgrad(x{k}, g{k});
        end
    end

    Mn.ehess2rhess = @ehess2rhess;
    function h = ehess2rhess(x, eg, eh, h)
        for k = 1 : n
            h{k} = M.ehess2rhess(x{k}, eg{k}, eh{k}, h{k});
        end
    end

    % Exponential and retraction: the step t defaults to 1.
    Mn.exp = @expo;
    function x = expo(x, u, t)
        if nargin < 3
            t = 1.0;
        end
        for k = 1 : n
            x{k} = M.exp(x{k}, u{k}, t);
        end
    end

    Mn.retr = @retr;
    function x = retr(x, u, t)
        if nargin < 3
            t = 1.0;
        end
        for k = 1 : n
            x{k} = M.retr(x{k}, u{k}, t);
        end
    end

    % The logarithm is only exposed if M provides one.
    if isfield(M, 'log')
        Mn.log = @loga;
    end
    function u = loga(x, y)
        u = cell(n, 1);
        for k = 1 : n
            u{k} = M.log(x{k}, y{k});
        end
    end

    % Hash: concatenate the hashes of all entries, then hash the result so
    % that the final string stays short.
    Mn.hash = @hash;
    function str = hash(x)
        str = '';
        for k = 1 : n
            str = [str M.hash(x{k})]; %#ok
        end
        str = ['z' hashmd5(str)];
    end

    % Linear combination a1*u1 (3 inputs) or a1*u1 + a2*u2 (5 inputs).
    Mn.lincomb = @lincomb;
    function x = lincomb(x, a1, u1, a2, u2)
        switch nargin
            case 3
                for k = 1 : n
                    x{k} = M.lincomb(x{k}, a1, u1{k});
                end
            case 5
                for k = 1 : n
                    x{k} = M.lincomb(x{k}, a1, u1{k}, a2, u2{k});
                end
            otherwise
                error('Bad usage of powermanifold.lincomb');
        end
    end

    Mn.rand = @rand;
    function x = rand()
        x = cell(n, 1);
        for k = 1 : n
            x{k} = M.rand();
        end
    end

    % Random tangent vector: one random vector per copy, with the whole
    % vector scaled by 1/sqrt(n).
    Mn.randvec = @randvec;
    function u = randvec(x)
        u = cell(n, 1);
        for k = 1 : n
            u{k} = M.randvec(x{k});
        end
        u = Mn.lincomb(x, 1/sqrt(n), u);
    end

    Mn.zerovec = @zerovec;
    function u = zerovec(x)
        u = cell(n, 1);
        for k = 1 : n
            u{k} = M.zerovec(x{k});
        end
    end

    % Transport and pairwise mean are only exposed if M provides them.
    if isfield(M, 'transp')
        Mn.transp = @transp;
    end
    function u = transp(x1, x2, u)
        for k = 1 : n
            u{k} = M.transp(x1{k}, x2{k}, u{k});
        end
    end

    if isfield(M, 'pairmean')
        Mn.pairmean = @pairmean;
    end
    function y = pairmean(x1, x2)
        y = cell(n, 1);
        for k = 1 : n
            y{k} = M.pairmean(x1{k}, x2{k});
        end
    end

    % Compute the length of a vectorized tangent vector of M at x, assuming
    % this length is independent of the point x (that should be fine).
    % len_vec is shared with the nested functions vec and mat below.
    if isfield(M, 'vec')
        rand_x = M.rand();
        zero_u = M.zerovec(rand_x);
        len_vec = length(M.vec(rand_x, zero_u));

        Mn.vec = @vec;

        if isfield(M, 'mat')
            Mn.mat = @mat;
        end
    end

    % vec stacks the vectorized entries into one long column vector.
    function u_vec = vec(x, u_mat)
        u_vec = zeros(len_vec, n);
        for k = 1 : n
            u_vec(:, k) = M.vec(x{k}, u_mat{k});
        end
        u_vec = u_vec(:);
    end

    % mat is the converse: cut the long vector in n pieces of length
    % len_vec and convert each piece back with M.mat.
    function u_mat = mat(x, u_vec)
        u_mat = cell(n, 1);
        u_vec = reshape(u_vec, len_vec, n);
        for k = 1 : n
            u_mat{k} = M.mat(x{k}, u_vec(:, k));
        end
    end

    % Inherit the isometry flag from M; report false if M does not have it.
    if isfield(M, 'vecmatareisometries')
        Mn.vecmatareisometries = M.vecmatareisometries;
    else
        Mn.vecmatareisometries = @() false;
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/productmanifold.m
================================================
function M = productmanifold(elements)
% Returns a structure describing a product manifold M = M1 x M2 x ... x Mn.
%
% function M = productmanifold(elements)
%
% Input: an elements structure such that each field contains a manifold
% structure.
% 
% Output: a manifold structure M representing the manifold obtained by
% taking the Cartesian product of the manifolds described in the elements
% structure, with the metric obtained by element-wise extension.
Points
% and vectors are stored as structures with the same fieldnames as in
% elements.
%
% Example:
% M = productmanifold(struct('X', spherefactory(3), 'Y', spherefactory(4)))
% disp(M.name());
% x = M.rand()
%
% Points of M = S^2 x S^3 are represented as structures with two fields, X
% and Y. The values associated to X are points of S^2, and likewise points
% of S^3 for the field Y. Tangent vectors are also represented as
% structures with two corresponding fields X and Y.
% 
% See also: powermanifold

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%   NB, July 4, 2013: Added support for vec, mat, tangent.
%                     Added support for egrad2rgrad and ehess2rhess.
%                     Modified hash function to make hash strings shorter.

    % The field names of 'elements' label the factors of the product.
    elems = fieldnames(elements);
    nelems = numel(elems);

    assert(nelems >= 1, ...
           'elements must be a structure with at least one field.');

    % Name: lists every factor as [field: name], separated by ' x '.
    M.name = @name;
    function str = name()
        str = ['Product manifold: ' ...
               sprintf('[%s: %s]', elems{1}, elements.(elems{1}).name())];
        for i = 2 : nelems
            fld = elems{i};
            str = [str sprintf(' x [%s: %s]', ...
                   fld, elements.(fld).name())]; %#ok
        end
    end

    % Dimension: sum of the dimensions of the factors.
    M.dim = @dim;
    function d = dim()
        d = 0;
        for i = 1 : nelems
            d = d + elements.(elems{i}).dim();
        end
    end

    % Inner product: sum of the inner products on the factors.
    M.inner = @inner;
    function val = inner(x, u, v)
        val = 0;
        for i = 1 : nelems
            fld = elems{i};
            val = val + elements.(fld).inner(x.(fld), u.(fld), v.(fld));
        end
    end

    M.norm = @(x, d) sqrt(M.inner(x, d, d));

    % Distance: square root of the sum of squared distances on the factors.
    M.dist = @dist;
    function d = dist(x, y)
        sqd = 0;
        for i = 1 : nelems
            fld = elems{i};
            sqd = sqd + elements.(fld).dist(x.(fld), y.(fld))^2;
        end
        d = sqrt(sqd);
    end

    M.typicaldist = @typicaldist;
    function d = typicaldist
        sqd = 0;
        for i = 1 : nelems
            sqd = sqd + elements.(elems{i}).typicaldist()^2;
        end
        d = sqrt(sqd);
    end

    % The operations below apply the operation of each factor field by
    % field and collect the results in a structure with the same fields.
    M.proj = @proj;
    function v = proj(x, u)
        for i = 1 : nelems
            fld = elems{i};
            v.(fld) = elements.(fld).proj(x.(fld), u.(fld));
        end
    end

    M.tangent = @tangent;
    function v = tangent(x, u)
        for i = 1 : nelems
            fld = elems{i};
            v.(fld) = elements.(fld).tangent(x.(fld), u.(fld));
        end
    end

    % Factors without a tangent2ambient function leave their part of the
    % vector unchanged.
    M.tangent2ambient = @tangent2ambient;
    function v = tangent2ambient(x, u)
        for i = 1 : nelems
            fld = elems{i};
            if isfield(elements.(fld), 'tangent2ambient')
                v.(fld) = elements.(fld).tangent2ambient(x.(fld), u.(fld));
            else
                v.(fld) = u.(fld);
            end
        end
    end

    M.egrad2rgrad = @egrad2rgrad;
    function g = egrad2rgrad(x, g)
        for i = 1 : nelems
            fld = elems{i};
            g.(fld) = elements.(fld).egrad2rgrad(x.(fld), g.(fld));
        end
    end

    M.ehess2rhess = @ehess2rhess;
    function h = ehess2rhess(x, eg, eh, h)
        for i = 1 : nelems
            fld = elems{i};
            h.(fld) = elements.(fld).ehess2rhess(...
                              x.(fld), eg.(fld), eh.(fld), h.(fld));
        end
    end

    % Exponential and retraction: the step t defaults to 1.
    M.exp = @exp;
    function y = exp(x, u, t)
        if nargin < 3
            t = 1.0;
        end
        for i = 1 : nelems
            fld = elems{i};
            y.(fld) = elements.(fld).exp(x.(fld), u.(fld), t);
        end
    end

    M.retr = @retr;
    function y = retr(x, u, t)
        if nargin < 3
            t = 1.0;
        end
        for i = 1 : nelems
            fld = elems{i};
            y.(fld) = elements.(fld).retr(x.(fld), u.(fld), t);
        end
    end

    M.log = @log;
    function u = log(x1, x2)
        for i = 1 : nelems
            fld = elems{i};
            u.(fld) = elements.(fld).log(x1.(fld), x2.(fld));
        end
    end

    % Hash: concatenate the hashes of all factors, then hash the result so
    % that the final string stays short.
    M.hash = @hash;
    function str = hash(x)
        str = '';
        for i = 1 : nelems
            fld = elems{i};
            str = [str elements.(fld).hash(x.(fld))]; %#ok
        end
        str = ['z' hashmd5(str)];
    end

    % Linear combination a1*u1 (3 inputs) or a1*u1 + a2*u2 (5 inputs).
    M.lincomb = @lincomb;
    function v = lincomb(x, a1, u1, a2, u2)
        switch nargin
            case 3
                for i = 1 : nelems
                    fld = elems{i};
                    v.(fld) = elements.(fld).lincomb(x.(fld), ...
                                                     a1, u1.(fld));
                end
            case 5
                for i = 1 : nelems
                    fld = elems{i};
                    v.(fld) = elements.(fld).lincomb(x.(fld), ...
                                           a1, u1.(fld), a2, u2.(fld));
                end
            otherwise
                error('Bad usage of productmanifold.lincomb');
        end
    end

    M.rand = @rand;
    function x = rand()
        for i = 1 : nelems
            x.(elems{i}) = elements.(elems{i}).rand();
        end
    end

    % Random tangent vector: one random vector per factor, with the whole
    % vector scaled by 1/sqrt(nelems).
    M.randvec = @randvec;
    function u = randvec(x)
        for i = 1 : nelems
            fld = elems{i};
            u.(fld) = elements.(fld).randvec(x.(fld));
        end
        u = M.lincomb(x, 1/sqrt(nelems), u);
    end

    M.zerovec = @zerovec;
    function u = zerovec(x)
        for i = 1 : nelems
            fld = elems{i};
            u.(fld) = elements.(fld).zerovec(x.(fld));
        end
    end

    M.transp = @transp;
    function v = transp(x1, x2, u)
        for i = 1 : nelems
            fld = elems{i};
            v.(fld) = elements.(fld).transp(x1.(fld), x2.(fld), u.(fld));
        end
    end

    M.pairmean = @pairmean;
    function y = pairmean(x1, x2)
        for i = 1 : nelems
            fld = elems{i};
            y.(fld) = elements.(fld).pairmean(x1.(fld), x2.(fld));
        end
    end

    % Gather the length of the column vector representations of tangent
    % vectors for each of the manifolds. Raise a flag if any of the base
    % manifolds has no vec function available.
    vec_available = true;
    vec_lens = zeros(nelems, 1);
    for ii = 1 : nelems
        Mi = elements.(elems{ii});
        if ~isfield(Mi, 'vec')
            vec_available = false;
            break;
        end
        rand_x = Mi.rand();
        zero_u = Mi.zerovec(rand_x);
        vec_lens(ii) = length(Mi.vec(rand_x, zero_u));
    end
    % vec_pos(i) is the index where the i-th factor starts in the stacked
    % vector; vec_pos(end)-1 is the total length.
    vec_pos = cumsum([1 ; vec_lens]);

    % vec and mat are only exposed if every factor provides vec.
    if vec_available
        M.vec = @vec;
        M.mat = @mat;
    end

    function u_vec = vec(x, u_mat)
        u_vec = zeros(vec_pos(end)-1, 1);
        for i = 1 : nelems
            fld = elems{i};
            range = vec_pos(i) : (vec_pos(i+1)-1);
            u_vec(range) = elements.(fld).vec(x.(fld), u_mat.(fld));
        end
    end

    function u_mat = mat(x, u_vec)
        u_mat = struct();
        for i = 1 : nelems
            fld = elems{i};
            range = vec_pos(i) : (vec_pos(i+1)-1);
            u_mat.(fld) = elements.(fld).mat(x.(fld), u_vec(range));
        end
    end

    % vec/mat are isometries for the product only if every factor has the
    % flag and reports true.
    vecmatareisometries = true;
    for ii = 1 : nelems
        Mi = elements.(elems{ii});
        if ~(isfield(Mi, 'vecmatareisometries') && ...
             Mi.vecmatareisometries())
            vecmatareisometries = false;
            break;
        end
    end
    M.vecmatareisometries = @() vecmatareisometries;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/smallestinconvexhull.m
================================================
function [u_norm, coeffs, u] = smallestinconvexhull(M, x, U, tol)
% Computes a minimal norm convex combination of given tangent vectors in Manopt.
%
% function [u_norm, coeffs, u] = smallestinconvexhull(M, x, U)
% function [u_norm, coeffs, u] = smallestinconvexhull(M, x, U, tol)
%
% M is a manifold as returned by a Manopt factory.
% x is a point on this manifold.
% U is a cell containing N tangent vectors U{1} to U{N} at x.
% tol (default: 1e-8): tolerance for solving the quadratic program.
% 
% This function computes u, a tangent vector at x contained in the convex
% hull spanned by the N vectors U{i}, with minimal norm (according to the
% Riemannian metric on M). This is obtained by solving a convex quadratic
% program involving the Gram matrix of the given tangent vectors.
% The quadratic program is solved using Matlab's built-in quadprog,
% which requires the optimization toolbox. If this toolbox is not
% available, consider replacing with CVX for example.
%
%
% u_norm is the norm of the smallest vector u.
% coeffs is a vector of length N with entries in [0, 1] summing to 1.
% u is the sought vector: u = coeffs(1)*U{1} + ... + coeffs(N)*U{N}.
%
% Nicolas Boumal, Feb. 19, 2013
% Modified April 6, 2016 to work with Manopt.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 28, 2016.
% Contributors: 
% Change log: 
%
%   June 28, 2016 (NB):
%       Adapted for Manopt from original code by same author (Feb.
19, 2013)

    % Example code: pick a manifold, a point, and a collection of tangent
    % vectors at that point, then get the smallest vector in the convex hull
    % of those:
    %
    % M = spherefactory(5);
    % x = M.rand();
    % N = 3;
    % U = cell(N,1);
    % for k = 1 : N, U{k} = M.randvec(x); end
    % [u_norm, coeffs, u] = smallestinconvexhull(M, x, U)

    % The task is the following quadratic program:
    %   minimize ||u||^2 such that u = sum_i s_i U_i, 0 <= s_i <= 1
    %                              and sum_i s_i = 1
    %
    % With G the Gram matrix of the vectors U_i, this is equivalent to:
    %   min s'*G*s s.t. 0 <= s <= 1, s'*ones = 1.
    % The solution is then s_1 U_1 + ... + s_N U_N.

    % Default tolerance for the quadratic program solver.
    if ~exist('tol', 'var') || isempty(tol)
        tol = 1e-8;
    end
    opts = optimset('Display', 'off', 'TolFun', tol);

    % Gram matrix of the given tangent vectors.
    N = numel(U);
    G = grammatrix(M, x, U);

    % Pieces of the quadratic program, in the order quadprog expects them.
    % If the optimization toolbox is not available, consider replacing
    % quadprog with CVX.
    lin_term = zeros(N, 1);     % no linear term in the objective
    A_eq     = ones(1, N);      % equality constraint: entries sum to 1
    b_eq     = 1;
    lower    = zeros(N, 1);     % s_i >= 0
    upper    = ones(N, 1);      % s_i <= 1
    [coeffs, half_sqnorm] = quadprog(G, lin_term, ...
                                     [], [], ...        % no inequalities
                                     A_eq, b_eq, ...
                                     lower, upper, ...
                                     [], ...            % no initial guess
                                     opts);

    % quadprog minimizes (1/2)*s'*G*s, hence the factor 2 to recover the
    % squared norm of the smallest tangent vector in the convex hull.
    u_norm = real(sqrt(2*half_sqnorm));

    % If required, construct the vector explicitly.
    if nargout >= 3
        u = lincomb(M, x, U, coeffs);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/statsfunhelper.m
================================================
function statsfun = statsfunhelper(inp1, inp2)
% Helper tool to create a statsfun for the options structure of solvers.
%
% function statsfun = statsfunhelper(name, fun)
% function statsfun = statsfunhelper(S)
%
% Usage with (name, fun):
%
% Input 1: name is a string which is a valid field name (no spaces, starts
% with a letter or an underscore, only alphanumeric characters and
% underscores).
%
% Input2: fun is a function handle with one output and 1 to 4 inputs, as
% follows (your choice):
%
%  fun(x)  or  fun(problem, x)  or
%  fun(problem, x, stats)  or  fun(problem, x, stats, store)
%
% where the inputs are the ones that would be given to options.statsfun, as
% described in the help of the solver used. Typically, x is the point on
% the manifold at the current iterate, problem is the Manopt problem
% structure, stats is all the current statistics recorded for that iterate
% and store is the cache structure at the current iterate.
%
% When calling a Manopt solver with the options structure, such as for
% example with:
%
%  [x, xcost, info] = steepestdescent(problem, [], options);
%
% you may set a field of the options structure as follows:
%
%  options.statsfun = statsfunhelper('nameofthefield', fun);
%
% As a result, at each iteration, the stats structure will contain a field
% stats.nameofthefield with the value returned by the call to fun at that
% iterate. The stats structures are stored in the struct-array info.
% As an example, if the value returned by fun is a scalar, then
% [info.nameofthefield] is a vector containing all returned values.
%
%
% Usage with S:
%
% The input S is a structure. For each field of S, say S.field, the stats
% structure will be augmented with stats.field = fun(..), where fun is the
% function handle stored in S.field, and with the same conventions as
% above. This version allows to record more than one bit of information at
% each iteration. Example:
% 
%  metrics.nameofthefield = fun;
%  metrics.othername = otherfun;
%  options.statsfun = statsfunhelper(metrics);
%
% The different function handles (here, fun and otherfun) can take 1 to 4
% inputs too, and they do not have to take the same number of inputs.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 17, 2014.
% Contributors: 
% Change log: 

    % Normalize both calling conventions to one structure S which maps
    % field names to function handles.
    if (nargin == 1) && isstruct(inp1)
        S = inp1;
    elseif (nargin == 2)
        S = struct(inp1, inp2);
    else
        error('statsfunhelper takes 1 or 2 inputs. If 1 input, it must be a structure.');
    end

    statsfun = @thestatsfun;

    % The statsfun proper: calls each registered handle with as many inputs
    % as that handle declares, and records its output in stats under the
    % corresponding field name. Fields are processed in the order of S, so
    % a handle which reads stats sees the fields recorded before it.
    function stats = thestatsfun(problem, x, stats, store)
        fields = fieldnames(S);
        for k = 1 : length(fields)
            key = fields{k};
            handler = S.(key);
            nin = nargin(handler);
            if nin == 1
                stats.(key) = handler(x);
            elseif nin == 2
                stats.(key) = handler(problem, x);
            elseif nin == 3
                stats.(key) = handler(problem, x, stats);
            elseif nin == 4
                stats.(key) = handler(problem, x, stats, store);
            else
                error('The functions passed to statsfunhelper must take 1 to 4 inputs.');
            end
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/surfprofile.m
================================================
function costs = surfprofile(problem, x, d1, d2, t1, t2)
% Plot the cost function as a surface over a 2-dimensional subspace.
%
% function surfprofile(problem, x, d1, d2, t1, t2)
% function costs = surfprofile(problem, x, d1, d2, t1, t2)
%
% Evaluates the cost function at points
% 
%   gamma(t1, t2) = exponential_x(t1*d1 + t2*d2)
% 
% where the exponential map at x is specified by problem.M.exp (retr is
% used instead if needed). d1 and d2 are two tangent vectors to problem.M
% at the point x. The values assigned to t1 and t2 are as specified in the
% two input vectors t1 and t2.
% 
% If the function is called with an output, the plot is not drawn and the
% values of the cost are returned in a matrix of size
% length(t1)*length(t2).
To plot a surf, call surf(t1, t2, costs.') (notice
% the transpose).
% 
% If x is omitted, a point is generated at random. If d1 is omitted, a
% random tangent vector at x is generated. If d2 is omitted, a random
% tangent vector at x is generated, orthogonally to d1. If t1, t2 are
% omitted, they are generated with linspace's in [-1, 1].

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Sep. 1, 2014.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   Nov. 12, 2016 (NB):
%       Most inputs are now optional.

    % Without a cost function there is nothing to plot.
    if ~canGetCost(problem)
        error('It seems no cost was provided.');
    end

    % Record which optional inputs were actually supplied (non-empty).
    have_x  = exist('x',  'var') && ~isempty(x);
    have_d1 = exist('d1', 'var') && ~isempty(d1);
    have_d2 = exist('d2', 'var') && ~isempty(d2);
    have_t1 = exist('t1', 'var') && ~isempty(t1);
    have_t2 = exist('t2', 'var') && ~isempty(t2);

    % Fill in the missing inputs. Directions only make sense relative to a
    % given point x.
    if ~have_x
        x = problem.M.rand();
        if have_d1 || have_d2
            error('If x is omitted, d1, d2 should not be specified.');
        end
    end
    if ~have_d1
        d1 = problem.M.randvec(x);
    end
    if ~have_d2
        d2 = problem.M.randvec(x);
        % Remove from d2 its component along d1, so that a generated d2 is
        % orthogonal to d1.
        coeff = problem.M.inner(x, d1, d2) / problem.M.inner(x, d1, d1);
        d2 = problem.M.lincomb(x, 1, d2, -coeff, d1);
    end
    if ~have_t1
        t1 = linspace(-1, 1, 51);
    end
    if ~have_t2
        t2 = linspace(-1, 1, 51);
    end

    % Move along the exponential map if the manifold provides one, and
    % along the retraction otherwise. The label is only used for the plot.
    if isfield(problem.M, 'exp')
        move = problem.M.exp;
        label = 'Exp';
    else
        move = problem.M.retr;
        label = 'Retr';
    end

    % Evaluate the cost on the grid t1 x t2, sharing one cache. Row i
    % corresponds to t1(i) and column j to t2(j).
    storedb = StoreDB();
    costs = zeros(length(t1), length(t2));
    for i = 1 : length(t1)
        for j = 1 : length(t2)
            step = problem.M.lincomb(x, t1(i), d1, t2(j), d2);
            costs(i, j) = getCost(problem, move(x, step), storedb);
        end
    end

    % Only draw when the caller does not collect the values.
    if nargout == 0
        surf(t1, t2, costs.');
        xlabel('t1');
        ylabel('t2');
        zlabel(['f(' label '_x(t1*d1+t2*d2))']);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/tangent2vec.m
================================================
function vec = tangent2vec(M, x, basis, u)
% Expands a tangent vector into an orthogonal basis in the Manopt framework
%
% vec = tangent2vec(M, x, basis, u)
%
% The inverse operation is lincomb (see below).
%
% M is a Manopt manifold structure obtained from a factory.
% x is a point on the manifold M.
% basis is a cell containing n orthonormal tangent vectors at x, forming an
% orthonormal basis of the tangent space at x.
% u is a tangent vector at x
%
% vec is a column vector of length n which contains the coefficients of the
% expansion of u into the basis. Thus:
%
%    vec(k) = <basis{k}, u>_x          <- vec = tangent2vec(M, x, basis, u)
%
%    u = sum_{k=1}^n  vec(k)*basis{k}  <- u = lincomb(M, x, basis, vec)
%
% Note that tangent2vec is an isometry, that is, up to numerical round-off
% errors, with u and v two tangent vectors at x:
%
%    M.inner(x, u, v) == uu'*vv,
%
% where uu = tangent2vec(M, x, basis, u), vv = tangent2vec(M, x, basis, v).
%
% See also: lincomb tangentorthobasis orthogonalize grammatrix hessianmatrix

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Feb. 3, 2017.
% Contributors: 
% Change log: 

    % One coefficient per basis vector: the inner product of that basis
    % vector with u, for the metric at x.
    vec = zeros(numel(basis), 1);
    for k = 1 : numel(basis)
        vec(k) = M.inner(x, basis{k}, u);
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/tangentorthobasis.m
================================================
function orthobasis = tangentorthobasis(M, x, n)
% Returns an orthonormal basis of tangent vectors in the Manopt framework.
%
% function orthobasis = tangentorthobasis(M, x)
% function orthobasis = tangentorthobasis(M, x, n)
%
% M is a Manopt manifold structure obtained from a factory.
% x is a point on the manifold M.
% n (optional) is the dimension of the random subspace to span; by default,
%   n = M.dim() so that the returned basis spans the whole tangent space.
%
% orthobasis is a cell of n tangent vectors at x.
% With high probability, they form an orthonormal basis of the tangent
% space at x. If necessary, this can be checked by calling
%   G = grammatrix(M, x, orthobasis)
% and verifying that norm(G - eye(size(G))) is close to zero.
%
% Note: if extra accuracy is required, it may help to re-orthogonalize the
% basis returned by this function once, as follows:
%  B = tangentorthobasis(M, x, n);
%  B = orthogonalize(M, x, B);
%
% See also: grammatrix orthogonalize lincomb plotprofile

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 28, 2016.
% Contributors: 
% Change log: 

    % By default, span the whole tangent space.
    full_dim = M.dim();
    if nargin < 3 || isempty(n)
        n = full_dim;
    end
    n_is_valid = (n >= 0 && n <= full_dim && n == round(n));
    assert(n_is_valid, ...
           'n must be an integer between 0 and M.dim().');

    % With high probability, n vectors taken at random in the tangent space
    % are linearly independent.
    basis = cell(n, 1);
    for k = 1 : n
        basis{k} = M.randvec(x);
    end

    % Orthonormalize the random vectors: the output spans the same
    % subspace as the n random vectors.
    orthobasis = orthogonalize(M, x, basis);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/tangentspacefactory.m
================================================
function N = tangentspacefactory(M, x)
% Returns a manifold structure representing the tangent space to M at x.
%
% N = tangentspacefactory(M, x)
%
% N defines a (linear) manifold that is the tangent space to M at x. Points
% are represented as tangent vectors to M at x.
Tangent vectors are also
% represented as tangent vectors to M at x.
%
% This is chiefly useful to solve optimization problems involving tangent
% vectors to M at x, which notably comes up when solving linear systems
% involving, for example, the Hessian of the cost on M at x (think of the
% Newton equations.) The Riemannian (actually, Euclidean) structure on N is
% that of the tangent space to M, that is, the inner product is inherited.
%
% See also: preconhessiansolve

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 9, 2015.
% Contributors: 
% Change log: 
%
%   Jan. 25, 2017 (NB):
%       Following a comment by Jesus Briales on the Manopt forum, the
%       functions N.egrad2rgrad, N.ehess2rhess and N.tangent now include a
%       projection (they were formerly identities.)
%
%   Feb. 2, 2017 (NB):
%       Following a comment by Jesus Briales on the Manopt forum, the
%       function N.proj now calls M.proj(x, .) instead of M.proj(y, .).
%       Furthermore, N.ehess2rhess was corrected in the same way.
%
%   Review fix:
%       N.zerovec and N.lincomb now call M.zerovec(x) and M.lincomb(x, .)
%       instead of forwarding y to M as if it were a point on M.

    % N is the manifold we build. y will be a point on N, thus also a
    % tangent vector to M at x. This is a typical Euclidean space, hence it
    % will be easy to describe in terms of the tools available for M.
    N = struct();

    % u, u1 and u2 will be tangent vectors to N at y. The tangent space to
    % N at y is the tangent space to M at x, thus u, u1 and u2 are also
    % tangent vectors to M at x. Consequently, every call into M below uses
    % x as base point: y is never a valid base point for M.

    N.dim   = @() M.dim();
    N.inner = @(y, u1, u2) M.inner(x, u1, u2);
    N.norm  = @(y, u) M.norm(x, u);
    N.proj  = @(y, u) M.proj(x, u);
    N.typicaldist = @() sqrt(N.dim());
    N.tangent = N.proj;
    N.egrad2rgrad = N.proj;
    N.ehess2rhess = @(y, eg, eh, d) M.proj(x, eh);
    N.exp = @exponential;
    N.retr = @exponential;
    N.log = @(y1, y2) M.lincomb(x, 1, y2, -1, y1);
    N.pairmean = @(y1, y2) M.lincomb(x, 0.5, y1, 0.5, y2);
    N.rand = @() M.randvec(x);
    N.randvec = @(y) M.randvec(x);
    % The zero vector of N at y is the zero tangent vector to M at x.
    % (Forwarding y to M.zerovec fails for manifolds whose points and
    % tangent vectors have different representations.)
    N.zerovec = @(y) M.zerovec(x);
    N.lincomb = @lincomb_at_x;
    N.transp = @(y1, y2, u) u;
    N.hash = @(y) ['z' hashmd5(M.vec(x, y))];

    % In a Euclidean space, the exponential is merely the sum: y + tu.
    function yy = exponential(y, u, t)
        if nargin == 2
            t = 1;
        end
        yy = M.lincomb(x, 1, y, t, u);
    end

    % Linear combination of tangent vectors to N at y, that is, of tangent
    % vectors to M at x: a1*u1 (3 inputs) or a1*u1 + a2*u2 (5 inputs).
    function v = lincomb_at_x(y, a1, u1, a2, u2) %#ok<INUSL>
        if nargin == 3
            v = M.lincomb(x, a1, u1);
        elseif nargin == 5
            v = M.lincomb(x, a1, u1, a2, u2);
        else
            error('Bad usage of tangentspacefactory.lincomb');
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/tangentspherefactory.m
================================================
function N = tangentspherefactory(M, x)
% Returns a manifold struct. for the sphere on the tangent space to M at x.
%
% N = tangentspherefactory(M, x)
%
% N defines a manifold that is the unit sphere on the tangent space to M
% at x. Points are represented as tangent vectors of unit norm. Tangent
% vectors are represented as tangent vectors orthogonal to the root point,
% with respect to the Riemannian metric on the tangent space.
%
% This is chiefly useful to solve optimization problems involving unit norm
% tangent vectors to M at x, which notably comes up when looking for
% extreme eigenvectors of the Hessian of a cost function on M at x, for
% example. The Riemannian structure on this sphere is that of a Riemannian
% submanifold of the (Euclidean) tangent space, equipped with the
% Riemannian metric of M at that point.
%
% See also: hessianextreme

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, March 16, 2015.
% Contributors: 
% Change log: 
%
%   Nov 27, 2015 (NB):
%       Extra projection added in the retraction, to prevent numerical
%       drift.
%
%   Review fix:
%       N.zerovec and N.lincomb now call M.zerovec(x) and M.lincomb(x, .)
%       instead of forwarding y to M as if it were a point on M.

    % N is the manifold we build. y will be a point on N, thus also a
    % tangent vector to M at x. This is a typical Riemannian submanifold of
    % a Euclidean space, hence it will be easy to describe in terms of the
    % tools available for M.
    N = struct();

    % u, u1 and u2 will be tangent vectors to N at y. The tangent space to
    % N at y is a subspace of the tangent space to M at x, thus u, u1 and
    % u2 are also tangent vectors to M at x. Consequently, every call into
    % M below uses x as base point: y is never a valid base point for M.

    N.dim   = @() M.dim() - 1;
    N.inner = @(y, u1, u2) M.inner(x, u1, u2);
    N.norm  = @(y, u)      M.norm(x, u);
    % Remove from v its component along y (y has unit norm on N).
    N.proj  = @(y, v) M.lincomb(x, 1, v, -M.inner(x, v, y), y);
    N.typicaldist = @() 1;
    N.tangent = N.proj;
    N.egrad2rgrad = N.proj;
    N.retr = @retraction;
    % Note: N.exp is the retraction, not a separate exponential map.
    N.exp = N.retr;
    function yy = retraction(y, u, t)
        if nargin == 2
            t = 1;
        end
        y_plus_tu = M.lincomb(x, 1, y, t, u);
        % This extra projection is not required mathematically,
        % but appears to be necessary numerically, sometimes.
        % The reason is that, as many retractions are operated,
        % there is a risk that the points generated would leave
        % the tangent space. If this proves to be a huge slow down,
        % one could consider adding a type of counter that only
        % executes this extra projection every so often, instead
        % of at every call.
        y_plus_tu = M.proj(x, y_plus_tu);
        % Normalize to get back to the unit sphere.
        nrm = M.norm(x, y_plus_tu);
        yy = M.lincomb(x, 1/nrm, y_plus_tu);
    end
    % Random point: a random tangent vector to M at x, normalized.
    N.rand = @random;
    function y = random()
        y = M.randvec(x);
        nrm = M.norm(x, y);
        y = M.lincomb(x, 1/nrm, y);
    end
    % Random unit-norm tangent vector to N at y: a random point of N,
    % projected to the tangent space at y and normalized.
    N.randvec = @randvec;
    function u = randvec(y)
        u = N.proj(y, N.rand());
        nrm = N.norm(y, u);
        u = M.lincomb(x, 1/nrm, u);
    end
    % The zero vector of N at y is the zero tangent vector to M at x.
    % (Forwarding y to M.zerovec fails for manifolds whose points and
    % tangent vectors have different representations.)
    N.zerovec = @(y) M.zerovec(x);
    N.lincomb = @lincomb_at_x;
    % Linear combination of tangent vectors to M at x: a1*u1 (3 inputs) or
    % a1*u1 + a2*u2 (5 inputs). The point y of N is not needed.
    function v = lincomb_at_x(y, a1, u1, a2, u2) %#ok<INUSL>
        if nargin == 3
            v = M.lincomb(x, a1, u1);
        elseif nargin == 5
            v = M.lincomb(x, a1, u1, a2, u2);
        else
            error('Bad usage of tangentspherefactory.lincomb');
        end
    end
    N.transp = @(y1, y2, u) N.proj(y2, u);
    N.hash = @(y) ['z' hashmd5(M.vec(x, y))];

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt_version.m
================================================
function [version, released] = manopt_version()
% Returns the version of the Manopt package you are running, as a vector.
%
% function [version, released] = manopt_version()
%
% version(1) is the primary version number.
% released is the date this version was released, in the same format as the
% date() function in Matlab.

    version = [4, 0, 0];
    released = '09-Sep-2017';

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/readme
================================================
test

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/readme
================================================
test

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR_Assessment.tex
================================================
\begin{tabular}{|l|c|c|c|}
\hline
&\textbf{Q2n}&\textbf{SAM}&\textbf{ERGAS}\\\hline
\textbf{GT}&1.0000&0.0000&0.0000\\\hline
\textbf{EXP}&0.6513&7.2118&8.1106\\\hline
\textbf{BT-H}&0.9241&6.4530&3.9714\\\hline
\textbf{BDSD-PC}&0.9327&6.8388&3.8905\\\hline
\textbf{C-GSA}&0.9213&6.6967&4.0504\\\hline
\textbf{SR-D}&0.9113&6.6269&4.3472\\\hline
\textbf{MTF-GLP-HPM-R}&0.9228&7.0038&4.0692\\\hline
\textbf{MTF-GLP-FS}&0.9228&6.7650&4.0434\\\hline
\textbf{TV}&0.9277&6.6213&4.0630\\\hline \textbf{PanNet}&0.9238&6.9050&4.2365\\\hline \textbf{DRPNN}&0.9205&7.3887&4.2504\\\hline \textbf{MSDCNN}&0.9087&7.5139&4.4214\\\hline \textbf{BDPN}&0.9180&7.7148&4.4522\\\hline \textbf{DiCNN}&0.8567&8.0256&5.5124\\\hline \textbf{PNN}&0.8849&12.6019&6.7233\\\hline \textbf{APNN}&0.9132&7.6201&4.4536\\\hline \textbf{FusionNet}&0.8499&8.3823&6.0458\\\hline \end{tabular} ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/SR-D/CS.m ================================================ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % CSDetails is the Compressive Sensing (CS) approach for Pansharpening proposed in [Vicinanza15]. % % Interface: % I_Fus_CS = CSDetails(I_MS, I_PAN, I_MS_LR, resize_fact, sensor, TS, ol, n_atoms) % % Inputs: % I_MS: Multispectral (MS) original image upsampled to the PAN scale; % I_PAN: Panchromatic (PAN) image; % I_MS_LR: MS original image; % ratio: Scale ratio between MS and PAN. Pre-condition: Integer value; % sensor: String for type of sensor (e.g. 'WV2', 'IKONOS'); % TS: Tiling (dimensions of the patches are TS x TS, e.g. 7 x 7); % ol: Overlap in pixels between contiguous tile; % n_atoms: max number of representation atoms (default value = 10). % % Output: % I_Fus_CS: Fusion image using the CS approach in [Vicinanza15]. % % References: % [Vicinanza15] M.R. Vicinanza, R. Restaino, G. Vivone, M. Dalla Mura, and J. Chanussot, "A pansharpening method based on the sparse representation of injected details", % IEEE Geoscience and Remote Sensing Letters, vol. 12, no. 1, pp. 180-184, 2015. % [Vivone20] G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. 
% Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods",
%                       IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_CS = CS(I_MS, I_PAN, I_MS_LR, ratio, sensor, TS, ol, n_atoms)
% CS  Sparse-representation-of-details pansharpening.
%
%   I_Fus_CS = CS(I_MS, I_PAN, I_MS_LR, ratio, sensor, TS, ol)
%   I_Fus_CS = CS(I_MS, I_PAN, I_MS_LR, ratio, sensor, TS, ol, n_atoms)
%
%   I_MS      MS image upsampled to the PAN scale.
%   I_PAN     PAN image (replicated once per MS band below).
%   I_MS_LR   MS image at its original scale.
%   ratio     Scale ratio between MS and PAN.
%   sensor    Sensor name forwarded to MTF and resize_images.
%   TS, ol    Patch size and overlap forwarded to Dict_Learn / OMP_Rec_Detile.
%   n_atoms   Maximum number of atoms per patch (optional, default 10).
%
%   Returns the MS image plus the details reconstructed by OMP_Rec_Detile.

    % n_atoms is the 8th and last argument. The previous guard (nargin < 9)
    % was always true and therefore discarded any value passed by the caller.
    if nargin < 8 || isempty(n_atoms)
        n_atoms = 10;
    end

    imageLR = double(I_MS);
    imageHR = double(I_PAN);
    imageLR_LR = double(I_MS_LR);

    %%% Equalization: match mean and standard deviation of each PAN replica
    %%% to the corresponding upsampled MS band.
    imageHR = repmat(imageHR, [1 1 size(I_MS,3)]);
    for ii = 1 : size(imageLR_LR,3)
        % imageHR(:,:,ii) = equalize_image (imageHR(:,:,ii), imageLR(:,:,ii));
        imageHR(:,:,ii) = (imageHR(:,:,ii) - mean2(imageHR(:,:,ii))) / std2(imageHR(:,:,ii)) ...
            * std2(imageLR(:,:,ii)) + mean2(imageLR(:,:,ii));
    end

    %%% Extract details using MTF-based filters
    imageLR_LP = MTF(imageLR, sensor, ratio);
    imageLR_D = imageLR - imageLR_LP;

    imageHR_LP = MTF(imageHR, sensor, ratio);
    for ii = 1 : size(imageHR,3)
        % Decimate and re-expand the low-pass PAN before taking the residual.
        imageHR_LP(:,:,ii) = imresize(imresize(imageHR_LP(:,:,ii), 1/ratio, 'nearest'), ratio);
    end
    imageHR_D = imageHR - imageHR_LP;

    %%% Decimation MS (imageLR_LR is overwritten with the decimated MS details)
    for ii = 1 : size(imageLR,3)
        imageLR_LR(:,:,ii) = double(imresize(imageLR_D(:,:,ii), 1/ratio, 'nearest'));
    end

    %%% Degradation PAN
    imageHR_LR = resize_images(imageHR_D, 1, ratio, sensor);

    %%% Dictionary learning
    [Dh, Dl, ytilde_k] = Dict_Learn(imageHR_D, imageHR_LR, imageLR_LR, ratio, TS, ol);

    %%% Sparse coefficient estimation and HR signal reconstruction
    I_Fus_CS = OMP_Rec_Detile(Dl, Dh, ytilde_k, size(imageHR,1), size(imageHR,2), ...
        size(imageLR_LR,3), ratio, ol, TS, n_atoms);

    I_Fus_CS = imageLR + I_Fus_CS;
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/SR-D/Dict_Learn.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Dict_Learn is the dictionary learning method for the % compressive sensing approach for Pansharpening proposed in [Vicinanza15]. % % INPUTS % I_PAN_D: Details of the panchromatic image; % I_PAN_LR_D: Details of the low resolution panchromatic image; % I_MS_LR_D: Details of the MS original image or the MS original image (depending on the flag "do_detail" in CSDetails); % resize_fact: Resize factor (ratio between PAN and MS images); % TS: Tiling (dimensions of the patches are TS x TS, e.g. 7 x 7); % ol: Overlap in pixels between contiguous tiles. % % OUTPUTS % Dh: High spatial resolution dictionary (PAN details) built as in [Vicinanza15]; % Dl: Low spatial resolution dictionary (Low resolution PAN details) built as in [Vicinanza15]; % ytilde_k: Patches in column form of the details of the MS original image or the MS original image (depending on the flag "do_detail" in CSDetails). % % REFERENCE % [Vicinanza15] M.R. Vicinanza, et al. "A pansharpening method based on the sparse representation of injected details." % IEEE Geoscience and Remote Sensing Letters 12.1 (2015): 180-184. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Dh, Dl, ytilde_k] = Dict_Learn(I_PAN_D, I_PAN_LR_D, I_MS_LR_D, resize_fact, TS, ol)
% Dict_Learn  Cut the three detail images into overlapping patches and stack
% every patch (all bands, one after the other) as one dictionary column.
%
%   Patches are TS x TS at low resolution (TS*resize_fact at PAN scale) and
%   advance by TS-ol pixels. The last patch row/column is shifted backwards
%   so that it ends on the image border.

    stride = TS - ol;
    nr = ceil((size(I_PAN_D,1)/resize_fact - ol) / stride);
    nc = ceil((size(I_PAN_D,2)/resize_fact - ol) / stride);
    nBands = size(I_MS_LR_D, 3);
    nPatches = nr*nc;

    lrLen = TS^2;                  % samples per band in a low-resolution patch
    hrLen = TS^2*resize_fact^2;    % samples per band in a PAN-scale patch

    Dh = zeros(hrLen*nBands, nPatches);
    Dl = zeros(lrLen*nBands, nPatches);
    ytilde_k = zeros(lrLen*nBands, nPatches);

    % Backward shift applied only to the last patch row / column.
    lastShiftR = 0;
    remR = mod(size(I_MS_LR_D,1) - ol, stride);
    if remR ~= 0
        lastShiftR = stride - remR;
    end
    lastShiftC = 0;
    remC = mod(size(I_MS_LR_D,2) - ol, stride);
    if remC ~= 0
        lastShiftC = stride - remC;
    end

    % Building the dictionaries (Dh and Dl); patches are visited row by row.
    for k = 1:nPatches
        irow = ceil(k / nc);
        icol = k - (irow-1)*nc;

        shiftr = lastShiftR * (irow == nr);
        shiftc = lastShiftC * (icol == nc);

        blockr = ((irow-1)*stride*resize_fact+1 - shiftr*resize_fact) : ((irow*TS-(irow-1)*ol)*resize_fact - shiftr*resize_fact);
        blockc = ((icol-1)*stride*resize_fact+1 - shiftc*resize_fact) : ((icol*TS-(icol-1)*ol)*resize_fact - shiftc*resize_fact);
        blockrl = ((irow-1)*stride+1 - shiftr) : (irow*TS-(irow-1)*ol - shiftr);
        blockcl = ((icol-1)*stride+1 - shiftc) : (icol*TS-(icol-1)*ol - shiftc);

        for iband = 1:nBands
            hrPatch = I_PAN_D(blockr, blockc, iband);
            lrPatch = I_PAN_LR_D(blockrl, blockcl, iband);
            msPatch = I_MS_LR_D(blockrl, blockcl, iband);

            Dh((iband-1)*hrLen + (1:numel(hrPatch)), k) = hrPatch(:);
            Dl((iband-1)*lrLen + (1:numel(lrPatch)), k) = lrPatch(:);
            ytilde_k((iband-1)*lrLen + (1:numel(msPatch)), k) = msPatch(:);
        end
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/SR-D/OMP.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% OMP is the Orthogonal matching Pursuit (OMP) modified to work with multispectral data.
%
% INPUTS
%   D:       Dictionary (matrix); the rows of the nBands spectral bands are stacked one after the other;
%   y:       Measurements (column vector), stacked per band like the rows of D;
%   nBands:  Number of MS spectral bands;
%   iatom:   Id of the actual atom under analysis (always used as the first selected atom);
%   n_atoms: max number of representation atoms.
%
%   The maximum squared error allowed for the constraint y = D a (delta) is
%   not an input: it is fixed to 0 inside the function.
%
% OUTPUTS
%   a:       Estimated alphas (one row per selected atom, one column per band);
%   indx:    Vector of the atom positions in the dictionary.
%
% REFERENCE
%   [Vicinanza15] M.R. Vicinanza, et al. "A pansharpening method based on the sparse representation of injected details."
%                 IEEE Geoscience and Remote Sensing Letters 12.1 (2015): 180-184.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [a, indx] = OMP(D, y, nBands, iatom, n_atoms)
    % Rows of D belonging to one band. size(D,1) is used explicitly: the
    % previous size(D) returned [rows cols] and relied on the colon operator
    % silently picking the first element of a vector operand.
    n = round(size(D, 1) / nBands);
    delta = 0;

    % Defined up front so that both outputs exist even when the loop body
    % never runs (y identically zero, or n_atoms < 1): no atom is selected.
    a = zeros(0, nBands);
    indx = [];
    Da = zeros(n * nBands, 1);   % current approximation of y, stacked per band

    res = y;
    curr_delta = sum(res.^2);
    j = 0;
    while curr_delta > delta && j < n_atoms
        j = j + 1;
        if j == 1
            % The first atom is imposed by the caller.
            indx = iatom;
        else
            % Afterwards pick the atom most correlated with the residual.
            proj = D' * res;
            [~, imax] = max(abs(proj));
            indx = cat(2, indx, imax);
        end

        a = zeros(j, nBands);
        for iband = 1:nBands
            rows = (iband-1)*n+1 : iband*n;
            Di = D(rows, indx(1:j));
            yi = y(rows);
            DitDi = Di' * Di;
            % Least-squares fit per band; coefficients stay zero when the
            % Gram matrix is judged too close to singular.
            if det(DitDi) > 1e-1
                a(:,iband) = ((DitDi) \ (Di')) * yi;
            end
            Da(rows) = Di * a(:,iband);
        end

        res = y - Da;
        curr_delta = sum(res.^2);
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/SR-D/OMP_Rec_Detile.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% OMP_Rec_Detile performs:
% 1) The estimation of the coefficients \alpha at reduced resolution using an orthogonal matching pursuit (OMP) procedure for multispectral images;
% 2) The reconstruction of the patches at full resolution using the hypothesis of invariance among scales of the \alpha coefficients;
% 3) The detiling step to get the final image details at full resolution for the approach proposed in [Vicinanza15].
% % INPUTS % Dl: Low spatial resolution dictionary (Low resolution PAN details) built as in [Vicinanza15]; % Dh: High spatial resolution dictionary (PAN details) built as in [Vicinanza15]; % ytilde_k: Patches in column form of the details of the MS original image or the MS original image (depending on the flag "do_detail" in CSDetails); % H_PAN,L_PAN,C_PAN: PAN (row and column) dimensions and number of MS spectral bands; % resize_fact: Resize factor (ratio between PAN and MS images); % TS: Tiling (dimensions of the patches are TS x TS, e.g. 7 x 7); % ol: Overlap in pixels between contiguous tiles. % n_atoms: max number of representation atoms % % OUTPUT % I_Fus_CS: Reconstructed details (or fused image if do_detail flag is 0) using the CS approach in [Vicinanza15] for the final pansharpening product. % % REFERENCE % [Vicinanza15] M.R. Vicinanza, et al. "A pansharpening method based on the sparse representation of injected details." % IEEE Geoscience and Remote Sensing Letters 12.1 (2015): 180-184. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_CS = OMP_Rec_Detile(Dl, Dh, ytilde_k, H_PAN, L_PAN, C_MS, resize_fact, ol, TS, n_atoms)
% OMP_Rec_Detile  Sparse-code every low-resolution patch with OMP, rebuild
% the matching PAN-scale patch from Dh with the same coefficients, and
% average the overlapping patches into an H_PAN x L_PAN x C_MS image.

    outSize = [H_PAN L_PAN C_MS];
    I_Fus_CS = zeros(outSize);
    countpx = zeros(outSize);   % how many patches contributed to each pixel

    stride = TS - ol;
    nr = ceil((H_PAN/resize_fact - ol) / stride);
    nc = ceil((L_PAN/resize_fact - ol) / stride);

    % Backward shift of the last patch row / column so it ends on the border.
    shiftr_glob = 0;
    remR = mod(H_PAN/resize_fact - ol, stride);
    if remR ~= 0
        shiftr_glob = stride - remR;
    end
    shiftc_glob = 0;
    remC = mod(L_PAN/resize_fact - ol, stride);
    if remC ~= 0
        shiftc_glob = stride - remC;
    end

    alpha_count = 0;
    Latom = size(Dl, 2);
    Dict_Size = size(ytilde_k, 2);
    hrLen = TS^2*resize_fact^2;   % samples per band in a PAN-scale patch

    % Patches are visited row by row, matching the column order of ytilde_k.
    for iatom = 1:(nr*nc)
        irow = ceil(iatom / nc);
        icol = iatom - (irow-1)*nc;

        shiftr = shiftr_glob * (irow == nr);
        shiftc = shiftc_glob * (icol == nc);

        blockr = ((irow-1)*stride*resize_fact+1 - shiftr*resize_fact) : ((irow*TS-(irow-1)*ol)*resize_fact - shiftr*resize_fact);
        blockc = ((icol-1)*stride*resize_fact+1 - shiftc*resize_fact) : ((icol*TS-(icol-1)*ol)*resize_fact - shiftc*resize_fact);
        Lr = length(blockr);
        Lc = length(blockc);

        % Sparse coding with OMP for MS data
        [alpha, inds] = OMP(Dl, ytilde_k(:,iatom), C_MS, iatom, n_atoms);

        % Patch reconstruction and detiling
        for iband = 1:C_MS
            bandRows = (iband-1)*hrLen+1 : iband*hrLen;
            reconstr_patch = Dh(bandRows, inds) * alpha(:,iband);
            I_Fus_CS(blockr,blockc,iband) = I_Fus_CS(blockr,blockc,iband) + reshape(reconstr_patch, Lr, Lc);
            countpx(blockr,blockc,iband) = countpx(blockr,blockc,iband) + 1;
        end

        if mod(iatom,100) == 1
            fprintf ('OMP band by band and detile: atom %i of %i\n', iatom, Dict_Size);
        end

        % Count the atoms whose coefficients do not sum to zero across bands.
        alpha_count = alpha_count + sum( sum(alpha,2)~=0 );
    end

    % Average overlapping patches
    I_Fus_CS = I_Fus_CS ./ countpx;

    fprintf ('Sparsity di alfa = %.2f: %.1f atoms on %i used for each patch on average\n', (Dict_Size*Latom-alpha_count)/Dict_Size/Latom*100, alpha_count/Dict_Size, Dict_Size)
end
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/TV/TV_pansharpen.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           This function minimizes
%               J(x) = || y - M*x ||^2 + lambda*TV(x)
%           where
%               y = [yms^T, ypan^T]^T
%               x is the pansharpened ms image
%               M models the relationship between
%               y and x; see [Palsson14] for details
%
% Interface:
%           x = TV_pansharpen(yms,ypan,alpha,lambda,c,maxiter,w)
%
% Inputs:
%           yms:        The observed MS image;
%           ypan:       The PAN image;
%           alpha:      convergence parameter 1, suggested value=0.75;
%           lambda:     weight of the total variation term TV(x) in J(x);
%           c:          convergence parameter 2, suggested value=8;
%           maxiter:    number of iterations;
%           w:          We assume the pan image to be a linear
%                       combination of the pansharpened ms image,
%                       w contains the weights.
% Output:
%           x:          Pansharpened image.
%
% Reference:
%           [Palsson14] F. Palsson, J.R. Sveinsson, and M.O. Ulfarsson, A New Pansharpening Algorithm Based on Total Variation
%                       IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 1, pp. 318 - 322, 2014.
%           [Vivone20]  G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods",
%                       IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function x = TV_pansharpen(yms,ypan,alpha,lambda,c,maxiter,w)
% TV_pansharpen  Iterative total-variation pansharpening.
%
%   x starts at zero and is refined for maxiter iterations. The auxiliary
%   variable z has twice as many channels as x: channels 1..nb hold the
%   horizontal-difference part, channels nb+1..2*nb the vertical one.
%
%   The decimation/interpolation helpers use a fixed factor of 4, so ypan
%   is expected to be 4 times larger than yms along both spatial axes.

    z = zeros([size(ypan) size(yms,3)*2]);
    x = zeros([size(ypan) size(yms,3)]);
    for k = 1:maxiter
        b = computeb(yms, ypan, x, alpha, w);
        z = znext(z, x, b, alpha, lambda, c);
        x = xnext(z, b, alpha);
    end
end

function b = computeb(yms,ypan,xk,alpha,w)
% b = alpha*xk + H'(y - H*xk)
    [Hxms, Hxpan] = computeH(xk, w);
    b = alpha*xk + adjointH(yms-Hxms, ypan-Hxpan, w);
end

function [yms, ypan] = computeH(x,w)
% Forward model: decimate every band and form the w-weighted band sum.
    ypan = zeros([size(x,1) size(x,2)]);
    for i = 1:size(x,3)
        yms(:,:,i) = decimate(x(:,:,i)); %#ok<AGROW> output size is set by imresize
        ypan = ypan + w(i)*x(:,:,i);
    end
end

function y = decimate(x)
% Bilinear reduction by a factor of 4. Alternatives kept for reference:
    % y = imfilter(x,fspecial('Gaussian',9,sigma),'replicate');
    % y = imfilter(y,fspecial('average',4),'replicate');
    % y = y(1:4:end,1:4:end);
    % h=0.25*[1 1 1 1];
    % x=imfilter(x,h'*h,'symmetric','same');
    % y=downsample(downsample(x,4,1)',4,1)';
    y = imresize(x, 0.25, 'bilinear');
    % y=MTF_downsample(x,'QB','none',4,1);
    % y=imresize(imresize(x,1/4,'bicubic'),4,'bicubic');
end

function x = adjointH(yms,ypan,w)
% Counterpart of computeH: interpolate every band and add the weighted PAN.
    x = zeros(size(ypan,1), size(ypan,2), size(yms,3));
    for i = 1:size(yms,3)
        x(:,:,i) = interpolate(yms(:,:,i)) + w(i)*ypan;
    end
end

function y = interpolate(x)
% Bilinear enlargement by a factor of 4. Alternatives kept for reference:
    % y = upsample(upsample(x,4)',4)';
    y = imresize(x, 4, 'bilinear');
    % y = imfilter(y,fspecial('Gaussian',9,sigma),'replicate');
    % y = imfilter(y,fspecial('average',4),'replicate');
    % y=imresize(x,4,'bicubic');
    % y=MTF_upsample(x,'IKONOS','none',4,1);
    % y=interp23tap(x,4);
end

function z1 = znext(z0,x0,b,alpha,lambda,c)
% z-update: z1 = (D*b + (c*I - D*D')*z0) ./ W, with
% W = 2*alpha/lambda*|grad x0| + c used for both halves of z.
    nb = size(x0,3);
    W = zeros(size(x0,1), size(x0,2), 2*nb);
    for i = 1:nb
        Wi = 2*alpha/lambda * sqrt(Dx(x0(:,:,i)).^2 + Dy(x0(:,:,i)).^2) + c;
        W(:,:,i) = Wi;
        W(:,:,i+nb) = Wi;
    end
    z1 = (computeDb(b) + cIDDTz(z0,c)) ./ W;
end

function DX = Dx(v)
% Horizontal forward difference, zero in the last column.
    DX = [diff(v,1,2) zeros(size(v,1),1)];
end

function DY = Dy(v)
% Vertical forward difference, zero in the last row.
    DY = [diff(v); zeros(1,size(v,2))];
end

function Db = computeDb(b)
% Stack Dx of every band followed by Dy of every band.
    nb = size(b,3);
    Db = zeros(size(b,1), size(b,2), 2*nb);
    for i = 1:nb
        Db(:,:,i) = Dx(b(:,:,i));
        Db(:,:,i+nb) = Dy(b(:,:,i));
    end
end

function cIddtz = cIDDTz(z,c)
% (c*I - D*D')*z.
% The previous version computed this quantity but returned only D*D'*z,
% which does not match the splitting used in znext.
    cIddtz = c*z - computeDb(DTz(z));
end

function DXT = DxT(v)
% Transpose of Dx, obtained from DyT by transposition.
    DXT = DyT(v')';
end

function DYT = DyT(v)
% Transpose of Dy.
    u0 = -v(1,:);
    u1 = -diff(v);
    u2 = v(end-1,:);
    DYT = [u0; u1(1:(end-1),:); u2];
end

function x1 = xnext(z1,b,alpha)
% x-update: x1 = (b - D'*z1)/alpha
    x1 = (b - DTz(z1)) ./ alpha;
end

function dtz = DTz(z)
% D'*z: combine the Dx half (channels 1..nb) with the Dy half
% (channels nb+1..2*nb). The offset used to be hard-coded to 4, which is
% only right for 4-band images and indexes out of range for fewer bands.
    nb = size(z,3)/2;
    dtz = zeros(size(z,1), size(z,2), nb);
    for i = 1:nb
        dtz(:,:,i) = DxT(z(:,:,i)) + DyT(z(:,:,i+nb));
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/LPfilter.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           LPfilter filters the panchromatic (PAN) image using the "a trous" wavelet transform.
%
% Interface:
%           HRPanLP = LPfilter(HRPan,ratio)
%
% Inputs:
%           HRPan:      PAN image;
%           ratio:      Scale ratio between MS and PAN.
%
% Outputs:
%           HRPanLP:    Output filtered PAN image.
%
% References:
%           [Vivone15]  G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms,
%                       IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function HRPanLP = LPfilter(HRPan,ratio)

    % Analysis (h, g) and synthesis (htilde, gtilde) filters.
    h = [1 4 6 4 1]/16;
    g = [0 0 1 0 0] - h;
    htilde = [1 4 6 4 1]/16;
    gtilde = [0 0 1 0 0] + htilde;
    h = sqrt(2)*h;
    g = sqrt(2)*g;
    htilde = sqrt(2)*htilde;
    gtilde = sqrt(2)*gtilde;
    WF = {h,g,htilde,gtilde};

    Levels = ceil(log2(ratio));

    WT = ndwt2_working(HRPan,Levels,WF);

    % Zero every decomposition cell except the first one before inverting.
    for ii = 2 : numel(WT.dec), WT.dec{ii} = zeros(size(WT.dec{ii})); end

    HRPanLP = indwt2_working(WT,'c');

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/LPfilterGauss.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           LPfilterGauss filters the panchromatic (PAN) image using a Gaussian filter with gain at Nyquist frequency 0.3.
%
% Interface:
%           I_PAN_LR = LPfilterGauss(I_PAN,ratio)
%
% Inputs:
%           I_PAN:      PAN image;
%           ratio:      Scale ratio between MS and PAN.
%
% Outputs:
%           I_PAN_LR:   Output filtered PAN image.
%
% References:
%           [Aiazzi02]  B. Aiazzi, L. Alparone, S. Baronti, and A. Garzelli, Context-driven fusion of high spatial and spectral resolution images based on
%                       oversampled multiresolution analysis, IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp. 2300-2312, October
%                       2002.
%           [Aiazzi06]  B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, MTF-tailored multiscale fusion of high-resolution MS and Pan imagery,
%                       Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006.
%           [Vivone14a] G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, Contrast and error-based fusion schemes for multispectral
%                       image pansharpening, IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15]  G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms,
%                       IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_PAN_LR = LPfilterGauss(I_PAN,ratio)

    kernelSize = 41;        % support of the FIR kernel
    nyquistGain = 0.3;      % gain of the Gaussian at the Nyquist frequency
    cutoff = 1/ratio;

    % Standard deviation giving the requested gain at the cut-off frequency.
    sigma = sqrt((kernelSize*(cutoff/2))^2/(-2*log(nyquistGain)));

    % Gaussian response normalised to unit peak, then windowed into a kernel.
    response = fspecial('gaussian', kernelSize, sigma);
    response = response./max(response(:));
    kernel = fwind1(response, kaiser(kernelSize));

    I_PAN_LR = imfilter(I_PAN, real(kernel), 'replicate');

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/LPfilterPlusDec.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           LPfilterPlusDec filters and decimates the image I_PAN using a Starck and Murtagh (S&M) filter.
%
% Interface:
%           I_PAN_LR = LPfilterPlusDec(I_PAN,ratio)
%
% Inputs:
%           I_PAN:      Image to be filtered and decimated;
%           ratio:      Scale ratio between MS and PAN. Pre-condition: Resize factors power of 2.
%
% Outputs:
%           I_PAN_LR:   Filtered and decimated image.
%
% References:
%           [Starck07]  J.-L. Starck, J. Fadili, and F. Murtagh, The undecimated wavelet decomposition and its reconstruction, IEEE Transactions on Image
%                       Processing, vol. 16, no. 2, pp. 297-309, February 2007.
%           [Vivone15]  G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms,
%                       IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_PAN_LR = LPfilterPlusDec(I_PAN,ratio)

    smoothTaps = [1 4 6 4 1]/16;
    impulse = [0 0 1 0 0];

    % Filter bank handed to ndwt2_working: {h, g, htilde, gtilde}.
    WF = { sqrt(2)*smoothTaps, ...
           sqrt(2)*(impulse - smoothTaps), ...
           sqrt(2)*smoothTaps, ...
           sqrt(2)*(impulse + smoothTaps) };

    Levels = ceil(log2(ratio));
    WT = ndwt2_working(I_PAN, Levels, WF);

    % Keep only the first decomposition cell; every other cell is zeroed.
    for ii = 2 : numel(WT.dec)
        WT.dec{ii} = zeros(size(WT.dec{ii}));
    end

    lowPass = indwt2_working(WT, 'c');

    % Decimation by the scale ratio.
    I_PAN_LR = imresize(lowPass, 1/ratio, 'nearest');

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/MTF.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           MTF filters the image I_MS using a Gaussian filter matched with the Modulation Transfer Function (MTF) of the MultiSpectral (MS) sensor.
%
% Interface:
%           I_Filtered = MTF(I_MS,sensor,ratio)
%
% Inputs:
%           I_MS:           MS image;
%           sensor:         String for type of sensor (e.g. 'WV2', 'IKONOS');
%           ratio:          Scale ratio between MS and PAN.
%
% Outputs:
%           I_Filtered:     Output filtered MS image.
%
% References:
%           [Aiazzi02]      B. Aiazzi, L. Alparone, S. Baronti, and A.
%                           Garzelli, Context-driven fusion of high spatial and spectral resolution images based on
%                           oversampled multiresolution analysis, IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp. 2300-2312, October
%                           2002.
%           [Aiazzi06]      B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, MTF-tailored multiscale fusion of high-resolution MS and Pan imagery,
%                           Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006.
%           [Vivone14a]     G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, Contrast and error-based fusion schemes for multispectral
%                           image pansharpening, IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15]      G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms,
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Filtered = MTF(I_MS,sensor,ratio)

    nBands = size(I_MS,3);

    % One kernel per band, generated by genMTF.
    kernels = genMTF(ratio, sensor, nBands);

    % The result is accumulated in a double array, band by band.
    I_Filtered = zeros(size(I_MS));
    for band = 1 : nBands
        I_Filtered(:,:,band) = imfilter(I_MS(:,:,band), real(kernels(:,:,band)), 'replicate');
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/MTF_PAN.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           MTF filters the panchromatic (PAN) image using a Gaussian filter matched with the Modulation Transfer Function (MTF) of the PAN sensor.
%
% Interface:
%           I_Filtered = MTF_PAN(I_PAN,sensor,ratio)
%
% Inputs:
%           I_PAN:          PAN image;
%           sensor:         String for type of sensor (e.g. 'WV2', 'IKONOS');
%           ratio:          Scale ratio between MS and PAN.
%
% Outputs:
%           I_Filtered:     Output filtered PAN image.
%
% References:
%           [Aiazzi02]      B. Aiazzi, L. Alparone, S.
%                           Baronti, and A. Garzelli, Context-driven fusion of high spatial and spectral resolution images based on
%                           oversampled multiresolution analysis, IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp. 2300-2312, October
%                           2002.
%           [Aiazzi06]      B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, MTF-tailored multiscale fusion of high-resolution MS and Pan imagery,
%                           Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006.
%           [Vivone14a]     G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, Contrast and error-based fusion schemes for multispectral
%                           image pansharpening, IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15]      G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms,
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Filtered = MTF_PAN(I_PAN,sensor,ratio)

    % Gain of the PAN MTF at the Nyquist frequency, per sensor.
    switch sensor
        case 'QB'
            GNyq = 0.15;
        case 'IKONOS'
            GNyq = 0.17;
        case {'GeoEye1','WV4'}
            GNyq = 0.16;
        case 'WV2'
            GNyq = 0.11;
        case 'WV3'
            GNyq = 0.14;
        otherwise
            % 'none' and any sensor name not listed above use the same
            % default. Without this branch GNyq stayed undefined for an
            % unlisted name; genMTF already falls back to a default gain.
            GNyq = 0.15;
    end

    N = 41;
    fcut = 1/ratio;

    % Gaussian with the requested gain at fcut, normalised to unit peak and
    % windowed into an N x N kernel.
    alpha = sqrt(((N-1)*(fcut/2))^2/(-2*log(GNyq)));
    H = fspecial('gaussian', N, alpha);
    Hd = H./max(H(:));
    h = fwind1(Hd,kaiser(N));

    I_PAN_LP = imfilter(I_PAN,real(h),'replicate');

    I_Filtered = double(I_PAN_LP);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/estimation_alpha.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Estimation coefficients linear regression model.
%
% Interface:
%           alpha = estimation_alpha(I_MS,I_PAN,type_estimation)
%
% Inputs:
%           I_MS:               MS image upsampled at PAN scale;
%           I_PAN:              PAN image;
%           type_estimation:    Type of estimation (i.e. local or global).
%
% Outputs:
%           alpha:              Coefficients estimated by the linear regression model.
%
% References:
%           [Vivone14]  G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, Contrast and error-based fusion schemes for multispectral
%                       image pansharpening, IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function alpha = estimation_alpha(I_MS,I_PAN,type_estimation)

    nRows = size(I_MS,1);
    nCols = size(I_MS,2);
    nBands = size(I_MS,3);

    if strcmp(type_estimation,'global')
        %%%%%%%% Global estimation: one least-squares fit over all pixels
        msCols = reshape(I_MS, [nRows*nCols nBands]);
        alpha = msCols \ I_PAN(:);
        return
    end

    %%%%%%%% Local estimation: fit every 32 x 32 block (smaller at the
    %%%%%%%% borders) and average the per-block coefficients.
    block_win = 32;
    alphaSum = zeros(nBands,1);
    nBlocks = 0;
    for r0 = 1 : block_win : nRows
        rows = r0 : min(nRows, r0 + block_win - 1);
        for c0 = 1 : block_win : nCols
            cols = c0 : min(nCols, c0 + block_win - 1);

            panBlock = I_PAN(rows, cols);
            msBlock = I_MS(rows, cols, :);
            msCols = reshape(msBlock, [size(msBlock,1).*size(msBlock,2) size(msBlock,3)]);

            alphaSum = alphaSum + msCols \ panBlock(:);
            nBlocks = nBlocks + 1;
        end
    end
    alpha = alphaSum/nBlocks;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/genMTF.m
================================================
% Description:
%           Generate a bank of filters shaped on the MTF of the sensor. Each filter
%           corresponds to a band acquired by the sensor.
%
% Interface:
%           h = genMTF(ratio, sensor, nbands)
%
% Inputs:
%           ratio:      Scale ratio between MS and PAN. Pre-condition: Integer value.
%           sensor:     String for type of sensor (e.g.
%                       'WV2','IKONOS');
%           nbands:     Number of spectral bands.
%
% Outputs:
%           h:          Gaussian filter mimicking the MTF of the MS sensor
%
% References:
%           [Aiazzi02]  B. Aiazzi, L. Alparone, S. Baronti, and A. Garzelli, "Context-driven fusion of high spatial and spectral resolution images based on
%                       oversampled multiresolution analysis," IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp. 2300-2312, October
%                       2002.
%           [Aiazzi06]  B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, "MTF-tailored multiscale fusion of high-resolution MS and Pan imagery,"
%                       Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006.
%           [Vivone14a] G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, "Contrast and error-based fusion schemes for multispectral
%                       image pansharpening," IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15]  G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms",
%                       IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function h = genMTF(ratio, sensor, nbands)

    % Per-band gain at the Nyquist frequency. For the listed sensors the
    % table decides how many kernels are produced; nbands is only used for
    % sensors that are not listed.
    switch sensor
        case 'QB'
            nyquistGains = [0.34 0.32 0.30 0.22];       % Band Order: B,G,R,NIR
        case 'IKONOS'
            nyquistGains = [0.26,0.28,0.29,0.28];       % Band Order: B,G,R,NIR
        case {'GeoEye1','WV4'}
            nyquistGains = [0.23,0.23,0.23,0.23];       % Band Order: B,G,R,NIR
        case 'WV2'
            nyquistGains = [0.35 .* ones(1,7), 0.27];
        case 'WV3'
            nyquistGains = [0.325 0.355 0.360 0.350 0.365 0.360 0.335 0.315];
        otherwise
            nyquistGains = 0.3 .* ones(1, nbands);
    end

    %%% MTF
    kernelSize = 41;
    nKernels = length(nyquistGains);
    h = zeros(kernelSize, kernelSize, nKernels);
    cutoff = 1/ratio;
    window = kaiser(kernelSize);    % identical for every band

    for band = 1 : nKernels
        % Gaussian with the band's gain at the cut-off, unit peak, windowed.
        sigma = sqrt(((kernelSize-1)*(cutoff/2))^2/(-2*log(nyquistGains(band))));
        response = fspecial('gaussian', kernelSize, sigma);
        response = response./max(response(:));
        h(:,:,band) = fwind1(response, window);
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/gen_LP_image.m
================================================
% Description:
%           gen_LP_image generates the Low Resolution version of the PAN image required for the calculation of the
%           segmentation-based version of the Gram-Schmidt algorithm.
%
% Interface:
%           I_LR_input = gen_LP_image(PS_algorithm,I_MS,I_PAN,I_MS_LR,ratio,sensor)
%
% Inputs:
%           PS_algorithm:   Employed segmentation-based algorithm
%                           ('GSA','GS2GLP')
%           I_MS:           MS image upsampled at PAN scale
%           I_PAN:          PAN image
%           I_MS_LR:        MS image
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value.
%           sensor:         String for type of sensor (e.g. 'WV2','IKONOS');
%
% Outputs:
%           I_LR_input:     Low Resolution version of the PAN image
%
% References:
%
%           [Restaino17]    R. Restaino, M. Dalla Mura, G. Vivone, J. Chanussot, Context-Adaptive Pansharpening Based on Image Segmentation,
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 55, no. 2, pp. 753-766, February 2017.
%           [Vivone15]      G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L.
%                           Wald, A Critical Comparison Among Pansharpening Algorithms,
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_LR_input = gen_LP_image(PS_algorithm,I_MS,I_PAN,I_MS_LR,ratio,sensor)

    switch PS_algorithm
        case 'GSA'
            %%%%%%%%% Generation of LR PAN image
            panSmooth = LPfilterGauss(I_PAN, ratio);
            panSmoothLR = imresize(panSmooth, 1/ratio, 'nearest');

            %%%%%%%%%% Estimation of weights: regress the decimated PAN on
            %%%%%%%%%% the MS bands plus a constant plane.
            biasPlane = ones(size(I_MS_LR,1), size(I_MS_LR,2));
            alpha = estimation_alpha(cat(3, I_MS_LR, biasPlane), panSmoothLR, 'global');

            % Apply the weights (and the constant term) to the upsampled MS.
            [Height, Width, Bands] = size(I_MS);
            nPixels = Height*Width;
            msCols = reshape(double(I_MS), nPixels, Bands);
            weights = repmat(alpha', [nPixels, 1]);
            I_LR_col = sum([msCols, ones(nPixels,1)] .* weights, 2);
            I_LR_input = reshape(I_LR_col, Height, Width);

        case 'GS2GLP'
            % One filtered copy of the PAN per kernel returned by genMTF,
            % then decimation and interp23tap back to the PAN scale.
            kernels = genMTF(ratio, sensor, size(I_MS,3));
            for band = 1 : size(kernels,3)
                panSmooth(:,:,band) = imfilter(I_PAN, real(kernels(:,:,band)), 'replicate'); %#ok<AGROW>
            end
            panSmoothLR = imresize(panSmooth, 1/ratio, 'nearest');
            I_LR_input = interp23tap(panSmoothLR, ratio);
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/indexes_evaluation.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Reduced resolution quality indexes.
%
% Interface:
%           [Q_index, SAM_index, ERGAS_index, sCC, Q2n_index] = indexes_evaluation(I_F,I_GT,ratio,L,Q_blocks_size,flag_cut_bounds,dim_cut,th_values)
%
% Inputs:
%           I_F:                Fused Image;
%           I_GT:               Ground-Truth image;
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value;
%           L:                  Image radiometric resolution;
%           Q_blocks_size:      Block size of the Q-index locally applied;
%           flag_cut_bounds:    Cut the boundaries of the viewed Panchromatic image;
%           dim_cut:            Define the dimension of the boundary cut;
%           th_values:          Flag. If th_values == 1, apply an hard threshold to the dynamic range.
%
% Outputs:
%           Q_index:            Q index;
%           SAM_index:          Spectral Angle Mapper (SAM) index;
%           ERGAS_index:        Erreur Relative Globale Adimensionnelle de Synthèse (ERGAS) index;
%           sCC:                spatial Correlation Coefficient between fused and ground-truth images;
%           Q2n_index:          Q2n index.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Q_index, SAM_index, ERGAS_index, sCC, Q2n_index] = indexes_evaluation(I_F,I_GT,ratio,L,Q_blocks_size,flag_cut_bounds,dim_cut,th_values)

% Optionally drop a border of dim_cut pixels from both images.
if flag_cut_bounds
    I_GT = I_GT(dim_cut:end-dim_cut,dim_cut:end-dim_cut,:);
    I_F = I_F(dim_cut:end-dim_cut,dim_cut:end-dim_cut,:);
end

% Optionally clip the fused image to [0, 2^L].
if th_values
    I_F(I_F > 2^L) = 2^L;
    I_F(I_F < 0) = 0;
end

% The metrics are called from inside Quality_Indices (relative to the
% current folder). onCleanup restores the starting folder even when one of
% the metrics throws; the original 'cd ..' was skipped on error and left the
% session stranded in Quality_Indices.
startDir = pwd;
cd Quality_Indices
restoreDir = onCleanup(@() cd(startDir)); %#ok<NASGU>

Q2n_index = q2n(I_GT,I_F,Q_blocks_size,Q_blocks_size);
Q_index = Q(I_GT,I_F,2^L);
SAM_index = SAM(I_GT,I_F);
ERGAS_index = ERGAS(I_GT,I_F,ratio);
sCC = SCC(I_F,I_GT);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/indexes_evaluation_FS.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Full resolution quality indexes.
%
% Interface:
%           [D_lambda,D_S,QNR_index] = indexes_evaluation_FS(I_F,I_MS_LR,I_PAN,L,th_values,I_MS,sensor,ratio,flagQNR)
%
% Inputs:
%           I_F:                Fused image;
%           I_MS_LR:            MS image;
%           I_PAN:              Panchromatic image;
%           L:                  Image radiometric resolution;
%           th_values:          Flag. If th_values == 1, apply an hard threshold to the dynamic range;
%           I_MS:               MS image upsampled to the PAN size;
%           sensor:             String for type of sensor (e.g. 'WV2','IKONOS');
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value;
%           flagQNR:            if flagQNR == 1, the software uses the QNR otherwise the HQNR is used.
%
% Outputs:
%           D_lambda:           D_lambda index;
%           D_S:                D_S index;
%           QNR_index:          QNR index;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [D_lambda,D_S,QNR_index] = indexes_evaluation_FS(I_F,I_MS_LR,I_PAN,L,th_values,I_MS,sensor,ratio,flagQNR)

% Optionally clip the fused image to [0, 2^L].
if th_values
    I_F(I_F > 2^L) = 2^L;
    I_F(I_F < 0) = 0;
end

% QNR/HQNR are called from inside Quality_Indices. onCleanup restores the
% starting folder even if the metric throws (the original 'cd ..' was
% skipped on error).
startDir = pwd;
cd Quality_Indices
restoreDir = onCleanup(@() cd(startDir)); %#ok<NASGU>

if flagQNR == 1
    [QNR_index,D_lambda,D_S] = QNR(I_F,I_MS,I_MS_LR,I_PAN,ratio);
else
    % 32 is passed as the fifth HQNR argument (presumably a block size).
    [QNR_index,D_lambda,D_S] = HQNR(I_F,I_MS_LR,I_MS,I_PAN,32,sensor,ratio);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/indwt2_working.m
================================================
function X = indwt2_working(W,varargin)
%INDWT2 Inverse nondecimated 2-D wavelet transform.
%   INDWT2 will be removed in a future release of MATLAB. Use the
%   following function instead:
%       iswt2
%
%   W is a decomposition structure with the fields read below (dec, level,
%   sizes, sizeINI, mode, filters.LoR, filters.HiR). Optional arguments:
%   varargin{1} is a component selector string, varargin{2} the level.

% Error in R2015a
% error(message('Wavelet:warnobsolete:ErrorReplaceINDWT2'));

nbIN = nargin-1;
idxCFS = -1;        % -1: no selector given -> full reconstruction
cfsFLAG = false;    % true when the selector starts with 'C' (return coefficients)
if nbIN>0
    nbCELL = numel(W.dec);
    type = varargin{1};
    if ~ischar(type)
        error(message('Wavelet:FunctionArgVal:Invalid_ArgTyp'))
    end
    type = upper(type);
    cfsFLAG = isequal(upper(type(1)),'C');
    if cfsFLAG , type = type(2:end); end
    % Map the selector onto an index; unknown selectors leave idxCFS at -1.
    switch type
        case {'D','H'} ,           idxCFS = 0;
        case {'AA','LL','A','L'} , idxCFS = 1;
        case {'AD','LH'} ,         idxCFS = 2;
        case {'DA','HL'} ,         idxCFS = 3;
        case {'DD','HH'} ,         idxCFS = 4;
    end
    if nbIN>1 , levREC = varargin{2}; else levREC = W.level; end

    if idxCFS>1
        % Single detail component: shift the index to the requested level.
        idxCFS = idxCFS + 3*(W.level-levREC);
        if ~cfsFLAG
            % Zero every other cell so only this component is reconstructed.
            for j=1:nbCELL
                if ~isequal(j,idxCFS);
                    W.dec{j} = zeros(size(W.dec{j}));
                end
            end
        else
            X = W.dec{idxCFS};   % Coefficients
            return
        end

    elseif idxCFS==1   % Approximations (AA or LL)
        if cfsFLAG && levREC==W.level
            X = W.dec{1};
            return;              % Coefficients of Approximation at level MAX
        end
        idxMinToKill = 1 + 3*(W.level-levREC)+1;
        for j=idxMinToKill:nbCELL
            W.dec{j} = zeros(size(W.dec{j}));
        end

    elseif idxCFS==0
        idxMaxToKill = 1 + 3*(W.level-levREC);
        for j=1:idxMaxToKill
            W.dec{j} = zeros(size(W.dec{j}));
        end

    else
    end
end

% Initialization.
Lo = W.filters.LoR;
Hi = W.filters.HiR;
dwtEXTM = W.mode;
perFLAG = isequal(dwtEXTM,'per');
cfs = W.dec;
sizes = W.sizes;
level = W.level;

maxloop = level;
% levREC is only read here when a selector was supplied (idxCFS==1), so it
% is always defined at this point thanks to short-circuit evaluation.
if idxCFS==1 && cfsFLAG , maxloop = (level-levREC); end

% Each pass merges four cells (one 2x2 group) into one reconstructed array,
% which becomes the first cell of the next group.
idxBeg = 1;
for k=1:maxloop
    idxEnd = idxBeg+3;
    dec = reshape(cfs(idxBeg:idxEnd),2,2);
    sizerec = sizes(k+1,:);
    X = recFUNC(dec,sizerec,Lo,Hi,perFLAG);
    cfs(1:idxEnd-1) = {[]};
    cfs{idxEnd} = X;
    idxBeg = idxEnd;
end

if abs(idxCFS)==1 && ~cfsFLAG && length(W.sizeINI)==3
    % X = uint8(X);
end
%-----------------------------------------------------------------------%
function X = recFUNC(dec,sINI,Lo,Hi,perFLAG)
% Reconstruction.
% dec is a 2x2 cell of coefficient arrays, sINI the size to crop the result to.
perm = [2,1,3];
W = cell(1,2);
for i = 1:2
    W{i} = wrec1D(dec{i,1},Lo{2},perm,perFLAG) + ...
        wrec1D(dec{i,2},Hi{2},perm,perFLAG);
end
X = (wrec1D(W{1},Lo{1},[],perFLAG) + wrec1D(W{2},Hi{1},[],perFLAG))/4;

% Extraction of central part
sREC = size(X);
F = floor((sREC-sINI)/2);
C = ceil((sREC-sINI)/2);
X = X(1+F(1):end-C(1),1+F(2):end-C(2),:);
%-----------------------------------------------------------------------%
function X = wrec1D(X,F,perm,perFLAG)
% Convolves X with filter F; when perm is non-empty the array is permuted
% before and after the convolution.
if ~isempty(perm) , X = permute(X,perm); end
if perFLAG
    % Periodic mode: append the first length(F)-1 columns before convolving.
    nb = length(F)-1;
    X = [X X(:,1:nb,:)];
end
X = convn(X,F);
if ~isempty(perm) , X = permute(X,perm); end
%-----------------------------------------------------------------------%

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/interp23tap.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           interp23tap interpolates the image I_Interpolated using a polynomial with 23 coefficients interpolator.
%
% Interface:
%           I_Interpolated = interp23tap(I_Interpolated,ratio)
%
% Inputs:
%           I_Interpolated: Image to interpolate;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Resize factors power of 2.
%
% Outputs:
%           I_Interpolated: Interpolated image.
%
% References:
%           [Aiazzi02]      B. Aiazzi, L. Alparone, S. Baronti, and A. Garzelli, "Context-driven fusion of high spatial and spectral resolution images based on
%                           oversampled multiresolution analysis," IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp. 2300-2312, October
%                           2002.
%           [Aiazzi13]      B. Aiazzi, S. Baronti, M. Selva, and L. Alparone, "Bi-cubic interpolation for shift-free pan-sharpening," ISPRS Journal of Photogrammetry
%                           and Remote Sensing, vol. 86, no. 6, pp. 65-76, December 2013.
%           [Vivone15]      G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms",
%                           IEEE Transaction on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Interpolated = interp23tap(I_Interpolated,ratio)

% Only power-of-two factors are supported; otherwise the input is returned
% unchanged after printing a message (behaviour kept from the original).
if (2^round(log2(double(ratio))) ~= ratio)
    disp('Error: Only resize factors power of 2');
    return;
end

[r,c,b] = size(I_Interpolated);

% 23-tap symmetric kernel: 12 one-sided coefficients mirrored around the centre.
CDF23 = 2.*[0.5 0.305334091185 0 -0.072698593239 0 0.021809577942 0 -0.005192756653 0 0.000807762146 0 -0.000060081482];
CDF23 = [fliplr(CDF23(2:end)) CDF23];
BaseCoeff = CDF23;

% Each stage doubles the size, so log2(ratio) stages are needed. The
% original bound 'ratio/2' matches this only for ratio 2 and 4; for ratio 8
% it ran 4 stages and returned a 16x image.
nStages = round(log2(double(ratio)));

first = 1;
for z = 1 : nStages

    % Zero-stuffed grid at 2^z times the original size.
    I1LRU = zeros((2^z) * r, (2^z) * c, b);

    if first
        % First stage places the samples on the even positions ...
        I1LRU(2:2:end,2:2:end,:) = I_Interpolated;
        first = 0;
    else
        % ... later stages on the odd positions.
        I1LRU(1:2:end,1:2:end,:) = I_Interpolated;
    end

    % Separable filtering: the 1-D kernel is applied, the image transposed,
    % and the kernel applied again, with circular boundary handling.
    for ii = 1 : b
        t = I1LRU(:,:,ii);
        t = imfilter(t',BaseCoeff,'circular');
        I1LRU(:,:,ii) = imfilter(t',BaseCoeff,'circular');
    end

    I_Interpolated = I1LRU;

end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/k_means_clustering.m
================================================
% I_MS: Image to segment
% n_segm: Number of segments
% Output:
% S: Segmentation map.
function S = k_means_clustering(I_MS, n_segm)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+
%%% k-means Segmentation of MS image
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+
% Every pixel becomes one observation whose features are its band values,
% each band divided by that band's maximum. The labels returned by kmeans
% are reshaped back to the image grid.
nRows = size(I_MS,1);
nCols = size(I_MS,2);
nBands = size(I_MS,3);

F1 = zeros(nRows*nCols, nBands);
for ibands = 1 : nBands
    % Convert to double first: with integer input the division below would
    % be integer division and collapse the features to 0/1.
    a = double(I_MS(:,:,ibands));
    peak = max(a(:));
    if peak ~= 0
        F1(:,ibands) = a(:)/peak;
    end
    % peak == 0 means an all-zero band: keep the zeros instead of the NaN
    % column that 0/0 produced before.
end

IDX = kmeans(F1,n_segm);

S = reshape(IDX,[nRows nCols]);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/matrix2latex.m
================================================
function matrix2latex(matrix, filename, varargin)

% function: matrix2latex(...)
% Author:   M. Koehler
% Contact:  koehler@in.tum.de
% Version:  1.1
% Date:     May 09, 2004

% This software is published under the GNU GPL, by the free software
% foundation. For further reading see: http://www.gnu.org/licenses/licenses.html#GPL

% Usage:
% matrix2latex(matrix, filename, varargs)
% where
%   - matrix is a 2 dimensional numerical or cell array
%   - filename is a valid filename, in which the resulting latex code will
%   be stored (the table is appended if the file already exists)
%   - varargs is one or more of the following (denominator, value) combinations
%      + 'rowLabels', array -> Can be used to label the rows of the
%      resulting latex table
%      + 'columnLabels', array -> Can be used to label the columns of the
%      resulting latex table
%      + 'alignment', 'value' -> Can be used to specify the alignment of
%      the table within the latex document. Valid arguments are: 'l', 'c',
%      and 'r' for left, center, and right, respectively
%      + 'format', 'value' -> Can be used to format the input data. 'value'
%      has to be a valid format string, similar to the ones used in
%      fprintf('format', value);
%      + 'size', 'value' -> One of latex' recognized font-sizes, e.g. tiny,
%      HUGE, Large, large, LARGE, etc.
%
% Parameter names are case-insensitive and may be abbreviated to any
% unambiguous prefix.
%
% Example input:
%   matrix = [1.5 1.764; 3.523 0.2];
%   rowLabels = {'row 1', 'row 2'};
%   columnLabels = {'col 1', 'col 2'};
%   matrix2latex(matrix, 'out.tex', 'rowLabels', rowLabels, 'columnLabels', columnLabels, 'alignment', 'c', 'format', '%-6.2f', 'size', 'tiny');
%
% The resulting latex file can be included into any latex document by:
% \input{out.tex}
%
% Enjoy life!!!

    rowLabels = [];
    colLabels = [];
    alignment = 'l';
    format = [];
    textsize = [];
    if (rem(nargin,2) == 1 || nargin < 2)
        % A proper identifier is required here: with 'matrix2latex: ' as the
        % first argument MATLAB treated that string as the whole message.
        error('matrix2latex:invalidArguments', ...
            'Incorrect number of arguments to %s.', mfilename);
    end

    okargs = {'rowlabels','columnlabels', 'alignment', 'format', 'size'};
    for j=1:2:(nargin-2)
        pname = varargin{j};
        pval = varargin{j+1};
        % Prefix match against the known names (what the obsolete strmatch did).
        k = find(strncmp(lower(pname), okargs, length(pname)));
        if isempty(k)
            error('matrix2latex:unknownParameter', ...
                'Unknown parameter name: %s.', pname);
        elseif length(k)>1
            error('matrix2latex:ambiguousParameter', ...
                'Ambiguous parameter name: %s.', pname);
        else
            switch(k)
                case 1  % rowlabels
                    rowLabels = pval;
                    if isnumeric(rowLabels)
                        rowLabels = cellstr(num2str(rowLabels(:)));
                    end
                case 2  % column labels
                    colLabels = pval;
                    if isnumeric(colLabels)
                        colLabels = cellstr(num2str(colLabels(:)));
                    end
                case 3  % alignment
                    alignment = lower(pval);
                    % String comparison instead of '==', which throws when
                    % the two char arrays differ in length ('left' vs 'right').
                    if strcmp(alignment, 'right')
                        alignment = 'r';
                    elseif strcmp(alignment, 'left')
                        alignment = 'l';
                    elseif strcmp(alignment, 'center')
                        alignment = 'c';
                    end
                    if ~any(strcmp(alignment, {'l', 'c', 'r'}))
                        alignment = 'l';
                        warning('matrix2latex:unknownAlignment', ...
                            'Unknown alignment. (Set it to ''left''.)');
                    end
                case 4  % format
                    % Kept verbatim: lower-casing turned e.g. '%E' into '%e'
                    % and altered literal text inside the format string.
                    format = pval;
                case 5  % text size
                    textsize = pval;
            end
        end
    end

    fid = fopen(filename, 'a');
    if fid == -1
        error('matrix2latex:fileOpenFailed', ...
            'Could not open %s for writing.', filename);
    end
    % Close the file on every exit path, including errors raised below.
    closeFile = onCleanup(@() fclose(fid)); %#ok<NASGU>

    width = size(matrix, 2);
    height = size(matrix, 1);

    % Numeric input is converted cell by cell into strings.
    if isnumeric(matrix)
        matrix = num2cell(matrix);
        for h=1:height
            for w=1:width
                if(~isempty(format))
                    matrix{h, w} = num2str(matrix{h, w}, format);
                else
                    matrix{h, w} = num2str(matrix{h, w});
                end
            end
        end
    end

    if(~isempty(textsize))
        fprintf(fid, '\\begin{%s}', textsize);
    end

    % Column specification: optional label column plus one column per entry.
    fprintf(fid, '\\begin{tabular}{|');

    if(~isempty(rowLabels))
        fprintf(fid, 'l|');
    end
    for i=1:width
        fprintf(fid, '%c|', alignment);
    end
    fprintf(fid, '}\r\n');

    fprintf(fid, '\\hline\r\n');

    % Header row.
    if(~isempty(colLabels))
        if(~isempty(rowLabels))
            fprintf(fid, '&');
        end
        for w=1:width-1
            fprintf(fid, '\\textbf{%s}&', colLabels{w});
        end
        fprintf(fid, '\\textbf{%s}\\\\\\hline\r\n', colLabels{width});
    end

    % Data rows.
    for h=1:height
        if(~isempty(rowLabels))
            fprintf(fid, '\\textbf{%s}&', rowLabels{h});
        end
        for w=1:width-1
            fprintf(fid, '%s&', matrix{h, w});
        end
        fprintf(fid, '%s\\\\\\hline\r\n', matrix{h, width});
    end

    fprintf(fid, '\\end{tabular}\r\n');

    if(~isempty(textsize))
        fprintf(fid, '\\end{%s}', textsize);
    end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/ndwt2_working.m
================================================
function varargout = ndwt2_working(X,level,varargin)
%NDWT2 Nondecimated 2-D wavelet transform.
%   NDWT2 will be removed in a future release of MATLAB.
%   Use the
%   following function instead:
%       swt2
%
%   varargin{1} selects the filters (wavelet name, struct, or cell array);
%   the remaining arguments are name/value pairs, of which only 'mode' is read.

% Error in R2015a
% error(message('Wavelet:warnobsolete:ErrorReplaceNDWT2'));

nbIn = length(varargin);
if nbIn < 1
    error(message('MATLAB:narginchk:notEnoughInputs'));
elseif nbIn > 5
    error(message('MATLAB:narginchk:tooManyInputs'));
end

% One filter per direction is stored in each 1x2 cell.
LoD = cell(1,2); HiD = cell(1,2); LoR = cell(1,2); HiR = cell(1,2);
if ischar(varargin{1})
    % Wavelet name: the same four filters are used for both directions.
    [LD,HD,LR,HR] = wfilters(varargin{1});
    for k = 1:2
        LoD{k} = LD; HiD{k} = HD; LoR{k} = LR; HiR{k} = HR;
    end

elseif isstruct(varargin{1})
    if isfield(varargin{1},'w1') && isfield(varargin{1},'w2')
        % Struct with wavelet names w1 / w2, one per direction.
        for k = 1:2
            [LoD{k},HiD{k},LoR{k},HiR{k}] = ...
                wfilters(varargin{1}.(['w' int2str(k)]));
        end
    elseif isfield(varargin{1},'LoD') && isfield(varargin{1},'HiD') && ...
            isfield(varargin{1},'LoR') && isfield(varargin{1},'HiR')
        % Struct that already carries the four filter cells.
        for k = 1:2
            LoD{k} = varargin{1}.LoD{k}; HiD{k} = varargin{1}.HiD{k};
            LoR{k} = varargin{1}.LoR{k}; HiR{k} = varargin{1}.HiR{k};
        end
    else
        error(message('Wavelet:FunctionArgVal:Invalid_ArgVal'));
    end

elseif iscell(varargin{1})
    if ischar(varargin{1}{1})
        % Cell of two wavelet names, one per direction.
        for k = 1:2
            [LoD{k},HiD{k},LoR{k},HiR{k}] = wfilters(varargin{1}{k});
        end
    else
        % Cell of four filters, each reused for both directions.
        LoD(1:end) = varargin{1}(1); HiD(1:end) = varargin{1}(2);
        LoR(1:end) = varargin{1}(3); HiR(1:end) = varargin{1}(4);
    end
else
end

% Name/value pairs. NOTE(review): a trailing name without a value would
% index past the end of varargin here.
nextArg = 2;
dwtEXTM = 'sym';
while nbIn>=nextArg
    argName = varargin{nextArg};
    argVal  = varargin{nextArg+1};
    nextArg = nextArg + 2;
    switch argName
        case 'mode' , dwtEXTM = argVal;
    end
end

% Initialization.
if isempty(X) , varargout{1} = []; return; end
sX = size(X);
X = double(X);
sizes = zeros(level+1,length(sX));
sizes(level+1,:) = sX;

% Each level decomposes the current approximation into four arrays. The new
% group is put in front of the cell list, replacing the previous
% approximation (the old first cell).
for k=1:level
    dec = decFUNC(X,LoD,HiD,dwtEXTM);
    X = dec{1,1,1};
    sizes(level+1-k,:) = size(X);
    dec = reshape(dec,4,1,1);
    if k>1
        cfs(1) = [];
        cfs = cat(1,dec,cfs);
    else
        cfs = dec;
    end
end

% Pack the result structure.
WT.sizeINI = sX;
WT.level = level;
WT.filters.LoD = LoD;
WT.filters.HiD = HiD;
WT.filters.LoR = LoR;
WT.filters.HiR = HiR;
WT.mode = dwtEXTM;
WT.dec = cfs;
WT.sizes = sizes;
varargout{1} = WT;

%-------------------------------------------------------------------------%
function dec = decFUNC(X,LoD,HiD,dwtEXTM)
% One decomposition level: filter along one direction (no permutation),
% then filter both outputs along the other (permuted) direction. Returns a
% 2x2 cell of coefficient arrays.

dec = cell(2,2);
permVect = [];
[a_Lo,d_Hi] = wdec1D(X,LoD{1},HiD{1},permVect,dwtEXTM);
permVect = [2,1,3];
[dec{1,1},dec{1,2}] = wdec1D(a_Lo,LoD{2},HiD{2},permVect,dwtEXTM);
[dec{2,1},dec{2,2}] = wdec1D(d_Hi,LoD{2},HiD{2},permVect,dwtEXTM);
%-------------------------------------------------------------------------%
function [L,H] = wdec1D(X,Lo,Hi,perm,dwtEXTM)
% Extends X along its second dimension according to dwtEXTM, convolves it
% with Lo and Hi, and crops both results (see the two switch blocks below).

if ~isempty(perm) , X = permute(X,perm); end
sX = size(X);
if length(sX)<3 , sX(3) = 1; end

lf = length(Lo);    % filter length
lx = sX(2);         % signal length along the filtered dimension
lc = lx+lf-1;       % length kept after the central crop
switch dwtEXTM
    case 'zpd'             % Zero extension.

    case {'sym','symh'}    % Symmetric extension (half-point).
        X = [X(:,lf-1:-1:1,:) , X , X(:,end:-1:end-lf+1,:)];

    case 'sp0'             % Smooth extension of order 0.
        X = [X(:,ones(1,lf-1),:) , X , X(:,lx*ones(1,lf-1),:)];

    case {'sp1','spd'}     % Smooth extension of order 1.
        Z = zeros(sX(1),sX(2)+ 2*lf-2,sX(3));
        Z(:,lf:lf+lx-1,:) = X;
        last = sX(2)+lf-1;
        for k = 1:lf-1
            Z(:,last+k,:) = 2*Z(:,last+k-1,:)- Z(:,last+k-2,:);
            Z(:,lf-k,:)   = 2*Z(:,lf-k+1,:)- Z(:,lf-k+2,:);
        end
        X = Z; clear Z;

    case 'symw'            % Symmetric extension (whole-point).
        X = [X(:,lf:-1:2,:) , X , X(:,end-1:-1:end-lf,:)];

    case {'asym','asymh'}  % Antisymmetric extension (half-point).
        X = [-X(:,lf-1:-1:1,:) , X , -X(:,end:-1:end-lf+1,:)];

    case 'asymw'           % Antisymmetric extension (whole-point).
        X = [-X(:,lf:-1:2,:) , X , -X(:,end-1:-1:end-lf,:)];

    case 'rndu'            % Uniformly randomized extension.
        % NOTE(review): this branch calls randn, exactly like 'rndn' below.
        X = [randn(sX(1),lf-1,sX(3)) , X , randn(sX(1),lf-1,sX(3))];

    case 'rndn'            % Normally randomized extension.
        X = [randn(sX(1),lf-1,sX(3)) , X , randn(sX(1),lf-1,sX(3))];

    case 'ppd'             % Periodized extension (1).
        X = [X(:,end-lf+2:end,:) , X , X(:,1:lf-1,:)];

    case 'per'             % Periodized extension (2).
        % Odd lengths are first padded by repeating the last column.
        if rem(lx,2) , X = [X , X(:,end,:)]; end
        X = [X(:,end-lf+2:end,:) , X , X(:,1:lf-1,:)];
end
L = convn(X,Lo);
H = convn(X,Hi);
clear X
switch dwtEXTM
    case 'zpd'
    otherwise
        % Drop lf-1 columns on each side, then keep the central lc columns.
        lenL = size(L,2);
        first = lf; last = lenL-lf+1;
        L = L(:,first:last,:); H = H(:,first:last,:);
        lenL = size(L,2);
        first = 1+floor((lenL-lc)/2); last = first+lc-1;
        L = L(:,first:last,:); H = H(:,first:last,:);
end
if isequal(dwtEXTM,'per')
    % Periodized mode returns exactly lx columns.
    first = 1; last = lx;
    L = L(:,first:last,:);
    H = H(:,first:last,:);
end

if ~isempty(perm)
    L = permute(L,perm);
    H = permute(H,perm);
end
%-------------------------------------------------------------------------%

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/printAllImagesImWriteFR.m
================================================
% Script: stacks the fused images found in the workspace into MatrixPrint,
% one image per index along the fourth dimension.
MatrixPrint(:,:,:,1) = I_MS;
MatrixPrint(:,:,:,2) = I_BT_H;
MatrixPrint(:,:,:,3) = I_BDSD;
MatrixPrint(:,:,:,4) = I_C_BDSD;
MatrixPrint(:,:,:,5) = I_BDSD_PC;
MatrixPrint(:,:,:,6) = I_GS;
MatrixPrint(:,:,:,7) = I_GSA;
MatrixPrint(:,:,:,8) = I_C_GSA;
MatrixPrint(:,:,:,9) = I_PRACS;
MatrixPrint(:,:,:,10) = I_AWLP;
MatrixPrint(:,:,:,11) = I_MTF_GLP;
MatrixPrint(:,:,:,12) = I_MTF_GLP_FS;
MatrixPrint(:,:,:,13) = I_MTF_GLP_HPM;
MatrixPrint(:,:,:,14) = I_MTF_GLP_HPM_H;
MatrixPrint(:,:,:,15) = I_MTF_GLP_HPM_R;
MatrixPrint(:,:,:,16) = I_MTF_GLP_CBD;
MatrixPrint(:,:,:,17) = I_C_MTF_GLP_CBD;
MatrixPrint(:,:,:,18) = I_MF;
MatrixPrint(:,:,:,19) = I_FE_HPM;
MatrixPrint(:,:,:,20) = I_SR_D;
MatrixPrint(:,:,:,21) = I_PWMBF;
MatrixPrint(:,:,:,22) = I_TV;
MatrixPrint(:,:,:,23) = I_RR;
MatrixPrint(:,:,:,24) = I_PNN;
MatrixPrint(:,:,:,25) = I_PNN_IDX;
MatrixPrint(:,:,:,26) = I_A_PNN;
MatrixPrint(:,:,:,27) = I_A_PNN_FT;

% Bands used for the RGB rendering: 4-band vs. other (8-band) products.
if size(I_MS,3) == 4
    vect_index_RGB = [3,2,1];
else
    vect_index_RGB = [5,3,2];
end

titleImages = algorithms;

% fullfile builds the path with the platform separator; the hard-coded
% '\Tools' only resolved on Windows.
addpath(fullfile(pwd,'Tools'));
figure, MP = showImagesAll(MatrixPrint,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,0);

% Write straight into Outputs/ instead of cd-ing there and back, so a failed
% imwrite no longer leaves the session inside Outputs.
for ii = 1 : size(MP,4)
    imwrite(MP(:,:,:,ii),fullfile('Outputs',sprintf('%s.png',algorithms{ii})));
end
imwrite(showPan(I_PAN,0,1,flag_cut_bounds,dim_cut),fullfile('Outputs','PAN.png'))

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/printAllImagesImWriteRR.m
================================================
% Script: same as the full-resolution variant, with the ground truth I_GT
% as first image.
MatrixPrint(:,:,:,1) = I_GT;
MatrixPrint(:,:,:,2) = I_MS;
MatrixPrint(:,:,:,3) = I_BT_H;
MatrixPrint(:,:,:,4) = I_BDSD;
MatrixPrint(:,:,:,5) = I_C_BDSD;
MatrixPrint(:,:,:,6) = I_BDSD_PC;
MatrixPrint(:,:,:,7) = I_GS;
MatrixPrint(:,:,:,8) = I_GSA;
MatrixPrint(:,:,:,9) = I_C_GSA;
MatrixPrint(:,:,:,10) = I_PRACS;
MatrixPrint(:,:,:,11) = I_AWLP;
MatrixPrint(:,:,:,12) = I_MTF_GLP;
MatrixPrint(:,:,:,13) = I_MTF_GLP_FS;
MatrixPrint(:,:,:,14) = I_MTF_GLP_HPM;
MatrixPrint(:,:,:,15) = I_MTF_GLP_HPM_H;
MatrixPrint(:,:,:,16) = I_MTF_GLP_HPM_R;
MatrixPrint(:,:,:,17) = I_MTF_GLP_CBD;
MatrixPrint(:,:,:,18) = I_C_MTF_GLP_CBD;
MatrixPrint(:,:,:,19) = I_MF;
MatrixPrint(:,:,:,20) = I_FE_HPM;
MatrixPrint(:,:,:,21) = I_SR_D;
MatrixPrint(:,:,:,22) = I_PWMBF;
MatrixPrint(:,:,:,23) = I_TV;
MatrixPrint(:,:,:,24) = I_RR;
MatrixPrint(:,:,:,25) = I_PNN;
MatrixPrint(:,:,:,26) = I_PNN_IDX;
MatrixPrint(:,:,:,27) = I_A_PNN;
MatrixPrint(:,:,:,28) = I_A_PNN_FT;

if size(I_MS,3) == 4
    vect_index_RGB = [3,2,1];
else
    vect_index_RGB = [5,3,2];
end

titleImages = algorithms;

% Platform-independent path (the original '\Tools' was Windows-only).
addpath(fullfile(pwd,'Tools'));
figure, MP = showImagesAll(MatrixPrint,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,0);

% Write into Outputs/ without changing the current folder.
for ii = 1 : size(MP,4)
    imwrite(MP(:,:,:,ii),fullfile('Outputs',sprintf('%s.png',algorithms{ii})));
end
imwrite(showPan(I_PAN,0,1,flag_cut_bounds,dim_cut),fullfile('Outputs','PAN.png'))

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/printImage.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Print EPS image.
%
% Interface:
%           printImage(I_MS,title)
%
% Inputs:
%           I_MS:           Image to print;
%           title:          Filename.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function printImage(I_MS,title)

figure,imshow(I_MS,'Border','tight','InitialMagnification',100);

% The original always ran the filename through sprintf(title,'.eps'), so
% escape sequences in the name (e.g. the '\n' in 'C:\new') were expanded.
% A name without '%' is now used verbatim; names containing a conversion are
% still formatted exactly as before.
if isempty(strfind(title, '%'))
    outName = title;
else
    outName = sprintf(title,'.eps');
end
print(outName,'-depsc2','-r300');
% print(sprintf(title,'.png'),'-dpng','-r400');

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/rectangleonimage.m
================================================
function ent=rectangleonimage(pic,sw,n, ch, c, scale, type)
% Draws a rectangular frame on an image and, optionally, pastes a zoomed
% copy of the framed region into one corner.
%
% pic:   input image (2-D gray or 3-band color); the frame value is 1, so
%        the image is presumably scaled to [0,1]
% sw:    [x0 x1 y0 y1] = first row, last row, first column, last column
% n:     the width of the line (the frame is n+1 pixels thick)
% ch:    ignored on input; the number of bands is taken from size(pic)
%        (1: gray image, 3: color image, anything else: no frame is drawn)
% c:     the color of the line.
%        color image: c=1 (red); c=2 (green); c=3 (blue); others: inverse
%        gray image:  c=1 (white); c=2 (black); others: inverse
% scale: the scale of zooming in (nearest-neighbour). Optional; when omitted
%        or empty, only the frame is drawn and no close-up is pasted
% type:  corner receiving the close-up. Optional, default 1.
%        type =1 (put to down-left); type =2 (put to down-right);
%        type =3 (put to up-right);  type =4 (put to up-left);
% Liang-Jian Deng (UESTC)
% improved time: 2017-3-11
%==============================%
% The original guard was 'if nargin< 5, scale = []', which tested the wrong
% argument count and left scale/type undefined when they were omitted.
if nargin < 6
    scale = [];
end
if nargin < 7
    type = 1;
end

x0=sw(1);x1=sw(2);y0=sw(3);y1=sw(4);

[p q ch]=size(pic);
max_val = 1;

% The four sides of the frame, in the order the original painted them:
% left, right, top, bottom.
sideRows = {x0:x1,   x0:x1,   x0:x0+n, x1-n:x1};
sideCols = {y0:y0+n, y1-n:y1, y0:y1,   y0:y1};

%ch=1:gray image; ch=3: color image
if ch==1
    for k = 1:4
        rr = sideRows{k};
        cc = sideCols{k};
        if c==1
            pic(rr,cc)=max_val;
        elseif c==2
            pic(rr,cc)=0;
        else
            % invert; pixels shared by two sides (the corners) are inverted
            % twice, exactly as in the original
            pic(rr,cc)=max_val-pic(rr,cc);
        end
    end
end

if ch==3
    for k = 1:4
        rr = sideRows{k};
        cc = sideCols{k};
        if c==1          % red
            pic(rr,cc,1)=max_val; pic(rr,cc,2)=0;       pic(rr,cc,3)=0;
        elseif c==2      % green
            pic(rr,cc,1)=0;       pic(rr,cc,2)=max_val; pic(rr,cc,3)=0;
        elseif c==3      % blue
            pic(rr,cc,1)=0;       pic(rr,cc,2)=0;       pic(rr,cc,3)=max_val;
        else             % inverse
            pic(rr,cc,1:3)=max_val-pic(rr,cc,1:3);
        end
    end
end

ent=pic;

if isempty(scale)
    % No zoom factor: return the framed image only.
    return;
end

% The close-up is cut from the framed image, so it includes the frame.
sampIm = pic(x0:x1, y0:y1, :);
SampIm = imresize(sampIm, scale,'nearest'); % nearest to zooming in the local part
[a, b, third] = size(SampIm); %#ok<ASGLU>

switch type
    case 1 % put zoom in image on the down-left
        ent((p-a+1):p,1:b, :) = SampIm;
    case 2 % put zoom in image on the down-right
        ent((p-a+1):p,(q-b+1):q, :) = SampIm;
    case 3 % put zoom in image on the up-right
        ent(1:a,(q-b+1):q, :) = SampIm;
    case 4 % put zoom in image on the up-left
        ent(1:a,1:b, :) = SampIm;
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/resize_images.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Resize_images
%           generates the low resolution panchromatic (PAN) and multispectral (MS) images according to Wald's protocol.
%
% Interface:
%           [I_MS_LR, I_PAN_LR] = resize_images(I_MS,I_PAN,ratio,sensor)
%
% Inputs:
%           I_MS:           MS image upsampled at PAN scale;
%           I_PAN:          PAN image;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value;
%           sensor:         String for type of sensor (e.g. 'WV2', 'IKONOS').
%
% Outputs:
%           I_MS_LR:        Low Resolution MS image;
%           I_PAN_LR:       Low Resolution PAN image.
%
% References:
%           [Wald97]        L. Wald, T. Ranchin, and M. Mangolini, Fusion of satellite images of different spatial resolutions: assessing the quality of resulting images,
%                           Photogrammetric Engineering and Remote Sensing, vol. 63, no. 6, pp. 691-699, June 1997.
%           [Aiazzi02]      B. Aiazzi, L. Alparone, S. Baronti, and A. Garzelli, Context-driven fusion of high spatial and spectral resolution images based on
%                           oversampled multiresolution analysis, IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp. 2300-2312, October
%                           2002.
%           [Aiazzi06]      B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, MTF-tailored multiscale fusion of high-resolution MS and Pan imagery,
%                           Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006.
%           [Vivone14a]     G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, Contrast and error-based fusion schemes for multispectral
%                           image pansharpening, IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15]      G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms,
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [I_MS_LR, I_PAN_LR] = resize_images(I_MS,I_PAN,ratio,sensor)
% Low-pass filters the MS image with MTF(...) and decimates it by 'ratio'
% with nearest-neighbour sampling; the PAN image is reduced with imresize's
% default method.

I_MS = double(I_MS);
I_PAN = double(I_PAN);

I_MS_LP = MTF(I_MS,sensor,ratio);

%%% Decimation MS
% imresize only resizes the first two dimensions, so the whole band stack is
% decimated in one call. The original preallocated round(size/ratio) rows and
% columns while imresize returns ceil(size/ratio), which raised a dimension
% mismatch whenever the image size was not a multiple of 'ratio'.
I_MS_LR = double(imresize(I_MS_LP,1/ratio,'nearest'));

I_PAN_LR = imresize(I_PAN, 1/ratio);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage4.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Visualize and print a four-band multispectral image.
%
% Interface:
%           showImage4(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L)
%
% Inputs:
%           I_MS:               Four band multispectral image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed Panchromatic image;
%           dim_cut:            Define the dimension of the boundary cut;
%           th_values:          Flag. If th_values == 1, apply an hard threshold to the dynamic range;
%           L:                  Radiometric resolution of the input image.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage4(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L)
% Renders the first three bands of I_MS through viewimage in reversed band
% order and, when 'print' is set, sends the result to printImage as
% Outputs/<id>.eps.

% Optional border removal.
if flag_cut_bounds
    rowSel = dim_cut:size(I_MS,1)-dim_cut;
    colSel = dim_cut:size(I_MS,2)-dim_cut;
    I_MS = I_MS(rowSel,colSel,:);
end

% Optional clipping to [0, 2^L].
if th_values
    topValue = 2^L;
    I_MS(I_MS > topValue) = topValue;
    I_MS(I_MS < 0) = 0;
end

rendered = viewimage(I_MS(:,:,1:3));
rendered = rendered(:,:,[3 2 1]);   % reverse the band order for display

if print
    epsName = sprintf('Outputs/%d.eps',id);
    printImage(rendered,epsName);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage4LR.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Visualize and print the original four-band multispectral image.
%
% Interface:
%           showImage4LR(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio)
%
% Inputs:
%           I_MS:               Four band multispectral image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed Panchromatic image;
%           dim_cut:            Define the dimension of the boundary cut;
%           th_values:          Flag. If th_values == 1, apply an hard threshold to the dynamic range;
%           L:                  Radiometric resolution of the input image;
%           ratio:              Resize factor.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage4LR(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio)
% Renders the first three bands of the low-resolution image through
% viewimage in reversed band order and optionally prints Outputs/<id>.eps.

if flag_cut_bounds
    % Border width at the low-resolution scale.
    margin = round(dim_cut/ratio);
    % When dim_cut is small relative to ratio the margin rounds to 0 and the
    % original range '0:end-0' failed with an invalid index; start at 1 then.
    firstIdx = max(margin,1);
    I_MS = I_MS(firstIdx:end-margin,firstIdx:end-margin,:);
end

% Optional clipping to [0, 2^L].
if th_values
    I_MS(I_MS > 2^L) = 2^L;
    I_MS(I_MS < 0) = 0;
end

IMN = viewimage(I_MS(:,:,1:3));
IMN = IMN(:,:,3:-1:1);

if print
    printImage(IMN,sprintf('Outputs/%d.eps',id));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage4LR_zoomin.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Visualize and print the original four-band multispectral image.
%
% Interface:
%           showImage4LR_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio,location1,location2)
%
% Inputs:
%           I_MS:               Four band multispectral image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed Panchromatic image;
%           dim_cut:            Define the dimension of the boundary cut;
%           th_values:          Flag. If th_values == 1, apply an hard threshold to the dynamic range;
%           L:                  Radiometric resolution of the input image;
%           ratio:              Resize factor.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage4LR_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio, location1, location2)
% Shows the first three bands (reversed order) with one or two framed
% regions and their 3x close-ups pasted into the image corners.
%   location1: [x0 x1 y0 y1] of the first region (blue frame, close-up at
%              the down-left corner);
%   location2: optional second region (green frame, close-up at the
%              down-right corner); may be omitted or empty.

% location2 used to be mandatory: omitting it made isempty(location2) fail.
if nargin < 10
    location2 = [];
end

if flag_cut_bounds
    % Border width at the low-resolution scale; a margin that rounds to 0
    % must not produce the invalid index 0.
    margin = round(dim_cut/ratio);
    firstIdx = max(margin,1);
    I_MS = I_MS(firstIdx:end-margin,firstIdx:end-margin,:);
end

% Optional clipping to [0, 2^L].
if th_values
    I_MS(I_MS > 2^L) = 2^L;
    I_MS(I_MS < 0) = 0;
end

IMN = viewimage(I_MS(:,:,1:3));
IMN = IMN(:,:,3:-1:1);

% rectangleonimage(pic, sw, n, ch, c, scale, type):
% type =1 (put to down-left); type =2 (put to down-right);
% type =3 (put to up-right);  type =4 (put to up-left);
ent=rectangleonimage(IMN,location1,1, 3, 3, 3, 1);      % close-up at the down-left corner
if ~isempty(location2)
    ent=rectangleonimage(ent,location2,1, 3, 2, 3, 2);  % close-up at the down-right corner
end
figure,imshow(ent,[])

% NOTE(review): the printed image is IMN (without frames and close-ups),
% not ent - kept as in the original; confirm whether that is intended.
if print
    printImage(IMN,sprintf('Outputs/%d.eps',id));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage4_zoomin.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Visualize and print a four-band multispectral image.
%
% Interface:
%           showImage4_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,location1,location2)
%
% Inputs:
%           I_MS:               Four band multispectral image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed Panchromatic image;
%           dim_cut:            Define the dimension of the boundary cut;
%           th_values:          Flag. If th_values == 1, apply an hard threshold to the dynamic range;
%           L:                  Radiometric resolution of the input image.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage4_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L, location1, location2)
% Shows the first three bands (reversed order) with one or two framed
% regions and their 3x close-ups pasted into the image corners.
%   location1: [x0 x1 y0 y1] of the first region (blue frame, close-up at
%              the down-left corner);
%   location2: optional second region (green frame, close-up at the
%              down-right corner); may be omitted or empty.

% location2 used to be mandatory: omitting it made isempty(location2) fail.
if nargin < 9
    location2 = [];
end

% Optional border removal.
if flag_cut_bounds
    I_MS = I_MS(dim_cut:end-dim_cut,dim_cut:end-dim_cut,:);
end

% Optional clipping to [0, 2^L].
if th_values
    I_MS(I_MS > 2^L) = 2^L;
    I_MS(I_MS < 0) = 0;
end

IMN = viewimage(I_MS(:,:,1:3));
IMN = IMN(:,:,3:-1:1);

% rectangleonimage(pic, sw, n, ch, c, scale, type):
% type =1 (put to down-left); type =2 (put to down-right);
% type =3 (put to up-right);  type =4 (put to up-left);
ent=rectangleonimage(IMN,location1,1, 3, 3, 3, 1);      % close-up at the down-left corner
if ~isempty(location2)
    ent=rectangleonimage(ent,location2,1, 3, 2, 3, 2);  % close-up at the down-right corner
end
figure,imshow(ent,[])

% NOTE(review): the printed image is IMN (without frames and close-ups),
% not ent - kept as in the original; confirm whether that is intended.
if print
    printImage(IMN,sprintf('Outputs/%d.eps',id));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage8.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Visualize and print an eight-band multispectral image.
%
% Interface:
%           showImage8(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L)
%
% Inputs:
%           I_MS:               Eight band multispectral image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed Panchromatic image;
%           dim_cut:            Define the dimension of the boundary cut;
%           th_values:          Flag. If th_values == 1, apply an hard threshold to the dynamic range;
%           L:                  Radiometric resolution of the input image.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage8(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L)
% Stretch and show bands [1,3,5] of a multispectral image and, when
% requested, print the result to EPS. The display itself is done by viewimage.

% Boundary crop of dim_cut pixels.
if flag_cut_bounds
    rows = dim_cut:size(I_MS,1)-dim_cut;
    cols = dim_cut:size(I_MS,2)-dim_cut;
    I_MS = I_MS(rows,cols,:);
end

% Hard threshold of the dynamic range to [0, 2^L].
if th_values
    upper = 2^L;
    I_MS(I_MS > upper) = upper;
    I_MS(I_MS < 0) = 0;
end

% id == 1 uses viewimage's default saturation; any other id uses [0.01 0.995].
bands = I_MS(:,:,[1,3,5]);
if id == 1
    IMN = viewimage(bands);
else
    IMN = viewimage(bands,[0.01 0.995]);
end
IMN = IMN(:,:,3:-1:1);

if print
    printImage(IMN,sprintf('Outputs/%d.eps',id));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage8LR.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Visualize and print an eight-band multispectral image.
%
% Interface:
%           showImage8LR(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio)
%
% Inputs:
%           I_MS:               Eight band multispectral image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed image;
%           dim_cut:            Define the dimension of the boundary cut;
%           th_values:          Flag. If th_values == 1, apply a hard threshold to the dynamic range;
%           L:                  Radiometric resolution of the input image;
%           ratio:              Resize factor.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage8LR(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio)
% Stretch and show bands [1,3,5] of a low-resolution multispectral image and,
% when requested, print the result to EPS. The display itself is done by viewimage.

% Boundary crop: the offset is dim_cut rescaled by ratio.
if flag_cut_bounds
    margin = round(dim_cut/ratio);
    I_MS = I_MS(margin:end-margin,margin:end-margin,:);
end

% Hard threshold of the dynamic range to [0, 2^L].
if th_values
    upper = 2^L;
    I_MS(I_MS > upper) = upper;
    I_MS(I_MS < 0) = 0;
end

% id == 1 uses viewimage's default saturation; any other id uses [0.01 0.995].
bands = I_MS(:,:,[1,3,5]);
if id == 1
    IMN = viewimage(bands);
else
    IMN = viewimage(bands,[0.01 0.995]);
end
IMN = IMN(:,:,3:-1:1);

if print
    printImage(IMN,sprintf('Outputs/%d.eps',id));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage8LR_zoomin.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Visualize (with close-ups) and print an eight-band multispectral image.
%
% Interface:
%           showImage8LR_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio,location1,location2)
%
% Inputs:
%           I_MS:               Eight band multispectral image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed image;
%           dim_cut:            Define the dimension of the boundary cut;
%           th_values:          Flag. If th_values == 1, apply a hard threshold to the dynamic range;
%           location1:          Region of the first close-up (forwarded to rectangleonimage);
%           location2:          Region of the second close-up; may be empty;
%           L:                  Radiometric resolution of the input image;
%           ratio:              Resize factor.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage8LR_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio, location1, location2)
% Show bands [1,3,5] of a low-resolution multispectral image with framed
% close-ups and, when requested, print the stretched image to EPS.
%   location1, location2: forwarded unchanged to rectangleonimage; when
%                         location2 is empty only the first close-up is drawn.

% Boundary crop: the offset is dim_cut rescaled by ratio.
if flag_cut_bounds
    margin = round(dim_cut/ratio);
    I_MS = I_MS(margin:end-margin,margin:end-margin,:);
end

% Hard threshold of the dynamic range to [0, 2^L].
if th_values
    upper = 2^L;
    I_MS(I_MS > upper) = upper;
    I_MS(I_MS < 0) = 0;
end

% id == 1 uses viewimage's default saturation; any other id uses [0.01 0.995].
bands = I_MS(:,:,[1,3,5]);
if id == 1
    IMN = viewimage(bands);
else
    IMN = viewimage(bands,[0.01 0.995]);
end
IMN = IMN(:,:,3:-1:1);

% type =1 (put to down-left); type =2 (put to down-right);
% type =3 (put to up-right);  type =4 (put to up-left);
ent = rectangleonimage(IMN,location1,1, 3, 3, 3, 1);      % put close-up to up-right corner
if ~isempty(location2)
    ent = rectangleonimage(ent,location2,1, 3, 2, 3, 2);  % put close-up to down-right corner
end
figure,imshow(ent,[])

% NOTE(review): the printed image is IMN (without the close-up frames) - confirm this is intended.
if print
    printImage(IMN,sprintf('Outputs/%d.eps',id));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage8_zoomin.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Visualize (with close-ups) and print an eight-band multispectral image.
%
% Interface:
%           showImage8_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,location1,location2)
%
% Inputs:
%           I_MS:               Eight band multispectral image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed image;
%           dim_cut:            Define the dimension of the boundary cut;
%           th_values:          Flag. If th_values == 1, apply a hard threshold to the dynamic range;
%           location1:          Region of the first close-up (forwarded to rectangleonimage);
%           location2:          Region of the second close-up; may be empty;
%           L:                  Radiometric resolution of the input image.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage8_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L, location1, location2)
% Show bands [1,3,5] of a multispectral image with framed close-ups and,
% when requested, print the stretched image to EPS.
%   location1, location2: forwarded unchanged to rectangleonimage; when
%                         location2 is empty only the first close-up is drawn.

% Boundary crop of dim_cut pixels.
if flag_cut_bounds
    rows = dim_cut:size(I_MS,1)-dim_cut;
    cols = dim_cut:size(I_MS,2)-dim_cut;
    I_MS = I_MS(rows,cols,:);
end

% Hard threshold of the dynamic range to [0, 2^L].
if th_values
    upper = 2^L;
    I_MS(I_MS > upper) = upper;
    I_MS(I_MS < 0) = 0;
end

% id == 1 uses viewimage's default saturation; any other id uses [0.01 0.995].
bands = I_MS(:,:,[1,3,5]);
if id == 1
    IMN = viewimage(bands);
else
    IMN = viewimage(bands,[0.01 0.995]);
end
IMN = IMN(:,:,3:-1:1);

% type =1 (put to down-left); type =2 (put to down-right);
% type =3 (put to up-right);  type =4 (put to up-left);
ent = rectangleonimage(IMN,location1,1, 3, 3, 3, 1);      % put close-up to up-right corner
if ~isempty(location2)
    ent = rectangleonimage(ent,location2,1, 3, 2, 3, 2);  % put close-up to down-right corner
end
figure,imshow(ent,[])

% NOTE(review): the printed image is IMN (without the close-up frames) - confirm this is intended.
if print
    printImage(IMN,sprintf('Outputs/%d.eps',id));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImagesAll.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Visualize all the images applying the same stretching for visual comparison.
%
% Interface:
%           MatrixPrint = showImagesAll(MatrixImage,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,flagPAN)
%
% Inputs:
%           MatrixImage:        Matrix that contains all the images to visualize; Size: [M x N x B x Z], where [M x N] is the
%                               dimension of a single image band, B represents the number of bands for each image, and Z is the number of images to plot.
%           titleImages:        Vector of strings that represents the titles for each image to plot; Size: [1 x Z].
%           vect_index_RGB:     Identify the bands to plot to obtain an RGB representation of the multispectral data;
%           flag_cut_bounds:    Cut the boundaries of the images to plot;
%           dim_cut:            Define the dimension of the boundary cut;
%           flagPAN:            Flag. If flagPAN == 1, the first image to plot is the panchromatic image otherwise it is the ground-truth.
%
% Outputs:
%           MatrixPrint:        Matrix, with the same structure of MatrixImage, which contains the plotted images.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function MatrixPrint = showImagesAll(MatrixImage,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,flagPAN)
% All images that share the stretch are concatenated side by side, stretched
% once with viewimage2 and split again. With flagPAN set, image 1 is
% stretched on its own and excluded from the shared stretch.
% The axes grid is 4x7 for up to 28 images and gains rows beyond that, so a
% larger image set no longer indexes past the axes created by tight_subplot.

% Optional boundary crop of every image.
if flag_cut_bounds
    MatrixImageCat = zeros(numel(dim_cut:size(MatrixImage,1)-dim_cut),numel(dim_cut:size(MatrixImage,2)-dim_cut),size(MatrixImage,3),size(MatrixImage,4));
    for ii = 1 : size(MatrixImageCat,4)
        t = MatrixImage(:,:,:,ii);
        MatrixImageCat(:,:,:,ii) = t(dim_cut:end-dim_cut,dim_cut:end-dim_cut,:);
    end
else
    MatrixImageCat = MatrixImage;
end

[r,c,~] = size(MatrixImageCat(:,:,:,1));
nImg = size(MatrixImageCat,4);

% First image taking part in the shared stretch.
if flagPAN
    first = 2;
else
    first = 1;
end

% Horizontal mosaic of the selected RGB bands.
T = [];
for ii = first : nImg
    T = cat(2,T,MatrixImageCat(:,:,vect_index_RGB,ii));
end

IMN = viewimage2(T);

MatrixPrint = zeros(size(MatrixImageCat(:,:,vect_index_RGB,:)));
if flagPAN
    % The first image gets its own stretch.
    MatrixPrint(:,:,:,1) = viewimage2(MatrixImageCat(:,:,vect_index_RGB,1));
end

% Cut the stretched mosaic back into single images.
ind_c = 1;
for ii = first : nImg
    MatrixPrint(:,:,:,ii) = IMN(1 : r,ind_c : ind_c + c - 1,:);
    ind_c = ind_c + c;
end

% ha = tight_subplot(5,5,[.06 .03],[.01 .06],[.01 .01]);
nCols = 7;
nRows = max(4, ceil(nImg/nCols));   % 4 rows as before; more only when needed
ha = tight_subplot(nRows,nCols,[.02 0],[.01 .03],[.0 .0]);

for ii = 1 : nImg
    axes(ha(ii));
    imshow(MatrixPrint(:,:,:,ii),[]);
    title(ha(ii),titleImages{ii});
end

end
================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImagesAllOld.m ================================================ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % Visualize all the images applying the same stretching for visual comparison. % % Interface: % MatrixPrint = showImagesAll(MatrixImage,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,flagPAN) % % Inputs: % MatrixImage: Matrix that contains all the images to visualize; Size: [M x N x B x Z], where [M x N] is the % dimension of a single image band, B represents the number of bands for each image, and Z is the number of images to plot. % titleImages: Vector of strings that represents the titles for each image to plot; Size: [1 x Z]. % vect_index_RGB: Identify the bands to plot to obtain an RGB representation of the multispectral data; % flag_cut_bounds: Cut the boundaries of the images to plot; % dim_cut: Define the dimension of the boundary cut; % flagPAN: Flag. If flagPAN == 1, the first image to plot is the panchromatic image otherwise it is the ground-truth. % % Outputs: % MatrixPrint: Matrix, with the same structure of MatrixImage, which contains the plotted images. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function MatrixPrint = showImagesAll(MatrixImage,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,flagPAN)
% Older layout variant of showImagesAll (5-column grid with wider gaps).
% NOTE(review): the function name differs from the file name
% (showImagesAllOld.m); the name is kept unchanged here.
% All images that share the stretch are concatenated side by side, stretched
% once with viewimage2 and split again. With flagPAN set, image 1 is
% stretched on its own and excluded from the shared stretch.
% The axes grid is 5x5 for up to 25 images and gains rows beyond that, so a
% larger image set no longer indexes past the axes created by tight_subplot.

% Optional boundary crop of every image.
if flag_cut_bounds
    MatrixImageCat = zeros(numel(dim_cut:size(MatrixImage,1)-dim_cut),numel(dim_cut:size(MatrixImage,2)-dim_cut),size(MatrixImage,3),size(MatrixImage,4));
    for ii = 1 : size(MatrixImageCat,4)
        t = MatrixImage(:,:,:,ii);
        MatrixImageCat(:,:,:,ii) = t(dim_cut:end-dim_cut,dim_cut:end-dim_cut,:);
    end
else
    MatrixImageCat = MatrixImage;
end

[r,c,~] = size(MatrixImageCat(:,:,:,1));
nImg = size(MatrixImageCat,4);

% First image taking part in the shared stretch.
if flagPAN
    first = 2;
else
    first = 1;
end

% Horizontal mosaic of the selected RGB bands.
T = [];
for ii = first : nImg
    T = cat(2,T,MatrixImageCat(:,:,vect_index_RGB,ii));
end

IMN = viewimage2(T);

MatrixPrint = zeros(size(MatrixImageCat(:,:,vect_index_RGB,:)));
if flagPAN
    % The first image gets its own stretch.
    MatrixPrint(:,:,:,1) = viewimage2(MatrixImageCat(:,:,vect_index_RGB,1));
end

% Cut the stretched mosaic back into single images.
ind_c = 1;
for ii = first : nImg
    MatrixPrint(:,:,:,ii) = IMN(1 : r,ind_c : ind_c + c - 1,:);
    ind_c = ind_c + c;
end

nCols = 5;
nRows = max(5, ceil(nImg/nCols));   % 5 rows as before; more only when needed
ha = tight_subplot(nRows,nCols,[.06 .03],[.01 .06],[.01 .01]);
% ha = tight_subplot(5,5,[.02 0],[.01 .03],[.0 .0]);

for ii = 1 : nImg
    axes(ha(ii));
    imshow(MatrixPrint(:,:,:,ii),[]);
    title(ha(ii),titleImages{ii});
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showPan.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Visualize and print the panchromatic image.
%
% Interface:
%           IN = showPan(Pan,print,id,flag_cut_bounds,dim_cut)
%
% Inputs:
%           Pan:                Panchromatic image;
%           print:              Flag.
%                               If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed Panchromatic image;
%           dim_cut:            Define the dimension of the boundary cut;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function IN = showPan(Pan,print,id,flag_cut_bounds,dim_cut)
% Stretch and show the panchromatic image (the display is done by
% viewimage) and, when requested, print it to EPS. Returns the stretched image.

% Boundary crop of dim_cut pixels.
if flag_cut_bounds
    rows = dim_cut:size(Pan,1)-dim_cut;
    cols = dim_cut:size(Pan,2)-dim_cut;
    Pan = Pan(rows,cols,:);
end

IN = viewimage(Pan);

if print
    printImage(IN,sprintf('Outputs/%d.eps',id));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showPan_zoomin.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Visualize (with close-ups) and print the panchromatic image.
%
% Interface:
%           IN = showPan_zoomin(Pan,print,id,flag_cut_bounds,dim_cut,location1,location2)
%
% Inputs:
%           Pan:                Panchromatic image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed Panchromatic image;
%           dim_cut:            Define the dimension of the boundary cut;
%           location1:          Region of the first close-up (forwarded to rectangleonimage);
%           location2:          Region of the second close-up; may be empty;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function IN = showPan_zoomin(Pan,print,id,flag_cut_bounds,dim_cut, location1, location2)
% Returns the stretched image IN; the figure shows IN with the close-up frames.

ratio = 4;   % hard-coded: the crop offset is dim_cut/4

% Boundary crop; the plain dim_cut crop is kept below for reference.
if flag_cut_bounds
    %Pan = Pan(dim_cut:end-dim_cut,dim_cut:end-dim_cut,:);
    margin = round(dim_cut/ratio);
    Pan = Pan(margin:end-margin,margin:end-margin,:);
end

IN = viewimage(Pan);

% type =1 (put to down-left); type =2 (put to down-right);
% type =3 (put to up-right);  type =4 (put to up-left);
ent = rectangleonimage(IN,location1,1, 3, 3, 3, 1);       % put close-up to up-right corner
if ~isempty(location2)
    ent = rectangleonimage(ent,location2,1, 3, 2, 3, 2);  % put close-up to down-right corner
end
figure,imshow(ent,[])

% NOTE(review): the printed image is IN (without the close-up frames) - confirm this is intended.
if print
    printImage(IN,sprintf('Outputs/%d.eps',id));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/tight_subplot.m
================================================
function ha = tight_subplot(Nh, Nw, gap, marg_h, marg_w)

% tight_subplot creates "subplot" axes with adjustable gaps and margins
%
% ha = tight_subplot(Nh, Nw, gap, marg_h, marg_w)
%
%   in:  Nh      number of axes in hight (vertical direction)
%        Nw      number of axes in width (horizontaldirection)
%        gap     gaps between the axes in normalized units (0...1)
%                   or [gap_h gap_w] for different gaps in height and width
%        marg_h  margins in height in normalized units (0...1)
%                   or [lower upper] for different lower and upper margins
%        marg_w  margins in width in normalized units (0...1)
%                   or [left right] for different left and right margins
%
%  out:  ha     array of handles of the axes objects
%                   starting from upper left corner, going row-wise as in
%                   going row-wise as in
%
%  Example: ha = tight_subplot(3,2,[.01 .03],[.1 .01],[.01 .01])
%           for ii = 1:6; axes(ha(ii)); plot(randn(10,ii)); end
%           set(ha(1:4),'XTickLabel',''); set(ha,'YTickLabel','')

% Pekka Kumpulainen 20.6.2010   @tut.fi
% Tampere University of Technology / Automation Science and Engineering

if nargin<3; gap = .02; end
if nargin<4 || isempty(marg_h); marg_h = .05; end
if nargin<5; marg_w = .05; end

if numel(gap)==1;
    gap = [gap gap];
end
if numel(marg_w)==1;
    marg_w = [marg_w marg_w];
end
if numel(marg_h)==1;
    marg_h = [marg_h marg_h];
end

axh = (1-sum(marg_h)-(Nh-1)*gap(1))/Nh;
axw = (1-sum(marg_w)-(Nw-1)*gap(2))/Nw;

py = 1-marg_h(2)-axh;

ha = zeros(Nh*Nw,1);
ii = 0;
for ih = 1:Nh
    px = marg_w(1);

    for ix = 1:Nw
        ii = ii+1;
        ha(ii) = axes('Units','normalized', ...
            'Position',[px py axw axh], ...
            'XTickLabel','', ...
'YTickLabel',''); px = px+axw+gap(2); end py = py-axh-gap(1); end ================================================ FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/viewimage.m ================================================ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % Visualization [3-2-1] of images with 3 bands by exploiting linear stretching and fixing the saturation. % % Interface: % ImageToView = viewimage(ImageToView,tol) % % Inputs: % ImageToView: Image to view; % tol: Saturation; Default values: [0.01 0.99] equal for all the three bands. % % Outputs: % ImageToView: Image to view. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function ImageToView = viewimage(ImageToView,tol1,tol2,tol3) iptsetpref('ImshowBorder', 'tight') ImageToView = double(ImageToView); L=size(ImageToView,3); if (L<3) ImageToView=ImageToView(:,:,[1 1 1]); end if nargin == 1 tol1 = [0.01 0.99]; end if nargin <= 2 tol = [tol1;tol1;tol1]; ImageToView = linstretch(ImageToView,tol); figure,imshow(ImageToView(:,:,3:-1:1),[]) elseif nargin == 4 if sum(tol1(2)+tol2(2)+tol3(2)) <= 3 tol = [tol1;tol2;tol3]; ImageToView = linstretch(ImageToView,tol); figure,imshow(ImageToView(:,:,3:-1:1),[]) else tol = [tol1;tol2;tol3]; [N,M,~] = size(ImageToView); NM = N*M; for i=1:3 b = reshape(double(uint16(ImageToView(:,:,i))),NM,1); b(btol(i,2))=tol(i,2); b = (b-tol(i,1))/(tol(i,2)-tol(i,1)); ImageToView(:,:,i) = reshape(b,N,M); end figure,imshow(ImageToView(:,:,3:-1:1),[]) end end iptsetpref('ImshowBorder', 'loose') end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Description: % Linear Stretching. % % Interface: % ImageToView = linstretch(ImageToView,tol) % % Inputs: % ImageToView: Image to stretch; % tol: ; % % Outputs: % ImageToView: Stretched image. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function ImageToView = linstretch(ImageToView,tol)
% Per-band linear stretch of the first three bands. tol is a 3x2 matrix,
% one [low high] pair of cumulative-histogram fractions per band.
% NOTE(review): a constant band gives max(b)-min(b) == 0 histogram bins -
% confirm callers never pass one.

[N,M,~] = size(ImageToView);
NM = N*M;
for i=1:3
    b = reshape(double(uint16(ImageToView(:,:,i))),NM,1);
    [hb,levelb] = hist(b,max(b)-min(b));
    chb = cumsum(hb);
    % Lower / upper levels where the cumulative histogram crosses the tolerances.
    t(1)=ceil(levelb(find(chb>NM*tol(i,1), 1 )));
    t(2)=ceil(levelb(find(chb<NM*tol(i,2), 1, 'last' )));
    % Saturate outside [t(1), t(2)], then rescale to [0,1].
    b(b<t(1))=t(1);
    b(b>t(2))=t(2);
    b = (b-t(1))/(t(2)-t(1));
    ImageToView(:,:,i) = reshape(b,N,M);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/viewimage2.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Visualization [3-2-1] of images with 3 bands by exploiting linear stretching and fixing the saturation.
%
% Interface:
%           ImageToView = viewimage2(ImageToView,tol)
%           ImageToView = viewimage2(ImageToView,tol1,tol2,tol3)
%
% Inputs:
%           ImageToView:    Image to view;
%           tol:            Saturation; Default values: [0.01 0.99] equal for all the three bands.
%
% Outputs:
%           ImageToView:    Image to view.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function ImageToView = viewimage2(ImageToView,tol1,tol2,tol3)
% Same stretching as viewimage, but no figure is opened here.
% With one or two arguments the same [low high] pair is used for all three
% bands. With four arguments each band has its own pair: if the upper values
% sum to at most 3 they are passed to linstretch, otherwise they are used
% directly as intensity limits. With exactly three arguments no stretch is applied.

iptsetpref('ImshowBorder', 'tight')
ImageToView = double(ImageToView);
L=size(ImageToView,3);
if (L<3)
    % Fewer than three bands: replicate the first band.
    ImageToView=ImageToView(:,:,[1 1 1]);
end

if nargin == 1
    tol1 = [0.01 0.99];
end
if nargin <= 2
    tol = [tol1;tol1;tol1];
    ImageToView = linstretch(ImageToView,tol);
elseif nargin == 4
    if sum(tol1(2)+tol2(2)+tol3(2)) <= 3
        tol = [tol1;tol2;tol3];
        ImageToView = linstretch(ImageToView,tol);
    else
        tol = [tol1;tol2;tol3];
        [N,M,~] = size(ImageToView);
        NM = N*M;
        for i=1:3
            b = reshape(double(uint16(ImageToView(:,:,i))),NM,1);
            % Clip to the given limits, then rescale to [0,1].
            b(b<tol(i,1))=tol(i,1);
            b(b>tol(i,2))=tol(i,2);
            b = (b-tol(i,1))/(tol(i,2)-tol(i,1));
            ImageToView(:,:,i) = reshape(b,N,M);
        end
    end
end

iptsetpref('ImshowBorder', 'loose')

end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%           Linear Stretching.
%
% Interface:
%           ImageToView = linstretch(ImageToView,tol)
%
% Inputs:
%           ImageToView:    Image to stretch;
%           tol:            ;
%
% Outputs:
%           ImageToView:    Stretched image.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function ImageToView = linstretch(ImageToView,tol)
% Per-band linear stretch of the first three bands. tol is a 3x2 matrix,
% one [low high] pair of cumulative-histogram fractions per band.
% NOTE(review): a constant band gives max(b)-min(b) == 0 histogram bins -
% confirm callers never pass one.

[N,M,~] = size(ImageToView);
NM = N*M;
for i=1:3
    b = reshape(double(uint16(ImageToView(:,:,i))),NM,1);
    [hb,levelb] = hist(b,max(b)-min(b));
    chb = cumsum(hb);
    % Lower / upper levels where the cumulative histogram crosses the tolerances.
    t(1)=ceil(levelb(find(chb>NM*tol(i,1), 1 )));
    t(2)=ceil(levelb(find(chb<NM*tol(i,2), 1, 'last' )));
    % Saturate outside [t(1), t(2)], then rescale to [0,1].
    b(b<t(1))=t(1);
    b(b>t(2))=t(2);
    b = (b-t(1))/(t(2)-t(1));
    ImageToView(:,:,i) = reshape(b,N,M);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/readme.md
================================================
# Test toolbox for traditional and DL

"Test toolbox for traditional and DL" for simultaneously evaluating traditional and DL approaches, and finally output metrics and eps-format figures for your latex editing

[English](https://github.com/.md) | [简体中文](https://github.com.md)

This repository is the official Matlab implementation of our IEEE GRSM paper “Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks”, 2022 ([paper](https://github.com/liangjiandeng/liangjiandeng.github.io/tree/master/papers/2022/review-grsm2022.pdf) | [homepage](https://github.com/liangjiandeng/DLPan-Toolbox)).

## Features

## Requirements
* Matlab software

## Quick Start

### Full-resolution Evaluation

* Directly run ``Demo_Full_Resolution.m`` which includes an WV3 example. After running this demo, readers can understand the whole procedure.
* Note: the full-resolution test dataset is too large to upload to GitHub, so we provide cloud links for downloading the files needed to run this demo:

    - i) Download link for the full-resolution WV3-NewYork example (named "NY1_WV3_FR.mat"): [[Link]](https://drive.google.com/file/d/1j1nyHuBxsNzIn-UEwZUgeziGCAFMLes9/view?usp=sharing) (put it into the folder "1_TestData/Datasets Testing")

    - ii) Download link for the DL results of the full-resolution WV3-NewYork example: [[Link]](https://drive.google.com/file/d/16FSxdq6BY7STbmMzxcxJ5atNQ7ZV3mPT/view?usp=sharing) (put them into the folder "2_DL_Result/WV3")

* Once you have the above datasets, you can run this demo successfully and see how it works.

### Reduced-resolution Evaluation

* Directly run ``Demo_Reduced_Resolution.m``, which includes a WV3 example. After running this demo, readers can understand the whole procedure.

* Note: the reduced-resolution test dataset is too large to upload to GitHub, so we provide cloud links for downloading the files needed to run this demo:

    - i) Download link for the reduced-resolution WV3-NewYork example (named "NY1_WV3_RR.mat"): same link as i) above; put it into the folder "1_TestData/Datasets Testing"

    - ii) Download link for the DL results of the reduced-resolution WV3-NewYork example: same link as ii) above; put them into the folder "2_DL_Result/WV3"

* Once you have the above datasets, you can run this demo successfully and see how it works.

### Others

* You can find the quantitative results in the TeX files ``FR_Assessment.tex``, ``RR_Assessment.tex`` and ``Avg_RR_Assessment.tex``, ready to copy into your LaTeX document.

* You can also find the generated high-resolution eps-format figures in the folder "3_EPS" for your LaTeX editing.

## Acknowledgement

- We appreciate the great contribution to this toolbox of [Xiao Wu](https://xiaoxiao-woo.github.io/), a graduate student at [UESTC](https://www.uestc.edu.cn/).
## Citation
* If you use this toolbox, please kindly cite our paper:

```bibtex
@ARTICLE{deng2022grsm,
author={L.-J. Deng and G. Vivone and M. E. Paoletti and G. Scarpa and J. He and Y. Zhang and J. Chanussot and A. Plaza},
journal={IEEE Geoscience and Remote Sensing Magazine},
title={Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks},
year={2022},
pages={},
}
```

* Also, the codes of the traditional methods are from the "pansharpening toolbox for distribution", so please also cite the corresponding paper:

```bibtex
@ARTICLE{vivone2021grsm,
author={Vivone, Gemine and Dalla Mura, Mauro and Garzelli, Andrea and Restaino, Rocco and Scarpa, Giuseppe and Ulfarsson, Magnus O. and Alparone, Luciano and Chanussot, Jocelyn},
journal={IEEE Geoscience and Remote Sensing Magazine},
title={A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting Pansharpening With Classical and Emerging Pansharpening Methods},
year={2021},
volume={9},
number={1},
pages={53-81},
doi={10.1109/MGRS.2020.3019315}
}
```

## License & Copyright
This project is open sourced under GNU General Public License v3.0.

================================================
FILE: 03-Data-Simulation(Matlab)/01-DataSimu/QB/readme.md.txt
================================================

================================================
FILE: 03-Data-Simulation(Matlab)/Demo_DataSimu_qb.m
================================================
%% This is a demo to segment image into small patches (and big test imgs)
% for the training=64x64x8 (and testing=256x256x8) pansharpening in remote sensing
% L.-J.
Deng(UESTC) % 2020-10-04 %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% clear; close all; % please download the QB data from the website: % then put into the folder of "Imgs_qb" % at last run the demo directly to get patching examples files = dir('Imgs_qb/*.mat'); leng = length(files); Pre_NumInd = 1; Pre_NumInd_test = 1; scale = 4; %% ---------------------------------- for i = 1:leng % load inpainted images str = files(i).name; dir = strcat('load', 32, 'Imgs_qb/', str); eval(dir) PAN2 = I_PAN; LMS2 = I_MS; GT2 = I_GT; MS2 = I_MS_LR; maxval = max(PAN2(:)); figure, subplot(2,2,1), imshow(PAN2/maxval); title('original PAN') subplot(2,2,2), imshow(MS2(:,:,[3 2 1])/maxval); title('LR MS') subplot(2,2,3), imshow(GT2(:,:,[3 2 1])/maxval); title('GT') subplot(2,2,4), imshow(LMS2(:,:,[3 2 1])/maxval); title('UP MS') %% leave one half of data 1 as the test data! if (i==1) % take Indianapolis to get test imags; cut_num = 512; [a, b, c] = size(GT2); GT = GT2(:, cut_num+1:end,:); % for training dataset GT_test = GT2(:, 1:cut_num, :); % for testing dataset PAN = PAN2(:, cut_num+1:end); PAN_test = PAN2(:, 1:cut_num); LMS = LMS2(:, cut_num+1:end, :); LMS_test = LMS2(:, 1:cut_num, :); MS = MS2(:, fix(cut_num/4)+1:end, :); MS_test = MS2(:, 1:fix(cut_num/4), :); %% 1) Big Test Imgs: segment pan into big Imgs 512x512x8 (testing Exm) size_l_test = 128; size_h_test = 512; overlap_test = 1; % (for testing data: 0<=overlaop<=64) tic [gt_Oneimg_test, pan_Oneimg_test, ms_Oneimg_test, lms_Oneimg_test] = segImg_new(PAN_test, LMS_test, GT_test, MS_test, size_l_test, size_h_test, scale, overlap_test); toc % save the Imgs into a tensor [NumInd_test, ~, ~, ~] = size(gt_Oneimg_test); Post_NumInd_test = Pre_NumInd_test + NumInd_test - 1; fprintf(['%d-th Img. 
(test): ', 'Pre_NumInd_test = %d; ', ' Post_NumInd_test = %d \n'], i, Pre_NumInd_test, Post_NumInd_test) % save data gt_tmp_test(Pre_NumInd_test: Post_NumInd_test, :, :, :) = gt_Oneimg_test; % gt tensor: Nx512x512x8 pan_tmp_test(Pre_NumInd_test: Post_NumInd_test, :, :) = pan_Oneimg_test; % pan tensor: Nx512x512 ms_tmp_test(Pre_NumInd_test: Post_NumInd_test, :, :, :) = ms_Oneimg_test; % ms tensor: Nx128x128x8 lms_tmp_test(Pre_NumInd_test: Post_NumInd_test, :, :, :)= lms_Oneimg_test; % lms tensor: Nx512x512x8 Pre_NumInd_test = Post_NumInd_test + 1; else GT = GT2; PAN = PAN2; LMS = LMS2; MS = MS2; end %% 2) small training patches (training) size_l = 16; size_h = 64; overlap = 4; % (for traning data: 0<=overlaop<=16) tic [gt_Oneimg, pan_Oneimg, ms_Oneimg, lms_Oneimg] = segImg_new(PAN, LMS, GT, MS, size_l, size_h, scale, overlap); toc % save the patches into a tensor [NumInd, ~, ~, ~] = size(gt_Oneimg); Post_NumInd = Pre_NumInd + NumInd - 1; fprintf(['%d-th Img.(patching for training): ', 'Pre_NumInd = %d; ', ' Post_NumInd = %d \n'], i, Pre_NumInd, Post_NumInd) % save data gt_tmp1(Pre_NumInd: Post_NumInd, :, :, :) = gt_Oneimg; % gt tensor: Nx64x64x8 pan_tmp1(Pre_NumInd: Post_NumInd, :, :) = pan_Oneimg; % pan tensor: Nx64x64 ms_tmp1(Pre_NumInd: Post_NumInd, :, :, :) = ms_Oneimg; % ms tensor: Nx16x16x8 lms_tmp1(Pre_NumInd: Post_NumInd, :, :, :)= lms_Oneimg; % lms tensor: Nx64x64x8 Pre_NumInd = Post_NumInd + 1; end %% ========================================================== %% ==== Increase samples to 10,000 (NxCxHxW's inverse = WxHxCxN) %% ========================================================== exp_num = size(gt_tmp1, 1); if exp_num < 10000 % Step2: two flips (lr + ud) to add examples gt_tmp(1:exp_num, :, :, :) = gt_tmp1; gt_tmp(exp_num+1:2*exp_num, :, :, :) = flip(gt_tmp1, 2); % two flips (lr + ud) to add examples gt_tmp(2*exp_num+1:3*exp_num, :, :, :) = flip(gt_tmp1, 3); ms_tmp(1:exp_num, :, :, :) = ms_tmp1; ms_tmp(exp_num+1:2*exp_num, :, :, :) = flip(ms_tmp1, 2); 
% two flips (lr + ud) to add examples ms_tmp(2*exp_num+1:3*exp_num, :, :, :) = flip(ms_tmp1, 3); lms_tmp(1:exp_num, :, :, :) = lms_tmp1; lms_tmp(exp_num+1:2*exp_num, :, :, :) = flip(lms_tmp1, 2); % two flips (lr + ud) to add examples lms_tmp(2*exp_num+1:3*exp_num, :, :, :) = flip(lms_tmp1, 3); pan_tmp(1:exp_num, :, :) = pan_tmp1; pan_tmp(exp_num+1:2*exp_num, :, :) = flip(pan_tmp1, 2); % two flips (lr + ud) to add examples pan_tmp(2*exp_num+1:3*exp_num, :, :) = flip(pan_tmp1, 3); % Step3: only select first 10000 patches for training: num_cut = 10000; gt_tmp(num_cut+1:end, :, :, :) = []; ms_tmp(num_cut+1:end, :, :, :) = []; lms_tmp(num_cut+1:end, :, :, :) = []; pan_tmp(num_cut+1:end, :, :) = []; else num_cut = exp_num; gt_tmp = gt_tmp1; ms_tmp = ms_tmp1; lms_tmp=lms_tmp1; pan_tmp=pan_tmp1; end %% ========================================================== %% (A) generate training: 1) training data (90%); 2) validation data (10%); %% ========================================================== Post_NumInd = num_cut; nz_idx = randperm(Post_NumInd); num_train = fix(0.9*Post_NumInd); % # training samples num_valid = Post_NumInd - num_train ; % # validation samples %% ==== save to H5 file (NxCxHxW's inverse = WxHxCxN) ===== %========================================================== %% == generate training dataset: gt = gt_tmp(nz_idx(1:num_train), :, :, :); % NxHxWxC=1x2x3x4 pan = pan_tmp(nz_idx(1:num_train), :, :); % NxHxW = 1x2x3 (PAN) ms = ms_tmp(nz_idx(1:num_train), :, :, :); lms = lms_tmp(nz_idx(1:num_train), :, :, :); %--- for training data: filename_train = '01-DataSimu/QB/train_qb_10000.h5'; gt = permute(gt,[3 2 4 1]); % beyond 2G, have to change dimension pan_t(1,:,:,:) = pan; % CxNxHxW = 1x2x3x4 (PAN) pan = permute(pan_t,[4 3 1 2]); % WxHxCxN ms = permute(ms,[3 2 4 1]); lms = permute(lms,[3 2 4 1]); gtsz = size(gt); mssz = size(ms); lmssz = size(lms); pansz =size(pan); h5create(filename_train, '/gt', gtsz(1:end), 'Datatype', 'double'); % width, height, channels, 
number h5create(filename_train, '/ms', mssz(1:end), 'Datatype', 'double'); % width, height, channels, number h5create(filename_train, '/lms', lmssz(1:end), 'Datatype', 'double'); % width, height, channels, number h5create(filename_train, '/pan', pansz(1:end), 'Datatype', 'double'); % width, height, channels, number h5write(filename_train, '/gt', double(gt), [1,1,1,1], size(gt)); h5write(filename_train, '/ms', double(ms), [1,1,1,1], size(ms)); h5write(filename_train, '/lms', double(lms), [1,1,1,1], size(lms)); h5write(filename_train, '/pan', double(pan), [1,1,1,1], size(pan)); clear gt ms lms pan pan_t %% == generate validation dataset: gt = gt_tmp(nz_idx(num_train+1: num_train+num_valid), :, :, :); pan = pan_tmp(nz_idx(num_train+1: num_train+num_valid), :, :); ms = ms_tmp(nz_idx(num_train+1: num_train+num_valid), :, :, :); lms = lms_tmp(nz_idx(num_train+1: num_train+num_valid), :, :, :); %--- for valid data: filename_valid = '01-DataSimu/QB/valid_qb_10000.h5'; gt = permute(gt,[3 2 4 1]); % beyond 2G, have to change dimension pan_t(1, :,:,:) = pan; % NxHxWx1 = 1x2x3x4 (PAN) pan = permute(pan_t,[4 3 1 2]); ms = permute(ms,[3 2 4 1]); lms = permute(lms,[3 2 4 1]); gtsz = size(gt); mssz = size(ms); pansz =size(pan); h5create(filename_valid, '/gt', gtsz(1:end), 'Datatype', 'double'); % width, height, channels, number h5create(filename_valid, '/ms', mssz(1:end), 'Datatype', 'double'); % width, height, channels, number h5create(filename_valid, '/lms', gtsz(1:end), 'Datatype', 'double'); % width, height, channels, number h5create(filename_valid, '/pan', pansz(1:end), 'Datatype', 'double'); % width, height, channels, number h5write(filename_valid, '/gt', double(gt), [1,1,1,1], size(gt)); h5write(filename_valid, '/ms', double(ms), [1,1,1,1], size(ms)); h5write(filename_valid, '/lms', double(lms), [1,1,1,1], size(lms)); h5write(filename_valid, '/pan', double(pan), [1,1,1,1], size(pan)); clear gt ms lms pan pan_t %% ========================================================== %% 
(B) generate Testing data:
%% ==========================================================
filename_test = '01-DataSimu/QB/TestData_qb.h5';

gt = permute(gt_tmp_test,[3 2 4 1]); % beyond 2G, have to change dimension
pan_t(1,:,:,:) = pan_tmp_test; % CxNxHxW = 1x2x3x4 (PAN)
pan = permute(pan_t,[4 3 1 2]); % WxHxCxN
ms = permute(ms_tmp_test,[3 2 4 1]);
lms = permute(lms_tmp_test,[3 2 4 1]);

gtsz = size(gt);
mssz = size(ms);
lmssz = size(lms);
pansz = size(pan);

h5create(filename_test, '/gt', gtsz(1:end), 'Datatype', 'double'); % width, height, channels, number
h5create(filename_test, '/ms', mssz(1:end), 'Datatype', 'double'); % width, height, channels, number
h5create(filename_test, '/lms', lmssz(1:end), 'Datatype', 'double'); % width, height, channels, number
h5create(filename_test, '/pan', pansz(1:end), 'Datatype', 'double'); % width, height, channels, number

h5write(filename_test, '/gt', double(gt), [1,1,1,1], size(gt));
h5write(filename_test, '/ms', double(ms), [1,1,1,1], size(ms));
h5write(filename_test, '/lms', double(lms), [1,1,1,1], size(lms));
h5write(filename_test, '/pan', double(pan), [1,1,1,1], size(pan));

clear gt ms lms pan pan_t

================================================
FILE: 03-Data-Simulation(Matlab)/imgs/readme
================================================
You may download an original QB data to understand the usage of this toolbox from:

https://www.dropbox.com/s/2ujmag14bkiw0mq/QB_Indianapolis_training_DL.mat?dl=0

================================================
FILE: 03-Data-Simulation(Matlab)/segImg_new.m
================================================
function [gt, pan, ms, lms] = segImg_new(PAN, LMS, GT, MS, size_l, size_high, scale, overlap)
% SEGIMG_NEW  Core function to segment big images into small patches.
% LJ Deng (UESTC); 2020-10-09
%
% Inputs:
%   PAN       : big PAN image, indexed as PAN(row, col)
%   LMS       : big upsampled MS image, indexed as LMS(row, col, band)
%   GT        : big original HRMS image, indexed as GT(row, col, band)
%   MS        : big original LR MS image, h x w x c
%   size_l    : side length of one LR patch (e.g. 16)
%   size_high : side length of one HR patch (e.g. 64)
%   scale     : spatial ratio of PAN and MS, here, scale = 4
%   overlap   : overlap among segmented patches, given in LR pixels
%
% Outputs (N patches stacked along the first dimension, always double):
%   gt  : segmented ground-truth (labeled) data, N x size_high x size_high x c
%   pan : segmented pan data,                    N x size_high x size_high
%   ms  : segmented ms data,                     N x size_l x size_l x c
%   lms : segmented lms data,                    N x size_high x size_high x c
%
% Patches are enumerated row-block by row-block (outer loop over rows,
% inner loop over columns of the LR grid).
%
% Side effect: when at least one patch is produced, a figure is opened that
% shows the first gt / pan / lms patch side by side, so that the user can
% check visually whether the inputs need registration.

%% -------------------------- sizes
[h, w, c] = size(MS);   % size of LR
H = scale*h;            % expected size of HR
W = scale*w;

%% -------------------------- patch origins
%---- LR indices ---------
step_low = size_l - overlap;                 % stride between LR patches
gridx = 1:step_low:h;
gridx((gridx + size_l - 1) > h) = [];        % delete boundary points
gridy = 1:step_low:w;
gridy((gridy + size_l - 1) > w) = [];        % delete boundary points

%---- HR indices ---------
% The HR origins are derived from the LR origins instead of being generated
% by an independent range. For the nominal setting size_high == scale*size_l
% this yields exactly the same grid as before; for any other setting it
% keeps HR patch i anchored on LR patch i, where two independent grids would
% drift apart (different stride) or differ in length (index out of range).
Gridx = scale*(gridx - 1) + 1;
Gridy = scale*(gridy - 1) + 1;

% Drop positions whose HR window would leave the scale*h x scale*w area;
% this is a no-op in the nominal setting.
keepx = (Gridx + size_high - 1) <= H;
gridx = gridx(keepx);
Gridx = Gridx(keepx);
keepy = (Gridy + size_high - 1) <= W;
gridy = gridy(keepy);
Gridy = Gridy(keepy);

%% -----Pre-define variables' sizes--------
numPatches = numel(gridx)*numel(gridy);
pan = zeros(numPatches, size_high, size_high);
lms = zeros(numPatches, size_high, size_high, c);
gt  = zeros(numPatches, size_high, size_high, c);
ms  = zeros(numPatches, size_l, size_l, c);

%% -----loops to segment--------
Num = 0;
for i = 1:numel(gridx)
    rows_l = gridx(i):gridx(i) + size_l - 1;        % LR row window
    rows_h = Gridx(i):Gridx(i) + size_high - 1;     % HR row window

    for j = 1:numel(gridy)
        Num = Num + 1;
        cols_l = gridy(j):gridy(j) + size_l - 1;    % LR column window
        cols_h = Gridy(j):Gridy(j) + size_high - 1; % HR column window

        % ---start to segment------
        pan_p = PAN(rows_h, cols_h);      % single pan patch, e.g. 64x64
        pan(Num, :, :) = pan_p;           % save to the "pan" tensor

        lms_p = LMS(rows_h, cols_h, :);   % single lms patch, e.g. 64x64xc
        lms(Num, :, :, :) = lms_p;        % save to the "lms" tensor

        gt_p = GT(rows_h, cols_h, :);     % single gt patch, e.g. 64x64xc
        gt(Num, :, :, :) = gt_p;          % save to the "gt" tensor

        ms_p = MS(rows_l, cols_l, :);     % single ms patch, e.g. 16x16xc
        ms(Num, :, :, :) = ms_p;          % save to the "ms" tensor

        if Num == 1  % to see if there needs registration!
            % Cast to double so the normalisation below also works when
            % the images are stored as integers.
            maxval = double(max(PAN(:)));

            % Bands 3,2,1 are displayed as R,G,B; with fewer than three
            % bands fall back to a gray-scale view of band 1.
            if c >= 3
                bands = [3 2 1];
            else
                bands = 1;
            end

            figure,
            subplot(1,3,1), imshow(double(gt_p(:,:,bands))/maxval + 0.3); title('gt')
            subplot(1,3,2), imshow(double(pan_p)/maxval + 0.3); title('pan')
            subplot(1,3,3), imshow(double(lms_p(:,:,bands))/maxval + 0.3); title('lms')
        end
    end
end
%% -----End loops--------
end

================================================
FILE: LICENSE
================================================
GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.

Preamble

The GNU General Public License is a free, copyleft license for software and other kinds of works.

The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too.

When we speak of free software, we are referring to freedom, not price.
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. 
If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. 
Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. 
However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 
Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 
b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: Copyright (C) This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see https://www.gnu.org/licenses/. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read https://www.gnu.org/licenses/why-not-lgpl.html. ================================================ FILE: README.md ================================================ # DLPan-Toolbox * This toolbox is related to the paper ``Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks, IEEE Geoscience and Remote Sensing Magazine, 2022`` (see the following reference [1]). Download: [[paper]](https://github.com/liangjiandeng/liangjiandeng.github.io/tree/master/papers/2022/review-grsm2022.pdf). * This is a deep learning (DL) toolbox for pansharpening, which can be used for training and testing, and for comparing traditional and DL methods. ## Introduction This toolbox mainly contains two parts: one is the pytorch source codes for the eight DL-based methods presented in the paper (i.e., the folder "01-DL toolbox (Pytorch)"); the other is the Matlab source codes which can simultaneously evaluate the performance of traditional and DL approaches in a unified framework ("02-Test toolbox for traditional and DL (Matlab)"). Please see more details: - 01-DL-toolbox(Pytorch) contains source codes of DL methods, you may check the ``readme`` file for the usage. 
- 02-Test-toolbox-for-traditional-and-DL(Matlab) contains Matlab source codes (mainly from 'G. Vivone et al., A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods, IEEE GRSM, 2021', see the following reference [2]) for simultaneously evaluating traditional and DL approaches and outputting results, you may check the ``readme`` file for the usage. - 03-Data-Simulation(Matlab) contains Matlab source codes that crop images into patches for training and validation. Also, you can simulate test examples with this toolbox. Note that readers can also check the structure and relationship of these two folders in the following ``overview figure`` (also find it in the repository). ## Dataset Due to the copyright issue, the datasets used in this GRSM paper are not available. Therefore, we recommend readers use the following datasets for pansharpening, for both training and testing. The following datasets can be directly applied in our DLPan-Toolbox (put the data in the directory for training: 01-DL-toolbox(Pytorch)/UDL/Data/pansharpening/training_data/). - [[PanCollection](https://github.com/liangjiandeng/PanCollection)] for multispectral pansharpening - [[HyperPanCollection](https://github.com/liangjiandeng/HyperPanCollection)] for hyperspectral pansharpening ## Citation * [1] If you use this toolbox, please kindly cite our paper: ```bibtex @ARTICLE{deng2022grsm, author={L.-J. Deng, G. Vivone, M. E. Paoletti, G. Scarpa, J. He, Y. Zhang, J. Chanussot, and A. 
Plaza}, journal={IEEE Geoscience and Remote Sensing Magazine}, title={Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks}, year={2022}, pages={2-38}, doi={10.1109/MGRS.2022.3187652} } ``` * [2] Also, the codes of traditional methods are from the "pansharpening toolbox for distribution", thus please cite the corresponding paper: ```bibtex @ARTICLE{vivone2021grsm, author={Vivone, Gemine and Dalla Mura, Mauro and Garzelli, Andrea and Restaino, Rocco and Scarpa, Giuseppe and Ulfarsson, Magnus O. and Alparone, Luciano and Chanussot, Jocelyn}, journal={IEEE Geoscience and Remote Sensing Magazine}, title={A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting Pansharpening With Classical and Emerging Pansharpening Methods}, year={2021}, volume={9}, number={1}, pages={53-81}, doi={10.1109/MGRS.2020.3019315} } ``` ## Acknowledgement - We appreciate the great contribution to this toolbox of [Xiao Wu](https://xiaoxiao-woo.github.io/) and Ran Ran, who are graduate students at [UESTC](https://www.uestc.edu.cn/). ## License & Copyright This project is open sourced under the GNU General Public License v3.0. ================================================ FILE: docs/en/DLPanToolbox/Evaluation.md ================================================ ## Evaluation ================================================ FILE: docs/en/DLPanToolbox/Example.md ================================================ ================================================ FILE: docs/en/DLPanToolbox/PreProcess.md ================================================ ## Data PreProcess ### Image This module provides some image processing methods, which require `opencv` to be installed first. #### Read/Write/Show To read or write image files, use `imread` or `imwrite`. 
```python import mmcv img = mmcv.imread('test.jpg') img = mmcv.imread('test.jpg', flag='grayscale') img_ = mmcv.imread(img) # nothing will happen, img_ = img mmcv.imwrite(img, 'out.jpg') ``` To read images from bytes ```python with open('test.jpg', 'rb') as f: data = f.read() img = mmcv.imfrombytes(data) ``` To show an image file or a loaded image ```python mmcv.imshow('tests/data/color.jpg') # this is equivalent to for i in range(10): img = np.random.randint(256, size=(100, 100, 3), dtype=np.uint8) mmcv.imshow(img, win_name='test image', wait_time=200) ``` #### Color space conversion Supported conversion methods: - bgr2gray - gray2bgr - bgr2rgb - rgb2bgr - bgr2hsv - hsv2bgr ```python img = mmcv.imread('tests/data/color.jpg') img1 = mmcv.bgr2rgb(img) img2 = mmcv.rgb2gray(img1) img3 = mmcv.bgr2hsv(img) ``` #### Resize There are three resize methods. All `imresize_*` methods have an argument `return_scale`, if this argument is `False`, then the return value is merely the resized image, otherwise is a tuple `(resized_img, scale)`. ```python # resize to a given size mmcv.imresize(img, (1000, 600), return_scale=True) # resize to the same size of another image mmcv.imresize_like(img, dst_img, return_scale=False) # resize by a ratio mmcv.imrescale(img, 0.5) # resize so that the max edge no longer than 1000, short edge no longer than 800 # without changing the aspect ratio mmcv.imrescale(img, (1000, 800)) ``` #### Rotate To rotate an image by some angle, use `imrotate`. The center can be specified, which is the center of original image by default. There are two modes of rotating, one is to keep the image size unchanged so that some parts of the image will be cropped after rotating, the other is to extend the image size to fit the rotated image. ```python img = mmcv.imread('tests/data/color.jpg') # rotate the image clockwise by 30 degrees. img_ = mmcv.imrotate(img, 30) # rotate the image counterclockwise by 90 degrees. 
img_ = mmcv.imrotate(img, -90) # rotate the image clockwise by 30 degrees, and rescale it by 1.5x at the same time. img_ = mmcv.imrotate(img, 30, scale=1.5) # rotate the image clockwise by 30 degrees, with (100, 100) as the center. img_ = mmcv.imrotate(img, 30, center=(100, 100)) # rotate the image clockwise by 30 degrees, and extend the image size. img_ = mmcv.imrotate(img, 30, auto_bound=True) ``` #### Flip To flip an image, use `imflip`. ```python img = mmcv.imread('tests/data/color.jpg') # flip the image horizontally mmcv.imflip(img) # flip the image vertically mmcv.imflip(img, direction='vertical') ``` #### Crop `imcrop` can crop the image with one or more regions. Each region is represented by the upper left and lower right coordinates as (x1, y1, x2, y2). ```python import mmcv import numpy as np img = mmcv.imread('tests/data/color.jpg') # crop the region (10, 10, 100, 120) bboxes = np.array([10, 10, 100, 120]) patch = mmcv.imcrop(img, bboxes) # crop two regions (10, 10, 100, 120) and (0, 0, 50, 50) bboxes = np.array([[10, 10, 100, 120], [0, 0, 50, 50]]) patches = mmcv.imcrop(img, bboxes) # crop two regions, and rescale the patches by 1.2x patches = mmcv.imcrop(img, bboxes, scale=1.2) ``` #### Padding There are two methods, `impad` and `impad_to_multiple`, to pad an image to the specific size with given values. ```python img = mmcv.imread('tests/data/color.jpg') # pad the image to (1000, 1200) with all zeros img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=0) # pad the image to (1000, 1200) with different values for three channels. img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=(100, 50, 200)) # pad the image on left, right, top, bottom borders with all zeros img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=0) # pad the image on left, right, top, bottom borders with different values # for three channels. img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=(100, 50, 200)) # pad an image so that each edge is a multiple of some value. 
img_ = mmcv.impad_to_multiple(img, 32) ``` ================================================ FILE: docs/en/DLPanToolbox/Simulation.md ================================================ ## Simulation ================================================ FILE: docs/en/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = source BUILDDIR = build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: docs/en/_static/css/readthedocs.css ================================================ .header-logo { background-image: url("../image/logo-dlpan.png"); background-size: 160px 40px; height: 40px; width: 160px; } table.colwidths-auto td { width: 50% } ================================================ FILE: docs/en/_templates/classtemplate.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: {{ module }} {{ name | underline}} .. autoclass:: {{ name }} :members: .. autogenerated from source/_templates/classtemplate.rst note it does not have :inherited-members: ================================================ FILE: docs/en/citation.md ================================================ ## Cite DLPan-Toolbox If DLPan-Toolbox is helpful for you, you are encouraged to cite the following paper: ```bibtex @ARTICLE{deng2022grsm, author={L.-J. Deng, G. Vivone, M. E. Paoletti, G. Scarpa, J. He, Y. Zhang, J. Chanussot, and A. 
Plaza}, journal={IEEE Geoscience and Remote Sensing Magazine}, title={Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks}, year={2022}, pages={2-38}, doi={10.1109/MGRS.2022.3187652} } ``` Also, the codes of traditional methods are from the "pansharpening toolbox for distribution", thus please cite the corresponding paper: ```bibtex @ARTICLE{vivone2021grsm, author={Vivone, Gemine and Dalla Mura, Mauro and Garzelli, Andrea and Restaino, Rocco and Scarpa, Giuseppe and Ulfarsson, Magnus O. and Alparone, Luciano and Chanussot, Jocelyn}, journal={IEEE Geoscience and Remote Sensing Magazine}, title={A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting Pansharpening With Classical and Emerging Pansharpening Methods}, year={2021}, volume={9}, number={1}, pages={53-81}, doi={10.1109/MGRS.2020.3019315} } ``` ================================================ FILE: docs/en/conf.py ================================================ # GPL v3.0 License # Copyright (C) UESTC # All Rights Reserved # @Time : 2023/9/21 # @Author : Xiao Wu # @reference: # # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/master/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
# import os import sys import pytorch_sphinx_theme from sphinx.builders.html import StandaloneHTMLBuilder sys.path.insert(0, os.path.abspath('../..')) # version_file = '../../mmcv/version.py' # with open(version_file) as f: # exec(compile(f.read(), version_file, 'exec')) # __version__ = locals()['__version__'] __version__ = "0.3.6" # -- Project information ----------------------------------------------------- project = 'DLPan-Toolbox' copyright = '2023, UESTC' author = 'Xiao Wu' # The short X.Y version version = __version__ # The full version, including alpha/beta/rc tags release = __version__ # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.intersphinx', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', 'sphinx_markdown_tables', 'myst_parser', 'sphinx_copybutton', ] # yapf: disable myst_heading_anchors = 4 myst_enable_extensions = ['colon_fence'] # Configuration for intersphinx intersphinx_mapping = { 'python': ('https://docs.python.org/3', None), 'numpy': ('https://numpy.org/doc/stable', None), 'torch': ('https://pytorch.org/docs/stable/', None) } # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = { '.rst': 'restructuredtext', '.md': 'markdown', } # The master toctree document. master_doc = 'index' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. 
language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # # html_theme = 'sphinx_rtd_theme' html_theme = 'pytorch_sphinx_theme' html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = { 'menu': [ { 'name': 'GitHub', 'url': 'https://github.com/liangjiandeng/DLPan-Toolbox' }, ], # Specify the language of shared menu 'menu_lang': 'en', } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] html_css_files = ['css/readthedocs.css'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. # # html_sidebars = {} # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = 'dlpantoolboxndoc' # -- Options for LaTeX output ------------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). 
# # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'dlpantoolbox.tex', 'dlpantoolbox Documentation', 'DLPanToolbox Contributors', 'manual'), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [(master_doc, 'dlpantoolbox', 'dlpantoolbox Documentation', [author], 1)] # -- Options for Texinfo output ---------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'dlpantoolbox', 'dlpantoolbox Documentation', author, 'dlpantoolbox', 'One line description of project.', 'Miscellaneous'), ] # -- Options for Epub output ------------------------------------------------- # Bibliographic Dublin Core info. epub_title = project # The unique identifier of the text. This can be a ISBN number # or the project homepage. # # epub_identifier = '' # A unique identification for the text. # # epub_uid = '' # A list of files that should not be packed into the epub file. epub_exclude_files = ['search.html'] # set priority when building html StandaloneHTMLBuilder.supported_image_types = [ 'image/svg+xml', 'image/gif', 'image/png', 'image/jpeg' ] # -- Extension configuration ------------------------------------------------- # Ignore >>> when copying code copybutton_prompt_text = r'>>> |\.\.\. 
' copybutton_prompt_is_regexp = True ================================================ FILE: docs/en/docutils.conf ================================================ [html writers] table_style: colwidths-auto ================================================ FILE: docs/en/faq.md ================================================ ## Frequently Asked Questions We list some common troubles faced by many users and their corresponding solutions here. Feel free to enrich the list if you find any frequent issues and have ways to help others to solve them. ### Installation - KeyError: "xxx: 'yyy is not in the zzz registry'" The registry mechanism will be triggered only when the file of the module is imported. So you need to import that file somewhere. More details can be found at [KeyError: "MaskRCNN: 'RefineRoIHead is not in the models registry'"](https://github.com/open-mmlab/mmdetection/issues/5974). - "No module named 'mmcv.ops'"; "No module named 'mmcv.\_ext'" 1. Uninstall existing mmcv in the environment using `pip uninstall mmcv` 2. Install mmcv-full following the [installation instruction](https://mmcv.readthedocs.io/en/latest/get_started/installation.html) or [Build MMCV from source](https://mmcv.readthedocs.io/en/latest/get_started/build.html) - "invalid device function" or "no kernel image is available for execution" 1. Check the CUDA compute capability of your GPU 2. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built for the correct GPU architecture. You may need to set `TORCH_CUDA_ARCH_LIST` to reinstall MMCV. The compatibility issue could happen when using old GPUs, e.g., Tesla K80 (3.7) on colab. 3. Check whether the running environment is the same as that when mmcv/mmdet is compiled. For example, you may compile mmcv using CUDA 10.0 but run it on CUDA 9.0 environments - "undefined symbol" or "cannot open xxx.so" 1. 
If those symbols are CUDA/C++ symbols (e.g., libcudart.so or GLIBCXX), check whether the CUDA/GCC runtimes are the same as those used for compiling mmcv 2. If those symbols are PyTorch symbols (e.g., symbols containing caffe, aten, and TH), check whether the PyTorch version is the same as that used for compiling mmcv 3. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built by and running on the same environment - "RuntimeError: CUDA error: invalid configuration argument" This error may be caused by the poor performance of GPU. Try to decrease the value of [THREADS_PER_BLOCK](https://github.com/open-mmlab/mmcv/blob/cac22f8cf5a904477e3b5461b1cc36856c2793da/mmcv/ops/csrc/common_cuda_helper.hpp#L10) and recompile mmcv. - "RuntimeError: nms is not compiled with GPU support" This error is because your CUDA environment is not installed correctly. You may try to re-install your CUDA environment and then delete the build/ folder before re-compiling mmcv. - "Segmentation fault" 1. Check your GCC version and use GCC >= 5.4. This is usually caused by the incompatibility between PyTorch and the environment (e.g., GCC \< 4.9 for PyTorch). We also recommend the users to avoid using GCC 5.5 because many users have reported that GCC 5.5 will cause "segmentation fault" and simply changing it to GCC 5.4 could solve the problem 2. Check whether PyTorch is correctly installed and could use CUDA op, e.g. type the following command in your terminal and see whether it could correctly output results ```shell python -c 'import torch; print(torch.cuda.is_available())' ``` 3. If PyTorch is correctly installed, check whether MMCV is correctly installed. If MMCV is correctly installed, then there will be no issue with the command ```shell python -c 'import mmcv; import mmcv.ops' ``` 4. 
If MMCV and PyTorch are correctly installed, you can use `ipdb` to set breakpoints or directly add `print` to debug and see which part leads to the `segmentation fault` - "libtorch_cuda_cu.so: cannot open shared object file" `mmcv-full` depends on the shared object, but it cannot be found. We can check whether the object exists in `~/miniconda3/envs/{environment-name}/lib/python3.7/site-packages/torch/lib` or try to re-install PyTorch. - "fatal error C1189: #error: -- unsupported Microsoft Visual Studio version!" If you are building mmcv-full on Windows and the version of CUDA is 9.2, you will probably encounter the error `"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include\crt/host_config.h(133): fatal error C1189: #error: -- unsupported Microsoft Visual Studio version! Only the versions 2012, 2013, 2015 and 2017 are supported!"`, in which case you can use a lower version of Microsoft Visual Studio like vs2017. - "error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized" If your version of PyTorch is 1.5.0 and you are building mmcv-full on Windows, you will probably encounter the error `- torch/csrc/jit/api/module.h(474): error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized`. The way to solve the error is to replace all the `static constexpr bool all_slots = false;` with `static bool all_slots = false;` in the file `https://github.com/pytorch/pytorch/blob/v1.5.0/torch/csrc/jit/api/module.h`. More details can be found at [member "torch::jit::detail::AttributePolicy::all_slots" may not be initialized](https://github.com/pytorch/pytorch/issues/39394). - "error: a member with an in-class initializer must be const" If your version of PyTorch is 1.6.0 and you are building mmcv-full on Windows, you will probably encounter the error `"- torch/include\torch/csrc/jit/api/module.h(483): error: a member with an in-class initializer must be const"`. 
The way to solve the error is to replace all the `CONSTEXPR_EXCEPT_WIN_CUDA` with `const` at `torch/include\torch/csrc/jit/api/module.h`. More details can be found at [Ninja: build stopped: subcommand failed](https://github.com/open-mmlab/mmcv/issues/575). - "error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized" If your version of PyTorch is 1.7.0 and you are building mmcv-full on Windows, you will probably encounter the error `torch/include\torch/csrc/jit/ir/ir.h(1347): error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized`. Solving the error requires modifying several local files of PyTorch: - delete `static constexpr Symbol Kind = ::c10::prim::profile;` and `static constexpr Symbol Kind = ::c10::prim::profile_optional;` at `torch/include\torch/csrc/jit/ir/ir.h` - replace `explicit operator type&() { return *(this->value); }` with `explicit operator type&() { return *((type*)this->value); }` at `torch\include\pybind11\cast.h` - replace all the `CONSTEXPR_EXCEPT_WIN_CUDA` with `const` at `torch/include\torch/csrc/jit/api/module.h` More details can be found at [Ensure default extra_compile_args](https://github.com/pytorch/pytorch/pull/45956). - Compatibility issue between MMCV and MMDetection; "ConvWS is already registered in conv layer" Please install the correct version of MMCV for the version of your MMDetection following the [installation instruction](https://mmdetection.readthedocs.io/en/latest/get_started.html#installation). ### Usage - "RuntimeError: Expected to have finished reduction in the prior iteration before starting a new one" 1. This error indicates that your module has parameters that were not used in producing loss. This phenomenon may be caused by running different branches in your code in DDP mode. More details at [Expected to have finished reduction in the prior iteration before starting a new one](https://github.com/pytorch/pytorch/issues/55582). 2. 
You can set `find_unused_parameters = True` in the config to solve the above problems or find those unused parameters manually - "RuntimeError: Trying to backward through the graph a second time" `GradientCumulativeOptimizerHook` and `OptimizerHook` are both set, which causes `loss.backward()` to be called twice, so a `RuntimeError` is raised. We can only use one of these. More details at [Trying to backward through the graph a second time](https://github.com/open-mmlab/mmcv/issues/1379). ================================================ FILE: docs/en/get_started/Installation.md ================================================ ## Installation There are two versions of MMCV: - **mmcv**: comprehensive, with full features and various CUDA ops out of the box. It takes longer to build. - **mmcv-lite**: lite, without CUDA ops but all other features, similar to mmcv\<1.0.0. It is useful when you do not need those CUDA ops. ```{warning} Do not install both versions in the same environment, otherwise you may encounter errors like `ModuleNotFound`. You need to uninstall one before installing the other. `Installing the full version is highly recommended if CUDA is available`. ``` ### Install mmcv Before installing mmcv, make sure that PyTorch has been successfully installed following the [PyTorch official installation guide](https://pytorch.org/get-started/locally/#start-locally). This can be verified using the following command ```bash python -c 'import torch;print(torch.__version__)' ``` If version information is output, then PyTorch is installed. 
#### Install with mim (recommended) [mim](https://github.com/open-mmlab/mim) is the package management tool for the OpenMMLab projects, which makes it easy to install mmcv ```bash pip install -U openmim mim install mmcv ``` If you find that the above installation command does not use a pre-built package ending with `.whl` but a source package ending with `.tar.gz`, you may not have a pre-build package corresponding to the PyTorch or CUDA or mmcv version, in which case you can [build mmcv from source](build.md).
Installation log using pre-built packages Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
Collecting mmcv
Downloading https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/mmcv-2.0.0-cp38-cp38-manylinux1_x86_64.whl
Installation log using source packages Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
Collecting mmcv==2.0.0
Downloading mmcv-2.0.0.tar.gz
To install a specific version of mmcv, for example, mmcv version 2.0.0, you can use the following command ```bash mim install mmcv==2.0.0 ``` :::{note} If you would like to use `opencv-python-headless` instead of `opencv-python`, e.g., in a minimum container environment or servers without GUI, you can first install it before installing MMCV to skip the installation of `opencv-python`. Alternatively, if it takes too long to install a dependency library, you can specify the pypi source ```bash mim install mmcv -i https://pypi.tuna.tsinghua.edu.cn/simple ``` ::: You can run [check_installation.py](https://github.com/open-mmlab/mmcv/blob/main/.dev_scripts/check_installation.py) to check the installation of mmcv-full after running the installation commands. #### Install with pip Use the following command to check the version of CUDA and PyTorch ```bash python -c 'import torch;print(torch.__version__);print(torch.version.cuda)' ``` Select the appropriate installation command depending on the type of system, CUDA version, PyTorch version, and MMCV version





If you do not find a corresponding version in the dropdown box above, you probably do not have a pre-built package corresponding to the PyTorch or CUDA or mmcv version, at which point you can [build mmcv from source](build.md).

:::{note}
mmcv is only compiled on PyTorch 1.x.0 because the compatibility
usually holds between 1.x.0 and 1.x.1. If your PyTorch version is 1.x.1, you
can install mmcv compiled with PyTorch 1.x.0 and it usually works well.
For example, if your PyTorch version is 1.8.1, you can feel free to choose 1.8.x.
:::

:::{note}
If you would like to use `opencv-python-headless` instead of `opencv-python`,
e.g., in a minimum container environment or servers without GUI,
you can first install it before installing MMCV to skip the installation of `opencv-python`.

Alternatively, if it takes too long to install a dependency library, you can specify the pypi source

```bash
mim install mmcv -i https://pypi.tuna.tsinghua.edu.cn/simple
```

:::

You can run [check_installation.py](https://github.com/open-mmlab/mmcv/blob/main/.dev_scripts/check_installation.py) to check the installation of mmcv after running the installation commands.

#### Using mmcv with Docker

Build with local repository

```bash
git clone https://github.com/open-mmlab/mmcv.git && cd mmcv
docker build -t mmcv -f docker/release/Dockerfile .
```

Or build with remote repository

```bash
docker build -t mmcv https://github.com/open-mmlab/mmcv.git#main:docker/release
```

The [Dockerfile](release/Dockerfile) installs the latest released version of mmcv by default, but you can pass the `MMCV` build argument to install a specific version.

```bash
docker image build -t mmcv -f docker/release/Dockerfile --build-arg MMCV=2.0.0 .
```

If you also want to use other versions of PyTorch and CUDA, you can also pass them when building docker images.

An example to build an image with PyTorch 1.11 and CUDA 11.3.

```bash
docker build -t mmcv -f docker/release/Dockerfile \
    --build-arg PYTORCH=1.11.0 \
    --build-arg CUDA=11.3 \
    --build-arg CUDNN=8 \
    --build-arg MMCV=2.0.0 .
```

More available versions of PyTorch and CUDA can be found at [dockerhub/pytorch](https://hub.docker.com/r/pytorch/pytorch/tags).

### Install mmcv-lite

If you need to use PyTorch-related modules, make sure PyTorch has been successfully installed in your environment by referring to the [PyTorch official installation guide](https://github.com/pytorch/pytorch#installation).

```bash
pip install mmcv-lite
```

================================================
FILE: docs/en/get_started/Introduction.md
================================================
## Introduction

MMCV is a foundational library for computer vision research and provides the following functionalities.

- [Image/Video processing](../understand_mmcv/data_process.md)
- [Image and annotation visualization](../understand_mmcv/visualization.md)
- [Image transformation](../understand_mmcv/data_transform.md)
- [Various CNN architectures](../understand_mmcv/cnn.md)
- [High-quality implementation of common CUDA ops](../understand_mmcv/ops.md)

It supports the following systems:

- Linux
- Windows
- macOS

It supports many research projects as below:

- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark.
- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark.
- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection.
- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark.
- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO series toolbox and benchmark.
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark.
- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition, and understanding toolbox.
- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark.
- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 3D human parametric model toolbox and benchmark.
- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab self-supervised learning toolbox and benchmark.
- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab model compression toolbox and benchmark.
- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab fewshot learning toolbox and benchmark.
- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark.
- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark.
- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark.
- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox.
- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox.
- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab model deployment framework.

================================================
FILE: docs/en/index.rst
================================================
Welcome to DLPanToolbox's documentation!
========================================



You can switch between Chinese and English documents in the lower-left corner of the layout.

.. toctree::
   :caption: Switch Language

   switch_language.md

.. toctree::
   :glob:
   :caption: Get Started

   get_started/Introduction.md
   get_started/Installation.md

.. toctree::
   :glob:
   :caption: PanCollection

   PanCollection/Simulation.md
   PanCollection/PreProcess.md
   PanCollection/Example.md
   PanCollection/Evaluation.md

.. toctree::
   :caption: Utilization

   citation.md
   faq.md

.. toctree::
   :glob:
   :maxdepth: 2
   :caption: Python API




Indices and tables
==================

* :ref:`genindex`
* :ref:`search`

================================================
FILE: docs/en/make.bat
================================================
@ECHO OFF

REM Command file for Sphinx documentation
REM
REM Usage: make.bat ^<target^>
REM   Forwards ^<target^> to "sphinx-build -M" with SOURCEDIR and BUILDDIR below.
REM   With no argument, sphinx-build's own help target is shown instead.
REM Environment overrides:
REM   SPHINXBUILD  command used to invoke Sphinx (defaults to sphinx-build)
REM   SPHINXOPTS   extra options appended to every sphinx-build invocation

REM Run from the directory that contains this script so the relative
REM SOURCEDIR/BUILDDIR paths resolve no matter where the script is called from.
pushd %~dp0

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

if "%1" == "" goto help

REM Probe for the Sphinx executable; errorlevel 9009 means the command was
REM not found. On that failure path the pushd above is undone with popd before
REM exiting, so the caller's working directory is restored.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	popd
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
popd

================================================
FILE: docs/en/switch_language.md
================================================
## Change to English

## 切换到简体中文

================================================
FILE: docs/requirements.txt
================================================
-e git+https://github.com/XiaoXiao-Woo/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
sphinx==4.0.2
sphinx-copybutton
sphinx_markdown_tables>=0.0.16
myst-parser
sphinx-autobuild

================================================
FILE: docs/run.sh
================================================
# Run sphinx-autobuild with `en` as the documentation source directory and
# `build/html` as the HTML output directory (both relative to the current directory).
sphinx-autobuild en build/html

================================================
FILE: docs/zh-cn/DLPanToolbox/Evaluation.md
================================================
## Evaluation

================================================
FILE: docs/zh-cn/DLPanToolbox/Example.md
================================================


================================================
FILE: docs/zh-cn/DLPanToolbox/PreProcess.md
================================================
## Data PreProcess

### Image

This module provides some image processing methods, which requires `opencv` to be installed first.

#### Read/Write/Show

To read or write images files, use `imread` or `imwrite`.

```python
import mmcv

img = mmcv.imread('test.jpg')
img = mmcv.imread('test.jpg', flag='grayscale')
img_ = mmcv.imread(img)  # nothing will happen, img_ = img
mmcv.imwrite(img, 'out.jpg')
```

To read images from bytes

```python
with open('test.jpg', 'rb') as f:
    data = f.read()
img = mmcv.imfrombytes(data)
```

To show an image file or a loaded image

```python
mmcv.imshow('tests/data/color.jpg')
# a loaded image (a numpy array) can be shown as well

for i in range(10):
    img = np.random.randint(256, size=(100, 100, 3), dtype=np.uint8)
    mmcv.imshow(img, win_name='test image', wait_time=200)
```

#### Color space conversion

Supported conversion methods:

- bgr2gray
- gray2bgr
- bgr2rgb
- rgb2bgr
- bgr2hsv
- hsv2bgr

```python
img = mmcv.imread('tests/data/color.jpg')
img1 = mmcv.bgr2rgb(img)
img2 = mmcv.rgb2gray(img1)
img3 = mmcv.bgr2hsv(img)
```

#### Resize

There are three resize methods. All `imresize_*` methods have an argument `return_scale`,
if this argument is `False`, then the return value is merely the resized image, otherwise
is a tuple `(resized_img, scale)`.

```python
# resize to a given size
mmcv.imresize(img, (1000, 600), return_scale=True)

# resize to the same size of another image
mmcv.imresize_like(img, dst_img, return_scale=False)

# resize by a ratio
mmcv.imrescale(img, 0.5)

# resize so that the max edge no longer than 1000, short edge no longer than 800
# without changing the aspect ratio
mmcv.imrescale(img, (1000, 800))
```

#### Rotate

To rotate an image by some angle, use `imrotate`. The center can be specified,
which is the center of original image by default. There are two modes of rotating,
one is to keep the image size unchanged so that some parts of the image will be
cropped after rotating, the other is to extend the image size to fit the rotated
image.

```python
img = mmcv.imread('tests/data/color.jpg')

# rotate the image clockwise by 30 degrees.
img_ = mmcv.imrotate(img, 30)

# rotate the image counterclockwise by 90 degrees.
img_ = mmcv.imrotate(img, -90)

# rotate the image clockwise by 30 degrees, and rescale it by 1.5x at the same time.
img_ = mmcv.imrotate(img, 30, scale=1.5)

# rotate the image clockwise by 30 degrees, with (100, 100) as the center.
img_ = mmcv.imrotate(img, 30, center=(100, 100))

# rotate the image clockwise by 30 degrees, and extend the image size.
img_ = mmcv.imrotate(img, 30, auto_bound=True)
```

#### Flip

To flip an image, use `imflip`.

```python
img = mmcv.imread('tests/data/color.jpg')

# flip the image horizontally
mmcv.imflip(img)

# flip the image vertically
mmcv.imflip(img, direction='vertical')
```

#### Crop

`imcrop` can crop the image with one or more regions. Each region is represented by the upper left and lower right coordinates as (x1, y1, x2, y2).

```python
import mmcv
import numpy as np

img = mmcv.imread('tests/data/color.jpg')

# crop the region (10, 10, 100, 120)
bboxes = np.array([10, 10, 100, 120])
patch = mmcv.imcrop(img, bboxes)

# crop two regions (10, 10, 100, 120) and (0, 0, 50, 50)
bboxes = np.array([[10, 10, 100, 120], [0, 0, 50, 50]])
patches = mmcv.imcrop(img, bboxes)

# crop two regions, and rescale the patches by 1.2x
patches = mmcv.imcrop(img, bboxes, scale=1.2)
```

#### Padding

There are two methods, `impad` and `impad_to_multiple`, to pad an image to the
specific size with given values.

```python
img = mmcv.imread('tests/data/color.jpg')

# pad the image to (1000, 1200) with all zeros
img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=0)

# pad the image to (1000, 1200) with different values for three channels.
img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=(100, 50, 200))

# pad the image on left, right, top, bottom borders with all zeros
img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=0)

# pad the image on left, right, top, bottom borders with different values
# for three channels.
img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=(100, 50, 200))

# pad an image so that each edge is a multiple of some value.
img_ = mmcv.impad_to_multiple(img, 32)
```

================================================
FILE: docs/zh-cn/DLPanToolbox/Simulation.md
================================================
## Simulation

================================================
FILE: docs/zh-cn/Makefile
================================================
# Minimal makefile for Sphinx documentation
#
# Every target (e.g. `make html`) is forwarded to `sphinx-build -M <target>`;
# running `make` with no target prints sphinx-build's help.

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
# conf.py lives next to this Makefile, so the source directory is the current
# directory. The build directory is `_build`, which is the directory conf.py
# lists in exclude_patterns and the one the companion make.bat uses.
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

================================================
FILE: docs/zh-cn/_static/css/readthedocs.css
================================================
/* Header logo: an 85x40 box showing the DLPan logo scaled to the same size. */
.header-logo {
    width: 85px;
    height: 40px;
    background-image: url("../image/logo-dlpan.png");
    background-size: 85px 40px;
}

/* Cells of tables carrying the colwidths-auto class each take half the width. */
table.colwidths-auto td {
    width: 50%;
}

================================================
FILE: docs/zh-cn/_templates/classtemplate.rst
================================================
.. role:: hidden
    :class: hidden-section
.. currentmodule:: {{ module }}


{{ name | underline}}

.. autoclass:: {{ name }}
    :members:


..
  autogenerated from source/_templates/classtemplate.rst
  note it does not have :inherited-members:

================================================
FILE: docs/zh-cn/citation.md
================================================
## Cite PanCollection
If PanCollection is helpful for you, you are encouraged to cite the following paper:
```bibtex
@misc{PanCollection,
    author = {Xiao Wu and Liang-Jian Deng and Ran Ran},
    title = {"PanCollection" for Remote Sensing Pansharpening},
    url = {https://github.com/XiaoXiao-Woo/PanCollection/},
    year = {2022},
}
```
```bibtex
@ARTICLE{deng2022grsm,
author={L.-J. Deng and G. Vivone and M. E. Paoletti and G. Scarpa and J. He and Y. Zhang and J. Chanussot and A. Plaza},
journal={IEEE Geoscience and Remote Sensing Magazine},
title={Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks},
year={2022},
pages={2-38},
doi={10.1109/MGRS.2020.3019315}
}
```
For Chinese Paper,
```bibtex
@ARTICLE{dengjig2022,
	author={邓良剑,冉燃,吴潇,张添敬},
	journal={中国图象图形学报},
	title={遥感图像全色锐化的卷积神经网络方法研究进展},
 	year={2022},
  	volume={},
  	number={9},
  	pages={},
  	doi={10.11834/jig.220540}
   }
```
Also, the codes of traditional methods are from the "pansharpening toolbox for distribution", thus please cite the corresponding paper:
```bibtex
@ARTICLE{vivone2021grsm,
  author={Vivone, Gemine and Dalla Mura, Mauro and Garzelli, Andrea and Restaino, Rocco and Scarpa, Giuseppe and Ulfarsson, Magnus O. and   Alparone, Luciano and Chanussot, Jocelyn},
  journal={IEEE Geoscience and Remote Sensing Magazine}, 
  title={A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting Pansharpening With Classical and Emerging Pansharpening Methods}, 
  year={2021},
  volume={9},
  number={1},
  pages={53-81},
  doi={10.1109/MGRS.2020.3019315}
}
```

================================================
FILE: docs/zh-cn/conf.py
================================================
#  GPL v3.0 License
#  Copyright (C) UESTC
#  All Rights Reserved
#  @Time    : 2023/9/21
#  @Author  : Xiao Wu
#  @reference:
#

#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

import pytorch_sphinx_theme
from sphinx.builders.html import StandaloneHTMLBuilder

# Put the directory two levels up ('../..', resolved against the current
# working directory) at the front of sys.path so modules located there can be
# imported, e.g. by autodoc.
sys.path.insert(0, os.path.abspath('../..'))

# The version used to be read from mmcv/version.py (kept below for reference);
# it is now hard-coded and must be updated by hand on each release.
# version_file = '../../mmcv/version.py'
# with open(version_file) as f:
#     exec(compile(f.read(), version_file, 'exec'))
# __version__ = locals()['__version__']
__version__ = "0.3.6"

# -- Project information -----------------------------------------------------

project = 'pancollection'
copyright = '2023, UESTC'
author = 'Xiao Wu'

# The short X.Y version
# (set to the same string as `release`; no separate short form is derived)
version = __version__
# The full version, including alpha/beta/rc tags
release = __version__

# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.

# Sphinx built-in extensions first, then third-party ones: Markdown tables,
# the MyST Markdown parser (for the `.md` sources declared in source_suffix
# below) and the "copy" button for code blocks (configured at the end of this file).
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
    'sphinx.ext.intersphinx',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
    'sphinx_markdown_tables',
    'myst_parser',
    'sphinx_copybutton',
]  # yapf: disable

# MyST: generate link anchors for Markdown headings down to level 4.
myst_heading_anchors = 4

# MyST: allow `:::` fenced directives (the Markdown docs use `:::{note}` blocks).
myst_enable_extensions = ['colon_fence']

# Configuration for intersphinx
# Maps a project name to (base URL, inventory); None means the default
# inventory file is fetched from the base URL.
intersphinx_mapping = {
    'python': ('https://docs.python.org/3', None),
    'numpy': ('https://numpy.org/doc/stable', None),
    'torch': ('https://pytorch.org/docs/stable/', None)
}

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = {
    '.rst': 'restructuredtext',
    '.md': 'markdown',
}

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
# NOTE(review): newer Sphinx releases (5.x and later) warn about
# `language = None` and expect a language code such as 'en'; docs/requirements.txt
# pins sphinx==4.0.2 -- confirm before upgrading Sphinx.
language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
# html_theme = 'sphinx_rtd_theme'
# Use the PyTorch-style theme; its template directory is obtained from the
# imported pytorch_sphinx_theme package.
html_theme = 'pytorch_sphinx_theme'
html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {
    # Entries of the theme's menu: a single link to the GitHub repository.
    'menu': [
        {
            'name': 'GitHub',
            'url': 'https://github.com/XiaoXiao-Woo/PanCollection'
        },
    ],
    # Specify the language of shared menu
    'menu_lang': 'en',
}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Extra stylesheet, given relative to html_static_path
# (i.e. _static/css/readthedocs.css).
html_css_files = ['css/readthedocs.css']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'pancollectiondoc'

# -- Options for LaTeX output ------------------------------------------------

# All entries below are commented out, so LaTeX output uses Sphinx defaults.
latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'pancollection.tex', 'pancollection Documentation', 'PanCollection Contributors',
     'manual'),
]

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, 'pancollection', 'pancollection Documentation', [author], 1)]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
# NOTE(review): the description is still the sphinx-quickstart placeholder
# text ('One line description of project.').
texinfo_documents = [
    (master_doc, 'pancollection', 'pancollection Documentation', author, 'pancollection',
     'One line description of project.', 'Miscellaneous'),
]

# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']

# set priority when building html
# (overrides the HTML builder's class-level list of image MIME types;
# earlier entries are preferred, so SVG comes first)
StandaloneHTMLBuilder.supported_image_types = [
    'image/svg+xml', 'image/gif', 'image/png', 'image/jpeg'
]
# -- Extension configuration -------------------------------------------------
# Ignore >>> when copying code
# The pattern is a regular expression (see copybutton_prompt_is_regexp) that
# matches the '>>> ' and '... ' interactive-prompt prefixes.
copybutton_prompt_text = r'>>> |\.\.\. '
copybutton_prompt_is_regexp = True

================================================
FILE: docs/zh-cn/docutils.conf
================================================
[html writers]
table_style: colwidths-auto

================================================
FILE: docs/zh-cn/faq.md
================================================
## Frequently Asked Questions

We list some common troubles faced by many users and their corresponding solutions here.
Feel free to enrich the list if you find any frequent issues and have ways to help others to solve them.

### Installation

- KeyError: "xxx: 'yyy is not in the zzz registry'"

  The registry mechanism will be triggered only when the file of the module is imported.
  So you need to import that file somewhere. More details can be found at [KeyError: "MaskRCNN: 'RefineRoIHead is not in the models registry'"](https://github.com/open-mmlab/mmdetection/issues/5974).

- "No module named 'mmcv.ops'"; "No module named 'mmcv.\_ext'"

  1. Uninstall existing mmcv in the environment using `pip uninstall mmcv`
  2. Install mmcv-full following the [installation instruction](https://mmcv.readthedocs.io/en/latest/get_started/installation.html) or [Build MMCV from source](https://mmcv.readthedocs.io/en/latest/get_started/build.html)

- "invalid device function" or "no kernel image is available for execution"

  1. Check the CUDA compute capability of your GPU
  2. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built for the correct GPU architecture. You may need to set `TORCH_CUDA_ARCH_LIST` to reinstall MMCV. The compatibility issue could happen when using old GPUs, e.g., Tesla K80 (3.7) on colab.
  3. Check whether the running environment is the same as that when mmcv/mmdet is compiled. For example, you may compile mmcv using CUDA 10.0 but run it on CUDA 9.0 environments

- "undefined symbol" or "cannot open xxx.so"

  1. If those symbols are CUDA/C++ symbols (e.g., libcudart.so or GLIBCXX), check
     whether the CUDA/GCC runtimes are the same as those used for compiling mmcv
  2. If those symbols are Pytorch symbols (e.g., symbols containing caffe, aten, and TH), check whether the Pytorch version is the same as that used for compiling mmcv
  3. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built by and running on the same environment

- "RuntimeError: CUDA error: invalid configuration argument"

  This error may be caused by the poor performance of GPU. Try to decrease the value of [THREADS_PER_BLOCK](https://github.com/open-mmlab/mmcv/blob/cac22f8cf5a904477e3b5461b1cc36856c2793da/mmcv/ops/csrc/common_cuda_helper.hpp#L10)
  and recompile mmcv.

- "RuntimeError: nms is not compiled with GPU support"

  This error is because your CUDA environment is not installed correctly.
  You may try to re-install your CUDA environment and then delete the build/ folder before re-compile mmcv.

- "Segmentation fault"

  1. Check your GCC version and use GCC >= 5.4. This is usually caused by the incompatibility between PyTorch and the environment (e.g., GCC \< 4.9 for PyTorch). We also recommend that users avoid GCC 5.5, because many users have reported that GCC 5.5 causes "segmentation fault" and that simply switching to GCC 5.4 solves the problem
  2. Check whether PyTorch is correctly installed and could use CUDA op, e.g. type the following command in your terminal and see whether they could correctly output results
     ```shell
     python -c 'import torch; print(torch.cuda.is_available())'
     ```
  3. If PyTorch is correctly installed, check whether MMCV is correctly installed. If MMCV is correctly installed, then there will be no issue of the command
     ```shell
     python -c 'import mmcv; import mmcv.ops'
     ```
  4. If MMCV and PyTorch are correctly installed, you can use `ipdb` to set breakpoints or directly add `print` to debug and see which part leads the `segmentation fault`

- "libtorch_cuda_cu.so: cannot open shared object file"

  `mmcv-full` depends on this shared object, but it cannot be found. Check whether the object exists in `~/miniconda3/envs/{environment-name}/lib/python3.7/site-packages/torch/lib` or try to re-install PyTorch.

- "fatal error C1189: #error:  -- unsupported Microsoft Visual Studio version!"

  If you are building mmcv-full on Windows and the version of CUDA is 9.2, you will probably encounter the error `"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include\crt/host_config.h(133): fatal error C1189: #error:  -- unsupported Microsoft Visual Studio version! Only the versions 2012, 2013, 2015 and 2017 are supported!"`, in which case you can use a lower version of Microsoft Visual Studio like vs2017.

- "error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized"

  If your version of PyTorch is 1.5.0 and you are building mmcv-full on Windows, you will probably encounter the error `- torch/csrc/jit/api/module.h(474): error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized`. The way to solve the error is to replace all the `static constexpr bool all_slots = false;` with `static bool all_slots = false;` at this file `https://github.com/pytorch/pytorch/blob/v1.5.0/torch/csrc/jit/api/module.h`. More details can be found at [member "torch::jit::detail::AttributePolicy::all_slots" may not be initialized](https://github.com/pytorch/pytorch/issues/39394).

- "error: a member with an in-class initializer must be const"

  If your version of PyTorch is 1.6.0 and you are building mmcv-full on Windows, you will probably encounter the error `"- torch/include\torch/csrc/jit/api/module.h(483): error: a member with an in-class initializer must be const"`. The way to solve the error is to replace all the `CONSTEXPR_EXCEPT_WIN_CUDA ` with `const` at `torch/include\torch/csrc/jit/api/module.h`. More details can be found at [Ninja: build stopped: subcommand failed](https://github.com/open-mmlab/mmcv/issues/575).

- "error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized"

  If your version of PyTorch is 1.7.0 and you are building mmcv-full on Windows, you will probably encounter the error `torch/include\torch/csrc/jit/ir/ir.h(1347): error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized`. The way to solve the error needs to modify several local files of PyTorch:

  - delete `static constexpr Symbol Kind = ::c10::prim::profile;` and `static constexpr Symbol Kind = ::c10::prim::profile_optional;` at `torch/include\torch/csrc/jit/ir/ir.h`
  - replace `explicit operator type&() { return *(this->value); }` with `explicit operator type&() { return *((type*)this->value); }` at `torch\include\pybind11\cast.h`
  - replace all the `CONSTEXPR_EXCEPT_WIN_CUDA` with `const` at `torch/include\torch/csrc/jit/api/module.h`

  More details can be found at [Ensure default extra_compile_args](https://github.com/pytorch/pytorch/pull/45956).

- Compatibility issue between MMCV and MMDetection; "ConvWS is already registered in conv layer"

  Please install the correct version of MMCV for the version of your MMDetection following the [installation instruction](https://mmdetection.readthedocs.io/en/latest/get_started.html#installation).

### Usage

- "RuntimeError: Expected to have finished reduction in the prior iteration before starting a new one"

  1. This error indicates that your module has parameters that were not used in producing loss. This phenomenon may be caused by running different branches in your code in DDP mode. More details at [Expected to have finished reduction in the prior iteration before starting a new one](https://github.com/pytorch/pytorch/issues/55582).
  2. You can set `find_unused_parameters = True` in the config to solve the above problems or find those unused parameters manually

- "RuntimeError: Trying to backward through the graph a second time"

  `GradientCumulativeOptimizerHook` and `OptimizerHook` are both set, which causes `loss.backward()` to be called twice, so a `RuntimeError` is raised. Only one of these hooks can be used at a time. More details at [Trying to backward through the graph a second time](https://github.com/open-mmlab/mmcv/issues/1379).

================================================
FILE: docs/zh-cn/get_started/Installation.md
================================================
## Installation

There are two versions of MMCV:

- **mmcv**: comprehensive, with full features and various CUDA ops out of box. It takes longer time to build.
- **mmcv-lite**: lite, without CUDA ops but all other features, similar to mmcv\<1.0.0. It is useful when you do not need those CUDA ops.

```{warning}
Do not install both versions in the same environment, otherwise you may encounter errors like `ModuleNotFound`. You need to uninstall one before installing the other. `Installing the full version is highly recommended if CUDA is available`.
```

### Install mmcv

Before installing mmcv, make sure that PyTorch has been successfully installed following the [PyTorch official installation guide](https://pytorch.org/get-started/locally/#start-locally). This can be verified using the following command

```bash
python -c 'import torch;print(torch.__version__)'
```

If version information is output, then PyTorch is installed.

#### Install with mim (recommended)

[mim](https://github.com/open-mmlab/mim) is the package management tool for the OpenMMLab projects, which makes it easy to install mmcv

```bash
pip install -U openmim
mim install mmcv
```

If you find that the above installation command does not use a pre-built package ending with `.whl` but a source package ending with `.tar.gz`, you may not have a pre-built package corresponding to the PyTorch or CUDA or mmcv version, in which case you can [build mmcv from source](build.md).

<details>
<summary>Installation log using pre-built packages</summary>

Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html<br />
Collecting mmcv<br />
<code>Downloading https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/mmcv-2.0.0-cp38-cp38-manylinux1_x86_64.whl</code>

</details>

<details>
<summary>Installation log using source packages</summary>

Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html<br />
Collecting mmcv==2.0.0<br />
<code>Downloading mmcv-2.0.0.tar.gz</code>

</details>

To install a specific version of mmcv, for example, mmcv version 2.0.0, you can use the following command

```bash
mim install mmcv==2.0.0
```

:::{note}
If you would like to use `opencv-python-headless` instead of `opencv-python`,
e.g., in a minimum container environment or servers without GUI,
you can first install it before installing MMCV to skip the installation of `opencv-python`.

Alternatively, if it takes too long to install a dependency library, you can specify the pypi source

```bash
mim install mmcv -i https://pypi.tuna.tsinghua.edu.cn/simple
```

:::

You can run [check_installation.py](https://github.com/open-mmlab/mmcv/blob/main/.dev_scripts/check_installation.py) to check the installation of mmcv-full after running the installation commands.

#### Install with pip

Use the following command to check the version of CUDA and PyTorch

```bash
python -c 'import torch;print(torch.__version__);print(torch.version.cuda)'
```

Select the appropriate installation command depending on the type of system, CUDA version, PyTorch version, and MMCV version





If you do not find a corresponding version in the dropdown box above, you probably do not have a pre-built package corresponding to the PyTorch or CUDA or mmcv version, at which point you can [build mmcv from source](build.md).

:::{note}
mmcv is only compiled on PyTorch 1.x.0 because the compatibility
usually holds between 1.x.0 and 1.x.1. If your PyTorch version is 1.x.1, you
can install mmcv compiled with PyTorch 1.x.0 and it usually works well.
For example, if your PyTorch version is 1.8.1, you can feel free to choose 1.8.x.
:::

:::{note}
If you would like to use `opencv-python-headless` instead of `opencv-python`,
e.g., in a minimum container environment or servers without GUI,
you can first install it before installing MMCV to skip the installation of `opencv-python`.

Alternatively, if it takes too long to install a dependency library, you can specify the pypi source

```bash
mim install mmcv -i https://pypi.tuna.tsinghua.edu.cn/simple
```

:::

You can run [check_installation.py](https://github.com/open-mmlab/mmcv/blob/main/.dev_scripts/check_installation.py) to check the installation of mmcv after running the installation commands.

#### Using mmcv with Docker

Build with local repository

```bash
git clone https://github.com/open-mmlab/mmcv.git && cd mmcv
docker build -t mmcv -f docker/release/Dockerfile .
```

Or build with remote repository

```bash
docker build -t mmcv https://github.com/open-mmlab/mmcv.git#main:docker/release
```

The [Dockerfile](release/Dockerfile) installs the latest released version of mmcv-full by default, but you can specify the mmcv version to install.

```bash
docker image build -t mmcv -f docker/release/Dockerfile --build-arg MMCV=2.0.0 .
```

If you also want to use other versions of PyTorch and CUDA, you can also pass them when building docker images.

An example to build an image with PyTorch 1.11 and CUDA 11.3.

```bash
docker build -t mmcv -f docker/release/Dockerfile \
    --build-arg PYTORCH=1.11.0 \
    --build-arg CUDA=11.3 \
    --build-arg CUDNN=8 \
    --build-arg MMCV=2.0.0 .
```

More available versions of PyTorch and CUDA can be found at [dockerhub/pytorch](https://hub.docker.com/r/pytorch/pytorch/tags).

### Install mmcv-lite

If you need to use PyTorch-related modules, make sure PyTorch has been successfully installed in your environment by referring to the [PyTorch official installation guide](https://github.com/pytorch/pytorch#installation).

```bash
pip install mmcv-lite
```

================================================
FILE: docs/zh-cn/get_started/Introduction.md
================================================
## Introduction

MMCV is a foundational library for computer vision research and provides the following functionalities.

- [Image/Video processing](../understand_mmcv/data_process.md)
- [Image and annotation visualization](../understand_mmcv/visualization.md)
- [Image transformation](../understand_mmcv/data_transform.md)
- [Various CNN architectures](../understand_mmcv/cnn.md)
- [High-quality implementation of common CUDA ops](../understand_mmcv/ops.md)

It supports the following systems:

- Linux
- Windows
- macOS

It supports many research projects as below:

- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark.
- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark.
- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection.
- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark.
- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO series toolbox and benchmark.
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark.
- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition, and understanding toolbox.
- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark.
- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 3D human parametric model toolbox and benchmark.
- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab self-supervised learning toolbox and benchmark.
- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab model compression toolbox and benchmark.
- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab fewshot learning toolbox and benchmark.
- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark.
- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark.
- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark.
- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox.
- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox.
- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab model deployment framework.

================================================
FILE: docs/zh-cn/index.rst
================================================
Welcome to PanCollection's documentation!
=========================================

You can switch between Chinese and English documents in the lower-left corner of the layout.

.. toctree::
   :maxdepth: 2
   :caption: Get Started

   get_started/Introduction.md
   get_started/Installation.md

.. toctree::
   :maxdepth: 2
   :caption: PanCollection

   PanCollection/PreProcess.md
   PanCollection/Evaluation.md

.. toctree::
   :caption: Switch Language

   switch_language.md

.. toctree::
   :maxdepth: 2
   :caption: Related Toolbox

   related.md

.. toctree::

   faq.md

Indices and tables
==================

* :ref:`genindex`
* :ref:`search`

================================================
FILE: docs/zh-cn/make.bat
================================================
@ECHO OFF

REM Work from the directory that contains this script so that SOURCEDIR and
REM BUILDDIR below resolve relative to it. Every exit path must popd again.
pushd %~dp0

REM Command file for Sphinx documentation
REM The first argument is the Sphinx make-mode target; with no argument the
REM Sphinx help text is printed instead.

REM Honor a caller-provided SPHINXBUILD, otherwise fall back to sphinx-build.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

if "%1" == "" goto help

REM Probe run with all output discarded, used only to learn whether the
REM command can be started at all. Errorlevel 9009 or above is what cmd
REM reports when the command cannot be found.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	REM Restore the caller's working directory before bailing out, since this
	REM path never reaches the popd at the end label.
	popd
	exit /b 1
)

REM Delegate the requested target to Sphinx make-mode.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
popd

================================================
FILE: docs/zh-cn/related.md
================================================


================================================
FILE: docs/zh-cn/switch_language.md
================================================
## English

## 简体中文