Repository: ElegantGod/SSHA Branch: master Commit: bbf78059bf70 Files: 22 Total size: 75.6 MB Directory structure: gitextract_ddu7n4mp/ ├── Makefile ├── README.md ├── __init__.py ├── kmodel/ │ ├── e2e-0000.params │ └── e2e-symbol.json ├── rcnn/ │ ├── __init__.py │ ├── cython/ │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── anchors.pyx │ │ ├── bbox.pyx │ │ ├── cpu_nms.pyx │ │ ├── gpu_nms.hpp │ │ ├── gpu_nms.pyx │ │ ├── nms_kernel.cu │ │ └── setup.py │ └── processing/ │ ├── __init__.py │ ├── bbox_regression.py │ ├── bbox_transform.py │ ├── generate_anchor.py │ └── nms.py ├── ssha_detector.py └── test_kpoint.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: Makefile ================================================ all: cd rcnn/cython/; python setup.py build_ext --inplace; rm -rf build; cd ../../ #cd rcnn/pycocotools/; python setup.py build_ext --inplace; rm -rf build; cd ../../ clean: cd rcnn/cython/; rm *.so *.c *.cpp; cd ../../ #cd rcnn/pycocotools/; rm *.so; cd ../../ ================================================ FILE: README.md ================================================ # SSHA, SSH with Alignment ## Result ![img0](res2.jpg) ![img1](res1.jpg) ![img2](res0.jpg) ## How To Use #### 0. install mxnet and opencv for python version #### 1. clone SSHA git clone https://github.com/ElegantGod/SSHA #### 2. make cython cd SSHA && make #### 3. 
run it python test_kpoint.py ### FDDB ![fddb](FDDB.png) ### Reference: [Insightface](https://github.com/deepinsight/insightface/SSH) ================================================ FILE: __init__.py ================================================ ================================================ FILE: kmodel/e2e-0000.params ================================================ [File too large to display: 75.5 MB] ================================================ FILE: kmodel/e2e-symbol.json ================================================ { "nodes": [ { "op": "null", "name": "data", "inputs": [] }, { "op": "null", "name": "conv1_1_weight", "attrs": { "kernel": "(3, 3)", "num_filter": "64", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "null", "name": "conv1_1_bias", "attrs": { "kernel": "(3, 3)", "num_filter": "64", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "Convolution", "name": "conv1_1", "attrs": { "kernel": "(3, 3)", "num_filter": "64", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]] }, { "op": "Activation", "name": "relu1_1", "attrs": {"act_type": "relu"}, "inputs": [[3, 0, 0]] }, { "op": "null", "name": "conv1_2_weight", "attrs": { "kernel": "(3, 3)", "num_filter": "64", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "null", "name": "conv1_2_bias", "attrs": { "kernel": "(3, 3)", "num_filter": "64", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "Convolution", "name": "conv1_2", "attrs": { "kernel": "(3, 3)", "num_filter": "64", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [[4, 0, 0], [5, 0, 0], [6, 0, 0]] }, { "op": "Activation", "name": "relu1_2", "attrs": {"act_type": "relu"}, "inputs": [[7, 0, 0]] }, { "op": "Pooling", "name": "pool1", "attrs": { "kernel": "(2, 2)", "pool_type": "max", "stride": "(2, 2)" }, "inputs": [[8, 0, 0]] }, { "op": "null", "name": "conv2_1_weight", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", 
"workspace": "2048" }, "inputs": [] }, { "op": "null", "name": "conv2_1_bias", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "Convolution", "name": "conv2_1", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [[9, 0, 0], [10, 0, 0], [11, 0, 0]] }, { "op": "Activation", "name": "relu2_1", "attrs": {"act_type": "relu"}, "inputs": [[12, 0, 0]] }, { "op": "null", "name": "conv2_2_weight", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "null", "name": "conv2_2_bias", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "Convolution", "name": "conv2_2", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [[13, 0, 0], [14, 0, 0], [15, 0, 0]] }, { "op": "Activation", "name": "relu2_2", "attrs": {"act_type": "relu"}, "inputs": [[16, 0, 0]] }, { "op": "Pooling", "name": "pool2", "attrs": { "kernel": "(2, 2)", "pool_type": "max", "stride": "(2, 2)" }, "inputs": [[17, 0, 0]] }, { "op": "null", "name": "conv3_1_weight", "attrs": { "kernel": "(3, 3)", "num_filter": "256", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "null", "name": "conv3_1_bias", "attrs": { "kernel": "(3, 3)", "num_filter": "256", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "Convolution", "name": "conv3_1", "attrs": { "kernel": "(3, 3)", "num_filter": "256", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [[18, 0, 0], [19, 0, 0], [20, 0, 0]] }, { "op": "Activation", "name": "relu3_1", "attrs": {"act_type": "relu"}, "inputs": [[21, 0, 0]] }, { "op": "null", "name": "conv3_2_weight", "attrs": { "kernel": "(3, 3)", "num_filter": "256", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "null", "name": "conv3_2_bias", "attrs": { "kernel": "(3, 3)", "num_filter": 
"256", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "Convolution", "name": "conv3_2", "attrs": { "kernel": "(3, 3)", "num_filter": "256", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [[22, 0, 0], [23, 0, 0], [24, 0, 0]] }, { "op": "Activation", "name": "relu3_2", "attrs": {"act_type": "relu"}, "inputs": [[25, 0, 0]] }, { "op": "null", "name": "conv3_3_weight", "attrs": { "kernel": "(3, 3)", "num_filter": "256", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "null", "name": "conv3_3_bias", "attrs": { "kernel": "(3, 3)", "num_filter": "256", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "Convolution", "name": "conv3_3", "attrs": { "kernel": "(3, 3)", "num_filter": "256", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [[26, 0, 0], [27, 0, 0], [28, 0, 0]] }, { "op": "Activation", "name": "relu3_3", "attrs": {"act_type": "relu"}, "inputs": [[29, 0, 0]] }, { "op": "Pooling", "name": "pool3", "attrs": { "kernel": "(2, 2)", "pool_type": "max", "stride": "(2, 2)" }, "inputs": [[30, 0, 0]] }, { "op": "null", "name": "conv4_1_weight", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "null", "name": "conv4_1_bias", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "Convolution", "name": "conv4_1", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [[31, 0, 0], [32, 0, 0], [33, 0, 0]] }, { "op": "Activation", "name": "relu4_1", "attrs": {"act_type": "relu"}, "inputs": [[34, 0, 0]] }, { "op": "null", "name": "conv4_2_weight", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "null", "name": "conv4_2_bias", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "Convolution", "name": "conv4_2", "attrs": { 
"kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [[35, 0, 0], [36, 0, 0], [37, 0, 0]] }, { "op": "Activation", "name": "relu4_2", "attrs": {"act_type": "relu"}, "inputs": [[38, 0, 0]] }, { "op": "null", "name": "conv4_3_weight", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "null", "name": "conv4_3_bias", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "Convolution", "name": "conv4_3", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [[39, 0, 0], [40, 0, 0], [41, 0, 0]] }, { "op": "Activation", "name": "relu4_3", "attrs": {"act_type": "relu"}, "inputs": [[42, 0, 0]] }, { "op": "Pooling", "name": "pool4", "attrs": { "kernel": "(2, 2)", "pool_type": "max", "stride": "(2, 2)" }, "inputs": [[43, 0, 0]] }, { "op": "null", "name": "conv5_1_weight", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "null", "name": "conv5_1_bias", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "Convolution", "name": "conv5_1", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [[44, 0, 0], [45, 0, 0], [46, 0, 0]] }, { "op": "Activation", "name": "relu5_1", "attrs": {"act_type": "relu"}, "inputs": [[47, 0, 0]] }, { "op": "null", "name": "conv5_2_weight", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "null", "name": "conv5_2_bias", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "Convolution", "name": "conv5_2", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [[48, 0, 0], [49, 0, 0], [50, 0, 
0]] }, { "op": "Activation", "name": "relu5_2", "attrs": {"act_type": "relu"}, "inputs": [[51, 0, 0]] }, { "op": "null", "name": "conv5_3_weight", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "null", "name": "conv5_3_bias", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [] }, { "op": "Convolution", "name": "conv5_3", "attrs": { "kernel": "(3, 3)", "num_filter": "512", "pad": "(1, 1)", "workspace": "2048" }, "inputs": [[52, 0, 0], [53, 0, 0], [54, 0, 0]] }, { "op": "Activation", "name": "relu5_3", "attrs": {"act_type": "relu"}, "inputs": [[55, 0, 0]] }, { "op": "Pooling", "name": "pooling0", "attrs": { "kernel": "(2, 2)", "pad": "(0, 0)", "pool_type": "max", "stride": "(2, 2)" }, "inputs": [[56, 0, 0]] }, { "op": "null", "name": "ssh_m3_det_conv1_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m3_det_conv1_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m3_det_conv1", "attrs": { "kernel": "(3, 3)", "num_filter": "256", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[57, 0, 0], [58, 0, 0], [59, 0, 0]] }, { "op": "null", "name": "ssh_m3_det_context_conv1_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m3_det_context_conv1_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m3_det_context_conv1", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[57, 0, 0], [61, 0, 0], [62, 0, 0]] }, { "op": "Activation", "name": "ssh_m3_det_context_conv1_relu", "attrs": {"act_type": "relu"}, "inputs": [[63, 0, 
0]] }, { "op": "null", "name": "ssh_m3_det_context_conv2_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m3_det_context_conv2_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m3_det_context_conv2", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[64, 0, 0], [65, 0, 0], [66, 0, 0]] }, { "op": "null", "name": "ssh_m3_det_context_conv3_1_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m3_det_context_conv3_1_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m3_det_context_conv3_1", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[64, 0, 0], [68, 0, 0], [69, 0, 0]] }, { "op": "Activation", "name": "ssh_m3_det_context_conv3_1_relu", "attrs": {"act_type": "relu"}, "inputs": [[70, 0, 0]] }, { "op": "null", "name": "ssh_m3_det_context_conv3_2_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m3_det_context_conv3_2_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m3_det_context_conv3_2", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[71, 0, 0], [72, 0, 0], [73, 0, 0]] }, { "op": "Concat", "name": "ssh_m3_det_concat", "attrs": { "dim": "1", "num_args": "3" }, "inputs": [[60, 0, 0], [67, 0, 0], [74, 0, 0]] }, { "op": "Activation", "name": "ssh_m3_det_concat_relu", "attrs": {"act_type": "relu"}, "inputs": [[75, 0, 0]] }, { 
"op": "null", "name": "rpn_cls_score_stride32_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "rpn_cls_score_stride32_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "rpn_cls_score_stride32", "attrs": { "kernel": "(1, 1)", "num_filter": "4", "pad": "(0, 0)", "stride": "(1, 1)" }, "inputs": [[76, 0, 0], [77, 0, 0], [78, 0, 0]] }, { "op": "Reshape", "name": "rpn_cls_score_reshape_stride32", "attrs": {"shape": "(0, 2, -1, 0)"}, "inputs": [[79, 0, 0]] }, { "op": "SoftmaxActivation", "name": "rpn_cls_prob_stride32", "attrs": {"mode": "channel"}, "inputs": [[80, 0, 0]] }, { "op": "Reshape", "name": "rpn_cls_prob_reshape_stride32", "attrs": {"shape": "(0, 4, -1, 0)"}, "inputs": [[81, 0, 0]] }, { "op": "null", "name": "rpn_bbox_pred_stride32_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "rpn_bbox_pred_stride32_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "rpn_bbox_pred_stride32", "attrs": { "kernel": "(1, 1)", "num_filter": "8", "pad": "(0, 0)", "stride": "(1, 1)" }, "inputs": [[76, 0, 0], [83, 0, 0], [84, 0, 0]] }, { "op": "null", "name": "rpn_kpoint_pred_stride32_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "rpn_kpoint_pred_stride32_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "rpn_kpoint_pred_stride32", "attrs": { "kernel": "(1, 1)", "num_filter": "20", "pad": "(0, 0)", "stride": "(1, 1)" }, "inputs": [[76, 0, 0], [86, 0, 0], [87, 0, 0]] }, { "op": "null", "name": 
"ssh_m2_det_conv1_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m2_det_conv1_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m2_det_conv1", "attrs": { "kernel": "(3, 3)", "num_filter": "256", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[56, 0, 0], [89, 0, 0], [90, 0, 0]] }, { "op": "null", "name": "ssh_m2_det_context_conv1_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m2_det_context_conv1_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m2_det_context_conv1", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[56, 0, 0], [92, 0, 0], [93, 0, 0]] }, { "op": "Activation", "name": "ssh_m2_det_context_conv1_relu", "attrs": {"act_type": "relu"}, "inputs": [[94, 0, 0]] }, { "op": "null", "name": "ssh_m2_det_context_conv2_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m2_det_context_conv2_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m2_det_context_conv2", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[95, 0, 0], [96, 0, 0], [97, 0, 0]] }, { "op": "null", "name": "ssh_m2_det_context_conv3_1_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m2_det_context_conv3_1_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, 
"inputs": [] }, { "op": "Convolution", "name": "ssh_m2_det_context_conv3_1", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[95, 0, 0], [99, 0, 0], [100, 0, 0]] }, { "op": "Activation", "name": "ssh_m2_det_context_conv3_1_relu", "attrs": {"act_type": "relu"}, "inputs": [[101, 0, 0]] }, { "op": "null", "name": "ssh_m2_det_context_conv3_2_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m2_det_context_conv3_2_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m2_det_context_conv3_2", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[102, 0, 0], [103, 0, 0], [104, 0, 0]] }, { "op": "Concat", "name": "ssh_m2_det_concat", "attrs": { "dim": "1", "num_args": "3" }, "inputs": [[91, 0, 0], [98, 0, 0], [105, 0, 0]] }, { "op": "Activation", "name": "ssh_m2_det_concat_relu", "attrs": {"act_type": "relu"}, "inputs": [[106, 0, 0]] }, { "op": "null", "name": "rpn_cls_score_stride16_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "rpn_cls_score_stride16_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "rpn_cls_score_stride16", "attrs": { "kernel": "(1, 1)", "num_filter": "4", "pad": "(0, 0)", "stride": "(1, 1)" }, "inputs": [[107, 0, 0], [108, 0, 0], [109, 0, 0]] }, { "op": "Reshape", "name": "rpn_cls_score_reshape_stride16", "attrs": {"shape": "(0, 2, -1, 0)"}, "inputs": [[110, 0, 0]] }, { "op": "SoftmaxActivation", "name": "rpn_cls_prob_stride16", "attrs": {"mode": "channel"}, "inputs": [[111, 0, 0]] }, { "op": "Reshape", "name": "rpn_cls_prob_reshape_stride16", "attrs": {"shape": "(0, 
4, -1, 0)"}, "inputs": [[112, 0, 0]] }, { "op": "null", "name": "rpn_bbox_pred_stride16_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "rpn_bbox_pred_stride16_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "rpn_bbox_pred_stride16", "attrs": { "kernel": "(1, 1)", "num_filter": "8", "pad": "(0, 0)", "stride": "(1, 1)" }, "inputs": [[107, 0, 0], [114, 0, 0], [115, 0, 0]] }, { "op": "null", "name": "rpn_kpoint_pred_stride16_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "rpn_kpoint_pred_stride16_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "rpn_kpoint_pred_stride16", "attrs": { "kernel": "(1, 1)", "num_filter": "20", "pad": "(0, 0)", "stride": "(1, 1)" }, "inputs": [[107, 0, 0], [117, 0, 0], [118, 0, 0]] }, { "op": "null", "name": "ssh_m1_red_conv_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m1_red_conv_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "1.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m1_red_conv", "attrs": { "kernel": "(1, 1)", "num_filter": "128", "pad": "(0, 0)", "stride": "(1, 1)" }, "inputs": [[43, 0, 0], [120, 0, 0], [121, 0, 0]] }, { "op": "Activation", "name": "ssh_m1_red_conv_relu", "attrs": {"act_type": "relu"}, "inputs": [[122, 0, 0]] }, { "op": "null", "name": "ssh_m2_red_conv_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m2_red_conv_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": 
"2.0", "__wd_mult__": "1.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m2_red_conv", "attrs": { "kernel": "(1, 1)", "num_filter": "128", "pad": "(0, 0)", "stride": "(1, 1)" }, "inputs": [[56, 0, 0], [124, 0, 0], [125, 0, 0]] }, { "op": "Activation", "name": "ssh_m2_red_conv_relu", "attrs": {"act_type": "relu"}, "inputs": [[126, 0, 0]] }, { "op": "null", "name": "ssh_m2_red_upsampling_weight", "attrs": { "__lr_mult__": "0.0", "__wd_mult__": "0.0", "kernel": "(4, 4)", "no_bias": "True", "num_filter": "128", "num_group": "128", "pad": "(1, 1)", "stride": "(2, 2)" }, "inputs": [] }, { "op": "Deconvolution", "name": "ssh_m2_red_upsampling", "attrs": { "__lr_mult__": "0.0", "__wd_mult__": "0.0", "kernel": "(4, 4)", "no_bias": "True", "num_filter": "128", "num_group": "128", "pad": "(1, 1)", "stride": "(2, 2)" }, "inputs": [[127, 0, 0], [128, 0, 0]] }, { "op": "Crop", "name": "crop0", "attrs": {"num_args": "2"}, "inputs": [[123, 0, 0], [129, 0, 0]] }, { "op": "elemwise_add", "name": "_plus0", "inputs": [[130, 0, 0], [129, 0, 0]] }, { "op": "null", "name": "ssh_m1_conv_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m1_conv_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "1.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m1_conv", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[131, 0, 0], [132, 0, 0], [133, 0, 0]] }, { "op": "Activation", "name": "ssh_m1_conv_relu", "attrs": {"act_type": "relu"}, "inputs": [[134, 0, 0]] }, { "op": "null", "name": "ssh_m1_det_conv1_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m1_det_conv1_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": 
"Convolution", "name": "ssh_m1_det_conv1", "attrs": { "kernel": "(3, 3)", "num_filter": "128", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[135, 0, 0], [136, 0, 0], [137, 0, 0]] }, { "op": "null", "name": "ssh_m1_det_context_conv1_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m1_det_context_conv1_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m1_det_context_conv1", "attrs": { "kernel": "(3, 3)", "num_filter": "64", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[135, 0, 0], [139, 0, 0], [140, 0, 0]] }, { "op": "Activation", "name": "ssh_m1_det_context_conv1_relu", "attrs": {"act_type": "relu"}, "inputs": [[141, 0, 0]] }, { "op": "null", "name": "ssh_m1_det_context_conv2_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m1_det_context_conv2_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m1_det_context_conv2", "attrs": { "kernel": "(3, 3)", "num_filter": "64", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[142, 0, 0], [143, 0, 0], [144, 0, 0]] }, { "op": "null", "name": "ssh_m1_det_context_conv3_1_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m1_det_context_conv3_1_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m1_det_context_conv3_1", "attrs": { "kernel": "(3, 3)", "num_filter": "64", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[142, 0, 0], [146, 0, 0], [147, 0, 0]] }, { "op": "Activation", "name": "ssh_m1_det_context_conv3_1_relu", "attrs": 
{"act_type": "relu"}, "inputs": [[148, 0, 0]] }, { "op": "null", "name": "ssh_m1_det_context_conv3_2_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "ssh_m1_det_context_conv3_2_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "ssh_m1_det_context_conv3_2", "attrs": { "kernel": "(3, 3)", "num_filter": "64", "pad": "(1, 1)", "stride": "(1, 1)" }, "inputs": [[149, 0, 0], [150, 0, 0], [151, 0, 0]] }, { "op": "Concat", "name": "ssh_m1_det_concat", "attrs": { "dim": "1", "num_args": "3" }, "inputs": [[138, 0, 0], [145, 0, 0], [152, 0, 0]] }, { "op": "Activation", "name": "ssh_m1_det_concat_relu", "attrs": {"act_type": "relu"}, "inputs": [[153, 0, 0]] }, { "op": "null", "name": "rpn_cls_score_stride8_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "rpn_cls_score_stride8_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "rpn_cls_score_stride8", "attrs": { "kernel": "(1, 1)", "num_filter": "4", "pad": "(0, 0)", "stride": "(1, 1)" }, "inputs": [[154, 0, 0], [155, 0, 0], [156, 0, 0]] }, { "op": "Reshape", "name": "rpn_cls_score_reshape_stride8", "attrs": {"shape": "(0, 2, -1, 0)"}, "inputs": [[157, 0, 0]] }, { "op": "SoftmaxActivation", "name": "rpn_cls_prob_stride8", "attrs": {"mode": "channel"}, "inputs": [[158, 0, 0]] }, { "op": "Reshape", "name": "rpn_cls_prob_reshape_stride8", "attrs": {"shape": "(0, 4, -1, 0)"}, "inputs": [[159, 0, 0]] }, { "op": "null", "name": "rpn_bbox_pred_stride8_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "rpn_bbox_pred_stride8_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 
0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "rpn_bbox_pred_stride8", "attrs": { "kernel": "(1, 1)", "num_filter": "8", "pad": "(0, 0)", "stride": "(1, 1)" }, "inputs": [[154, 0, 0], [161, 0, 0], [162, 0, 0]] }, { "op": "null", "name": "rpn_kpoint_pred_stride8_weight", "attrs": { "__init__": "[\"normal\", {\"sigma\": 0.01}]", "__lr_mult__": "1.0" }, "inputs": [] }, { "op": "null", "name": "rpn_kpoint_pred_stride8_bias", "attrs": { "__init__": "[\"constant\", {\"value\": 0.0}]", "__lr_mult__": "2.0", "__wd_mult__": "0.0" }, "inputs": [] }, { "op": "Convolution", "name": "rpn_kpoint_pred_stride8", "attrs": { "kernel": "(1, 1)", "num_filter": "20", "pad": "(0, 0)", "stride": "(1, 1)" }, "inputs": [[154, 0, 0], [164, 0, 0], [165, 0, 0]] } ], "arg_nodes": [ 0, 1, 2, 5, 6, 10, 11, 14, 15, 19, 20, 23, 24, 27, 28, 32, 33, 36, 37, 40, 41, 45, 46, 49, 50, 53, 54, 58, 59, 61, 62, 65, 66, 68, 69, 72, 73, 77, 78, 83, 84, 86, 87, 89, 90, 92, 93, 96, 97, 99, 100, 103, 104, 108, 109, 114, 115, 117, 118, 120, 121, 124, 125, 128, 132, 133, 136, 137, 139, 140, 143, 144, 146, 147, 150, 151, 155, 156, 161, 162, 164, 165 ], "node_row_ptr": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167 ], "heads": [[82, 0, 0], [85, 0, 0], [88, 0, 0], [113, 0, 0], [116, 0, 0], 
DTYPE = np.float32
ctypedef np.float32_t DTYPE_t


def anchors_cython(int height, int width, int stride,
                   np.ndarray[DTYPE_t, ndim=2] base_anchors):
    """
    Tile a set of base anchors over every cell of a (height, width) plane.

    Parameters
    ----------
    height: number of rows of the plane
    width: number of columns of the plane
    stride: step between neighbouring cells, in the units of the anchors
    base_anchors: (A, 4) float32 base set of anchors

    Returns
    -------
    all_anchors: (height, width, A, 4) float32 ndarray; entry [ih, iw, k] is
        base_anchors[k] with iw * stride added to columns 0 and 2 and
        ih * stride added to columns 1 and 3
    """
    cdef unsigned int num_anchors = base_anchors.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=4] grid = np.zeros(
        (height, width, num_anchors, 4), dtype=DTYPE)
    cdef unsigned int row, col
    cdef unsigned int a
    cdef unsigned int shift_y
    cdef unsigned int shift_x

    # Walk the plane row by row so writes follow the memory layout of `grid`.
    for row in range(height):
        shift_y = row * stride
        for col in range(width):
            shift_x = col * stride
            for a in range(num_anchors):
                # Columns 0/2 move with the column offset, 1/3 with the row offset.
                grid[row, col, a, 0] = base_anchors[a, 0] + shift_x
                grid[row, col, a, 1] = base_anchors[a, 1] + shift_y
                grid[row, col, a, 2] = base_anchors[a, 2] + shift_x
                grid[row, col, a, 3] = base_anchors[a, 3] + shift_y

    return grid
# `np.float` was only an alias of the Python builtin `float`; it was deprecated
# in NumPy 1.20 and removed in 1.24, so name the 64-bit type explicitly.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t


def bbox_overlaps_cython(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute the intersection-over-union of every box with every query box.

    Box extents are treated as inclusive: a side length is computed as
    (high - low + 1). Columns 0 and 2 hold the low/high coordinate along one
    axis and columns 1 and 3 along the other (conventionally x1, y1, x2, y2).

    Parameters
    ----------
    boxes: (N, 4) ndarray of float64
    query_boxes: (K, 4) ndarray of float64

    Returns
    -------
    overlaps: (N, K) ndarray of float64; overlaps[n, k] is the IoU of boxes[n]
        and query_boxes[k], and 0 where the two do not intersect
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n

    for k in range(K):
        # Area of the query box is invariant over the inner loop.
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw <= 0:
                continue  # no overlap along the first axis; entry stays 0
            ih = (
                min(boxes[n, 3], query_boxes[k, 3]) -
                max(boxes[n, 1], query_boxes[k, 1]) + 1
            )
            if ih <= 0:
                continue  # no overlap along the second axis; entry stays 0
            # Union area = area(box) + area(query) - intersection.
            ua = (
                (boxes[n, 2] - boxes[n, 0] + 1) *
                (boxes[n, 3] - boxes[n, 1] + 1) +
                box_area - iw * ih
            )
            overlaps[n, k] = iw * ih / ua
    return overlaps
ndim=1] scores = dets[:, 4] cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] cdef int ndets = dets.shape[0] cdef np.ndarray[np.int_t, ndim=1] suppressed = \ np.zeros((ndets), dtype=np.int) # nominal indices cdef int _i, _j # sorted indices cdef int i, j # temp variables for box i's (the box currently under consideration) cdef np.float32_t ix1, iy1, ix2, iy2, iarea # variables for computing overlap with box j (lower scoring box) cdef np.float32_t xx1, yy1, xx2, yy2 cdef np.float32_t w, h cdef np.float32_t inter, ovr keep = [] for _i in range(ndets): i = order[_i] if suppressed[i] == 1: continue keep.append(i) ix1 = x1[i] iy1 = y1[i] ix2 = x2[i] iy2 = y2[i] iarea = areas[i] for _j in range(_i + 1, ndets): j = order[_j] if suppressed[j] == 1: continue xx1 = max(ix1, x1[j]) yy1 = max(iy1, y1[j]) xx2 = min(ix2, x2[j]) yy2 = min(iy2, y2[j]) w = max(0.0, xx2 - xx1 + 1) h = max(0.0, yy2 - yy1 + 1) inter = w * h ovr = inter / (iarea + areas[j] - inter) if ovr >= thresh: suppressed[j] = 1 return keep ================================================ FILE: rcnn/cython/gpu_nms.hpp ================================================ void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, int boxes_dim, float nms_overlap_thresh, int device_id); ================================================ FILE: rcnn/cython/gpu_nms.pyx ================================================ # -------------------------------------------------------- # Faster R-CNN # Copyright (c) 2015 Microsoft # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # -------------------------------------------------------- import numpy as np cimport numpy as np assert sizeof(int) == sizeof(np.int32_t) cdef extern from "gpu_nms.hpp": void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, np.int32_t 
// Computes one tile of the pairwise suppression mask.
//
// Block (blockIdx.y, blockIdx.x) compares the boxes of row tile `row_start`
// against the boxes of column tile `col_start`; each tile holds up to
// threadsPerBlock boxes. For every row box, a bit mask is written to
// dev_mask in which bit i is set when the IoU with column box i exceeds
// nms_overlap_thresh. Boxes are read with a stride of 5 floats per box.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  // The last tile in each direction may be only partially filled.
  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage the column tile's boxes in shared memory: one box (5 floats) per thread.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  // All loads must be visible before any thread reads block_boxes.
  __syncthreads();

  if (threadIdx.x < row_size) {
    // Each active thread owns one row box and builds its mask word.
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    // On a diagonal tile, compare only against boxes with a larger index,
    // which also skips the comparison of a box with itself.
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    // dev_mask is laid out as [box index][column tile].
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}
// Host entry point for GPU non-maximum suppression.
//
// keep_out            [out] indices of the kept boxes; must have room for
//                     boxes_num ints.
// num_out             [out] number of indices written to keep_out.
// boxes_host          boxes_num * boxes_dim floats in host memory. The host
//                     loop below keeps lower indices first, so callers are
//                     expected to pass boxes sorted by decreasing score.
// boxes_dim           floats per box. NOTE(review): nms_kernel indexes boxes
//                     with a fixed stride of 5, so this should be 5 - confirm
//                     at the call site.
// nms_overlap_thresh  a box is removed when its IoU with a kept box exceeds
//                     this value.
// device_id           CUDA device to run on.
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  _set_device(device_id);

  // An empty input would otherwise launch a zero-sized grid and take the
  // address of element 0 of empty vectors.
  if (boxes_num <= 0) {
    *num_out = 0;
    return;
  }

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
  // Computed in size_t so that large inputs do not overflow int.
  const size_t mask_len = static_cast<size_t>(boxes_num) * col_blocks;

  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev,
                        mask_len * sizeof(unsigned long long)));

  // One thread block per (row tile, column tile) pair.
  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);
  // Surface launch-configuration errors, which a kernel launch does not report itself.
  CUDA_CHECK(cudaGetLastError());

  std::vector<unsigned long long> mask_host(mask_len);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * mask_len,
                        cudaMemcpyDeviceToHost));

  // remv[b] has bit k set when box b * threadsPerBlock + k is suppressed.
  std::vector<unsigned long long> remv(col_blocks, 0ULL);

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      // Box i survives: everything it overlaps (at a higher index) is removed.
      unsigned long long *p = &mask_host[0] + static_cast<size_t>(i) * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
def customize_compiler_for_nvcc(self):
    """Teach a distutils compiler instance to build ``.cu`` sources with nvcc.

    The compiler object is patched in place: ``.cu`` is registered as a source
    extension and ``_compile`` is replaced by a wrapper that, per source file,
    picks the executable and the matching entry of ``extra_postargs`` (a dict
    with the keys ``'nvcc'`` and ``'gcc'``) before delegating to the original
    ``_compile``. Patching the instance avoids having to inject a
    ``UnixCCompiler`` subclass into distutils.
    """
    # Let distutils accept CUDA sources at all.
    self.src_extensions.append('.cu')

    # Remember what has to be restored or delegated to after each file.
    original_compiler_so = self.compiler_so
    original_compile = self._compile

    # distutils cannot switch compilers by file extension, so the wrapper
    # below does it for every object that gets compiled.
    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
        _, suffix = os.path.splitext(src)
        if suffix == '.cu':
            # CUDA source: swap in nvcc and use only the nvcc-specific flags.
            self.set_executable('compiler_so', CUDA['nvcc'])
            selected_args = extra_postargs['nvcc']
        else:
            selected_args = extra_postargs['gcc']

        original_compile(obj, src, ext, cc_args, selected_args, pp_opts)
        # Undo a possible switch to nvcc so the next file starts from the default.
        self.compiler_so = original_compiler_so

    # Install the wrapper on this compiler instance.
    self._compile = _compile
def compute_bbox_regression_targets(rois, overlaps, labels):
    """
    given rois, overlaps, gt labels, compute bounding box regression targets
    :param rois: roidb[i]['boxes'] k * 4
    :param overlaps: roidb[i]['max_overlaps'] k * 1
    :param labels: roidb[i]['max_classes'] k * 1
    :return: targets[i][class, dx, dy, dw, dh] k * 5; rows that are not
        regression examples stay all-zero. When no ground-truth ROI is
        present (no overlap equal to 1) every row is zero.
    """
    # Ensure ROIs are floats (np.float was removed from NumPy; float64 is the
    # type it used to alias).
    rois = rois.astype(np.float64, copy=False)

    # Sanity check
    if len(rois) != len(overlaps):
        logger.warning('bbox regression: len(rois) != len(overlaps)')

    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        logger.warning('bbox regression: len(gt_inds) == 0')

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0]

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    # Without ground truth there is nothing to regress towards, and argmax
    # over an empty axis below would raise; the warning above is the signal.
    if len(gt_inds) == 0 or len(ex_inds) == 0:
        return targets

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
def bbox_overlaps_py(boxes, query_boxes):
    """
    determine overlaps (IoU) between boxes and query_boxes

    Coordinates are treated as pixel-inclusive, so a box spanning x1..x2 has
    width ``x2 - x1 + 1``. Pairs that do not intersect keep an overlap of 0.

    :param boxes: n * 4 bounding boxes (x1, y1, x2, y2)
    :param query_boxes: k * 4 bounding boxes (x1, y1, x2, y2)
    :return: overlaps: n * k float64 overlaps
    """
    n_ = boxes.shape[0]
    k_ = query_boxes.shape[0]
    # np.float was removed from NumPy; float64 is the type it used to alias.
    overlaps = np.zeros((n_, k_), dtype=np.float64)
    for k in range(k_):
        query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        for n in range(n_):
            iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1
            if iw <= 0:
                continue
            ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1
            if ih <= 0:
                continue
            box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1)
            # union = area(a) + area(b) - intersection
            all_area = float(box_area + query_box_area - iw * ih)
            overlaps[n, k] = iw * ih / all_area
    return overlaps
def clip_points(points, im_shape):
    """
    Clip landmark coordinates to image boundaries, in place.

    Columns are read in groups of ten: within each group the even offsets are
    clamped to [0, im_shape[1] - 1] and the odd offsets to
    [0, im_shape[0] - 1].
    :param points: [N, 10 * k] array, modified in place
    :param im_shape: tuple of 2, (height, width)
    :return: the same ``points`` array
    """
    x_limit = im_shape[1] - 1
    y_limit = im_shape[0] - 1
    for offset in range(10):
        # even offsets hold x coordinates, odd offsets hold y coordinates
        limit = y_limit if offset % 2 else x_limit
        points[:, offset::10] = np.maximum(np.minimum(points[:, offset::10], limit), 0)
    return points
def kpoint_pred(boxes, point_deltas):
    """
    Decode landmark offsets predicted relative to a set of boxes.

    Every (dx, dy) pair is scaled by the width / height of its box and added
    to the box centre:
        x = dx * width + ctr_x,    y = dy * height + ctr_y
    with pixel-inclusive sizes (``width = x2 - x1 + 1``).

    :param boxes: [N, 4] boxes as (x1, y1, x2, y2)
    :param point_deltas: [N, 2 * P] offsets laid out as x1, y1, x2, y2, ...
        (P = 5 in this project)
    :return: [N, 2 * P] absolute point coordinates in the same layout
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, point_deltas.shape[1]))

    # np.float was removed from NumPy; float64 is the type it used to alias.
    boxes = boxes.astype(np.float64, copy=False)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
    ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)

    pred_points = np.zeros(point_deltas.shape)
    # Even columns are x offsets, odd columns are y offsets; broadcasting the
    # per-box size and centre handles all points at once.
    pred_points[:, 0::2] = point_deltas[:, 0::2] * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_points[:, 1::2] = point_deltas[:, 1::2] * heights[:, np.newaxis] + ctr_y[:, np.newaxis]

    return pred_points
def generate_anchors_fpn(base_size=[64,32,16,8,4], ratios=[0.5, 1, 2], scales=8):
    """
    Generate one set of anchor (reference) windows per pyramid level.

    ``ratios`` and ``scales`` are flattened lists covering all levels; each is
    split into ``len(base_size)`` equally sized consecutive groups and group
    ``i`` is passed, together with ``base_size[i]``, to ``generate_anchors``.

    :param base_size: sequence with one base window size per level
    :param ratios: array-like whose size is a multiple of len(base_size)
    :param scales: array-like whose size is a multiple of len(base_size)
    :return: list with one anchor array per level, in the order of base_size
    :raises ValueError: if ratios or scales cannot be split evenly across the
        levels (this is the case for the default arguments, so callers are
        expected to pass all three explicitly)
    """
    num_levels = len(base_size)
    # Accept plain lists / scalars as well as ndarrays: only ndarrays have
    # .reshape, so convert before splitting per level.
    _ratios = np.asarray(ratios).reshape((num_levels, -1))
    _scales = np.asarray(scales).reshape((num_levels, -1))

    anchors = []
    for i, bs in enumerate(base_size):
        anchors.append(generate_anchors(bs, _ratios[i], _scales[i]))
    return anchors
def nms(dets, thresh):
    """
    Greedy non-maximum suppression in pure NumPy.

    Boxes are visited by decreasing score; each visited box is kept and every
    remaining box whose IoU with it is greater than thresh is discarded.
    :param dets: [[x1, y1, x2, y2, score]], further columns are ignored
    :param thresh: boxes with overlap <= thresh are retained
    :return: indexes to keep, ordered by decreasing score
    """
    left, top, right, bottom, confidence = (dets[:, col] for col in range(5))

    # pixel-inclusive areas, hence the "+ 1"
    box_area = (right - left + 1) * (bottom - top + 1)
    remaining = confidence.argsort()[::-1]

    kept = []
    while remaining.size > 0:
        best = remaining[0]
        others = remaining[1:]
        kept.append(best)

        # intersection of the best box with every other candidate
        inter_left = np.maximum(left[best], left[others])
        inter_top = np.maximum(top[best], top[others])
        inter_right = np.minimum(right[best], right[others])
        inter_bottom = np.minimum(bottom[best], bottom[others])
        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        overlap = inter_w * inter_h

        iou = overlap / (box_area[best] + box_area[others] - overlap)
        # only candidates that overlap little enough survive this round
        remaining = others[iou <= thresh]

    return kept
def detect(self, img, threshold=0.5, scales=(1.0,)):
    """
    Run the network on ``img`` at every requested scale and return the
    detections that survive NMS and score thresholding.

    :param img: H x W x 3 image array; channels are reversed and
        ``self.pixel_means`` is subtracted before the forward pass
    :param threshold: minimum score of a returned detection (applied after
        NMS; values <= 0 disable the filter)
    :param scales: sequence of resize factors; the image is processed once
        per factor and all proposals are mapped back to the input resolution
    :return: [M, 15] float32 array with columns
        x1, y1, x2, y2, score, followed by 10 landmark coordinates;
        an empty [0, 15] array when ``scales`` yields no proposals
    """
    proposals_list = []
    proposals_kp_list = []
    scores_list = []
    pre_nms_topN = self._rpn_pre_nms_top_n

    for im_scale in scales:
        if im_scale != 1.0:
            im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
        else:
            im = img
        im = im.astype(np.float32)
        im_shape = (im.shape[0], im.shape[1])

        # HWC -> 1 x C x H x W with the channel order reversed and the
        # per-channel mean removed.
        im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
        for i in range(3):
            im_tensor[0, i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i]
        data = nd.array(im_tensor)
        # Diagnostics go to stderr, like the per-stride message below, so
        # stdout stays free for the caller.
        print("data.shape: ", data.shape, file=sys.stderr)
        db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)])
        self.model.forward(db, is_train=False)
        net_out = self.model.get_outputs()

        for level, s in enumerate(self._feat_stride_fpn):
            # With several scales, stride 32 is skipped for the last one.
            if len(scales) > 1 and s == 32 and im_scale == scales[-1]:
                continue
            key = 'stride%s' % s
            stride = int(s)
            # Each stride owns three consecutive outputs:
            # scores, box deltas, landmark deltas.
            idx = 3 * level
            print('getting', im_scale, stride, idx, len(net_out), data.shape, file=sys.stderr)

            num_anchors = self._num_anchors[key]
            # Only the channels from num_anchors onwards are used as scores
            # (presumably the foreground half of the output - TODO confirm).
            scores = net_out[idx].asnumpy()[:, num_anchors:, :, :]
            bbox_deltas = net_out[idx + 1].asnumpy()
            kpoint_deltas = net_out[idx + 2].asnumpy()

            height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
            anchors = anchors_plane(height, width, stride,
                                    self._anchors_fpn[key].astype(np.float32))
            anchors = anchors.reshape((height * width * num_anchors, 4))

            # Bring every output to (height, width), then flatten to one row
            # per anchor in the same (h, w, anchor) order as ``anchors``.
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            kpoint_deltas = self._clip_pad(kpoint_deltas, (height, width))
            kpoint_deltas = kpoint_deltas.transpose((0, 2, 3, 1)).reshape((-1, 10))

            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            proposals = self._bbox_pred(anchors, bbox_deltas)
            proposals = clip_boxes(proposals, im_shape)

            proposals_kp = kpoint_pred(anchors, kpoint_deltas)
            proposals_kp = clip_points(proposals_kp, im_shape)

            # Keep only the best-scoring candidates of this stride.
            order = scores.ravel().argsort()[::-1]
            if pre_nms_topN > 0:
                order = order[:pre_nms_topN]
            proposals = proposals[order, :]
            proposals_kp = proposals_kp[order, :]
            scores = scores[order]

            # Map coordinates back to the resolution of the input image.
            proposals /= im_scale
            proposals_kp /= im_scale

            proposals_list.append(proposals)
            proposals_kp_list.append(proposals_kp)
            scores_list.append(scores)

    # np.vstack rejects an empty list (e.g. scales == ()).
    if not proposals_list:
        return np.zeros((0, 15), dtype=np.float32)

    proposals = np.vstack(proposals_list)
    proposals_kp = np.vstack(proposals_kp_list)
    scores = np.vstack(scores_list)

    # Merge all strides / scales into one ranking by score.
    order = scores.ravel().argsort()[::-1]
    proposals = proposals[order, :]
    proposals_kp = proposals_kp[order, :]
    scores = scores[order]

    det = np.hstack((proposals, scores, proposals_kp)).astype(np.float32)

    if self.nms_threshold < 1.0:
        keep = self.nms(det)
        det = det[keep, :]
    if threshold > 0.0:
        keep = np.where(det[:, 4] >= threshold)[0]
        det = det[keep, :]
    return det
"""Run the SSHA face/keypoint detector on one image and save the result.

Usage: python test_kpoint.py [image_path]

Detected boxes (green) and keypoints (red) are drawn on the image, which is
written to ``res.jpg``; timing and the number of faces go to stdout.
"""
import cv2
import sys
import numpy as np
import datetime
from ssha_detector import SSHDetector

# scales[0]: target length of the shorter image side,
# scales[1]: upper bound for the longer image side.
scales = [1200, 1600]
# Width in pixels of the black frame added around the image before detection
# and cut off again before saving.
BORDER = 5

detector = SSHDetector('./kmodel/e2e', 0)

f = 'test_image/test_2.jpg'
if len(sys.argv) > 1:
    f = sys.argv[1]
img = cv2.imread(f)
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None.
    raise IOError('cannot read image: %s' % f)
im_shape = img.shape
print(im_shape)

target_size = scales[0]
max_size = scales[1]
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])

img = cv2.copyMakeBorder(img, BORDER, BORDER, BORDER, BORDER,
                         borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])

# Only ever shrink: resize when the image exceeds either size limit.
if im_size_min > target_size or im_size_max > max_size:
    im_scale = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    # NOTE(review): the frame is scaled together with the image here, yet
    # the crop before imwrite still removes exactly BORDER pixels.
    img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
    print('resize to', img.shape)

timea = datetime.datetime.now()
faces = detector.detect(img, threshold=0.8)
timeb = datetime.datetime.now()

for num in range(faces.shape[0]):
    # Detections are float32, but OpenCV drawing calls require integer
    # pixel coordinates.
    x1, y1, x2, y2 = [int(v) for v in faces[num, 0:4]]
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    kpoint = faces[num, 5:15]
    for knum in range(5):
        center = (int(kpoint[2 * knum]), int(kpoint[2 * knum + 1]))
        cv2.circle(img, center, 1, [0, 0, 255], 2)
cv2.imwrite("res.jpg", img[BORDER:-BORDER, BORDER:-BORDER, :])

diff = timeb - timea
print('detection uses', diff.total_seconds(), 'seconds')
print('find', faces.shape[0], 'faces')