[
  {
    "path": "Makefile",
    "content": "# Build and clean the Cython extensions in rcnn/cython.\n# Commands are chained with && so that a failed cd or build aborts the target\n# instead of running the following command from the wrong directory.\n.PHONY: all clean\n\nall:\n\tcd rcnn/cython/ && python setup.py build_ext --inplace && rm -rf build\n\t#cd rcnn/pycocotools/; python setup.py build_ext --inplace; rm -rf build; cd ../../\nclean:\n\tcd rcnn/cython/ && rm -f *.so *.c *.cpp\n\t#cd rcnn/pycocotools/; rm *.so; cd ../../\n"
  },
  {
    "path": "README.md",
    "content": "# SSHA: SSH with Alignment\n\n## Result\n\n![img0](res2.jpg)\n![img1](res1.jpg)\n![img2](res0.jpg)\n\n## How To Use\n#### 0. Install MXNet and OpenCV for Python\n#### 1. Clone SSHA\n    git clone https://github.com/ElegantGod/SSHA\n#### 2. Build the Cython extensions\n    cd SSHA && make\n#### 3. Run it\n    python test_kpoint.py\n\n### FDDB\n![fddb](FDDB.png)\n\n### Reference:\n[Insightface](https://github.com/deepinsight/insightface/SSH)\n"
  },
  {
    "path": "__init__.py",
    "content": ""
  },
  {
    "path": "kmodel/e2e-symbol.json",
    "content": "{\n  \"nodes\": [\n    {\n      \"op\": \"null\", \n      \"name\": \"data\", \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv1_1_weight\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"64\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv1_1_bias\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"64\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"conv1_1\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"64\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": [[0, 0, 0], [1, 0, 0], [2, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"relu1_1\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[3, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv1_2_weight\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"64\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv1_2_bias\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"64\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"conv1_2\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"64\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": [[4, 0, 0], [5, 0, 0], [6, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"relu1_2\", 
\n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[7, 0, 0]]\n    }, \n    {\n      \"op\": \"Pooling\", \n      \"name\": \"pool1\", \n      \"attrs\": {\n        \"kernel\": \"(2, 2)\", \n        \"pool_type\": \"max\", \n        \"stride\": \"(2, 2)\"\n      }, \n      \"inputs\": [[8, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv2_1_weight\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv2_1_bias\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"conv2_1\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": [[9, 0, 0], [10, 0, 0], [11, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"relu2_1\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[12, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv2_2_weight\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv2_2_bias\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"conv2_2\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n  
      \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": [[13, 0, 0], [14, 0, 0], [15, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"relu2_2\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[16, 0, 0]]\n    }, \n    {\n      \"op\": \"Pooling\", \n      \"name\": \"pool2\", \n      \"attrs\": {\n        \"kernel\": \"(2, 2)\", \n        \"pool_type\": \"max\", \n        \"stride\": \"(2, 2)\"\n      }, \n      \"inputs\": [[17, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv3_1_weight\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"256\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv3_1_bias\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"256\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"conv3_1\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"256\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": [[18, 0, 0], [19, 0, 0], [20, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"relu3_1\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[21, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv3_2_weight\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"256\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv3_2_bias\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"256\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": 
\"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"conv3_2\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"256\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": [[22, 0, 0], [23, 0, 0], [24, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"relu3_2\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[25, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv3_3_weight\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"256\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv3_3_bias\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"256\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"conv3_3\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"256\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": [[26, 0, 0], [27, 0, 0], [28, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"relu3_3\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[29, 0, 0]]\n    }, \n    {\n      \"op\": \"Pooling\", \n      \"name\": \"pool3\", \n      \"attrs\": {\n        \"kernel\": \"(2, 2)\", \n        \"pool_type\": \"max\", \n        \"stride\": \"(2, 2)\"\n      }, \n      \"inputs\": [[30, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv4_1_weight\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n   
 {\n      \"op\": \"null\", \n      \"name\": \"conv4_1_bias\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"conv4_1\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": [[31, 0, 0], [32, 0, 0], [33, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"relu4_1\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[34, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv4_2_weight\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv4_2_bias\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"conv4_2\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": [[35, 0, 0], [36, 0, 0], [37, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"relu4_2\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[38, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv4_3_weight\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", 
\n      \"name\": \"conv4_3_bias\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"conv4_3\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": [[39, 0, 0], [40, 0, 0], [41, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"relu4_3\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[42, 0, 0]]\n    }, \n    {\n      \"op\": \"Pooling\", \n      \"name\": \"pool4\", \n      \"attrs\": {\n        \"kernel\": \"(2, 2)\", \n        \"pool_type\": \"max\", \n        \"stride\": \"(2, 2)\"\n      }, \n      \"inputs\": [[43, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv5_1_weight\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv5_1_bias\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"conv5_1\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": [[44, 0, 0], [45, 0, 0], [46, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"relu5_1\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[47, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv5_2_weight\", \n      \"attrs\": 
{\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv5_2_bias\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"conv5_2\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": [[48, 0, 0], [49, 0, 0], [50, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"relu5_2\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[51, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv5_3_weight\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"conv5_3_bias\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"conv5_3\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"512\", \n        \"pad\": \"(1, 1)\", \n        \"workspace\": \"2048\"\n      }, \n      \"inputs\": [[52, 0, 0], [53, 0, 0], [54, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"relu5_3\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[55, 0, 0]]\n    }, \n    {\n      \"op\": \"Pooling\", \n      \"name\": \"pooling0\", \n      \"attrs\": {\n        \"kernel\": \"(2, 
2)\", \n        \"pad\": \"(0, 0)\", \n        \"pool_type\": \"max\", \n        \"stride\": \"(2, 2)\"\n      }, \n      \"inputs\": [[56, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m3_det_conv1_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m3_det_conv1_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m3_det_conv1\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"256\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[57, 0, 0], [58, 0, 0], [59, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m3_det_context_conv1_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m3_det_context_conv1_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m3_det_context_conv1\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[57, 0, 0], [61, 0, 0], [62, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"ssh_m3_det_context_conv1_relu\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      
\"inputs\": [[63, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m3_det_context_conv2_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m3_det_context_conv2_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m3_det_context_conv2\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[64, 0, 0], [65, 0, 0], [66, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m3_det_context_conv3_1_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m3_det_context_conv3_1_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m3_det_context_conv3_1\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[64, 0, 0], [68, 0, 0], [69, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"ssh_m3_det_context_conv3_1_relu\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[70, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": 
\"ssh_m3_det_context_conv3_2_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m3_det_context_conv3_2_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m3_det_context_conv3_2\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[71, 0, 0], [72, 0, 0], [73, 0, 0]]\n    }, \n    {\n      \"op\": \"Concat\", \n      \"name\": \"ssh_m3_det_concat\", \n      \"attrs\": {\n        \"dim\": \"1\", \n        \"num_args\": \"3\"\n      }, \n      \"inputs\": [[60, 0, 0], [67, 0, 0], [74, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"ssh_m3_det_concat_relu\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[75, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_cls_score_stride32_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_cls_score_stride32_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"rpn_cls_score_stride32\", \n      \"attrs\": {\n        \"kernel\": \"(1, 1)\", \n        \"num_filter\": \"4\", \n        \"pad\": \"(0, 0)\", \n        \"stride\": \"(1, 1)\"\n      }, \n    
  \"inputs\": [[76, 0, 0], [77, 0, 0], [78, 0, 0]]\n    }, \n    {\n      \"op\": \"Reshape\", \n      \"name\": \"rpn_cls_score_reshape_stride32\", \n      \"attrs\": {\"shape\": \"(0, 2, -1, 0)\"}, \n      \"inputs\": [[79, 0, 0]]\n    }, \n    {\n      \"op\": \"SoftmaxActivation\", \n      \"name\": \"rpn_cls_prob_stride32\", \n      \"attrs\": {\"mode\": \"channel\"}, \n      \"inputs\": [[80, 0, 0]]\n    }, \n    {\n      \"op\": \"Reshape\", \n      \"name\": \"rpn_cls_prob_reshape_stride32\", \n      \"attrs\": {\"shape\": \"(0, 4, -1, 0)\"}, \n      \"inputs\": [[81, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_bbox_pred_stride32_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_bbox_pred_stride32_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"rpn_bbox_pred_stride32\", \n      \"attrs\": {\n        \"kernel\": \"(1, 1)\", \n        \"num_filter\": \"8\", \n        \"pad\": \"(0, 0)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[76, 0, 0], [83, 0, 0], [84, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_kpoint_pred_stride32_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_kpoint_pred_stride32_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n   
   \"op\": \"Convolution\", \n      \"name\": \"rpn_kpoint_pred_stride32\", \n      \"attrs\": {\n        \"kernel\": \"(1, 1)\", \n        \"num_filter\": \"20\", \n        \"pad\": \"(0, 0)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[76, 0, 0], [86, 0, 0], [87, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m2_det_conv1_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m2_det_conv1_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m2_det_conv1\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"256\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[56, 0, 0], [89, 0, 0], [90, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m2_det_context_conv1_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m2_det_context_conv1_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m2_det_context_conv1\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[56, 0, 0], [92, 0, 0], [93, 0, 0]]\n    }, \n    
{\n      \"op\": \"Activation\", \n      \"name\": \"ssh_m2_det_context_conv1_relu\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[94, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m2_det_context_conv2_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m2_det_context_conv2_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m2_det_context_conv2\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[95, 0, 0], [96, 0, 0], [97, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m2_det_context_conv3_1_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m2_det_context_conv3_1_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m2_det_context_conv3_1\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[95, 0, 0], [99, 0, 0], [100, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"ssh_m2_det_context_conv3_1_relu\", \n      
\"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[101, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m2_det_context_conv3_2_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m2_det_context_conv3_2_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m2_det_context_conv3_2\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[102, 0, 0], [103, 0, 0], [104, 0, 0]]\n    }, \n    {\n      \"op\": \"Concat\", \n      \"name\": \"ssh_m2_det_concat\", \n      \"attrs\": {\n        \"dim\": \"1\", \n        \"num_args\": \"3\"\n      }, \n      \"inputs\": [[91, 0, 0], [98, 0, 0], [105, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"ssh_m2_det_concat_relu\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[106, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_cls_score_stride16_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_cls_score_stride16_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"rpn_cls_score_stride16\", \n      \"attrs\": {\n        
\"kernel\": \"(1, 1)\", \n        \"num_filter\": \"4\", \n        \"pad\": \"(0, 0)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[107, 0, 0], [108, 0, 0], [109, 0, 0]]\n    }, \n    {\n      \"op\": \"Reshape\", \n      \"name\": \"rpn_cls_score_reshape_stride16\", \n      \"attrs\": {\"shape\": \"(0, 2, -1, 0)\"}, \n      \"inputs\": [[110, 0, 0]]\n    }, \n    {\n      \"op\": \"SoftmaxActivation\", \n      \"name\": \"rpn_cls_prob_stride16\", \n      \"attrs\": {\"mode\": \"channel\"}, \n      \"inputs\": [[111, 0, 0]]\n    }, \n    {\n      \"op\": \"Reshape\", \n      \"name\": \"rpn_cls_prob_reshape_stride16\", \n      \"attrs\": {\"shape\": \"(0, 4, -1, 0)\"}, \n      \"inputs\": [[112, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_bbox_pred_stride16_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_bbox_pred_stride16_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"rpn_bbox_pred_stride16\", \n      \"attrs\": {\n        \"kernel\": \"(1, 1)\", \n        \"num_filter\": \"8\", \n        \"pad\": \"(0, 0)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[107, 0, 0], [114, 0, 0], [115, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_kpoint_pred_stride16_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_kpoint_pred_stride16_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", 
{\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"rpn_kpoint_pred_stride16\", \n      \"attrs\": {\n        \"kernel\": \"(1, 1)\", \n        \"num_filter\": \"20\", \n        \"pad\": \"(0, 0)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[107, 0, 0], [117, 0, 0], [118, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_red_conv_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_red_conv_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m1_red_conv\", \n      \"attrs\": {\n        \"kernel\": \"(1, 1)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(0, 0)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[43, 0, 0], [120, 0, 0], [121, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"ssh_m1_red_conv_relu\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[122, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m2_red_conv_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m2_red_conv_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      
\"op\": \"Convolution\", \n      \"name\": \"ssh_m2_red_conv\", \n      \"attrs\": {\n        \"kernel\": \"(1, 1)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(0, 0)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[56, 0, 0], [124, 0, 0], [125, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"ssh_m2_red_conv_relu\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[126, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m2_red_upsampling_weight\", \n      \"attrs\": {\n        \"__lr_mult__\": \"0.0\", \n        \"__wd_mult__\": \"0.0\", \n        \"kernel\": \"(4, 4)\", \n        \"no_bias\": \"True\", \n        \"num_filter\": \"128\", \n        \"num_group\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(2, 2)\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Deconvolution\", \n      \"name\": \"ssh_m2_red_upsampling\", \n      \"attrs\": {\n        \"__lr_mult__\": \"0.0\", \n        \"__wd_mult__\": \"0.0\", \n        \"kernel\": \"(4, 4)\", \n        \"no_bias\": \"True\", \n        \"num_filter\": \"128\", \n        \"num_group\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(2, 2)\"\n      }, \n      \"inputs\": [[127, 0, 0], [128, 0, 0]]\n    }, \n    {\n      \"op\": \"Crop\", \n      \"name\": \"crop0\", \n      \"attrs\": {\"num_args\": \"2\"}, \n      \"inputs\": [[123, 0, 0], [129, 0, 0]]\n    }, \n    {\n      \"op\": \"elemwise_add\", \n      \"name\": \"_plus0\", \n      \"inputs\": [[130, 0, 0], [129, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_conv_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_conv_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", 
{\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m1_conv\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[131, 0, 0], [132, 0, 0], [133, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"ssh_m1_conv_relu\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[134, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_det_conv1_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_det_conv1_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m1_det_conv1\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"128\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[135, 0, 0], [136, 0, 0], [137, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_det_context_conv1_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_det_context_conv1_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n     
 \"op\": \"Convolution\", \n      \"name\": \"ssh_m1_det_context_conv1\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"64\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[135, 0, 0], [139, 0, 0], [140, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"ssh_m1_det_context_conv1_relu\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[141, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_det_context_conv2_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_det_context_conv2_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m1_det_context_conv2\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"64\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[142, 0, 0], [143, 0, 0], [144, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_det_context_conv3_1_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_det_context_conv3_1_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m1_det_context_conv3_1\", \n      \"attrs\": {\n 
       \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"64\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[142, 0, 0], [146, 0, 0], [147, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"ssh_m1_det_context_conv3_1_relu\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[148, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_det_context_conv3_2_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"ssh_m1_det_context_conv3_2_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"ssh_m1_det_context_conv3_2\", \n      \"attrs\": {\n        \"kernel\": \"(3, 3)\", \n        \"num_filter\": \"64\", \n        \"pad\": \"(1, 1)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[149, 0, 0], [150, 0, 0], [151, 0, 0]]\n    }, \n    {\n      \"op\": \"Concat\", \n      \"name\": \"ssh_m1_det_concat\", \n      \"attrs\": {\n        \"dim\": \"1\", \n        \"num_args\": \"3\"\n      }, \n      \"inputs\": [[138, 0, 0], [145, 0, 0], [152, 0, 0]]\n    }, \n    {\n      \"op\": \"Activation\", \n      \"name\": \"ssh_m1_det_concat_relu\", \n      \"attrs\": {\"act_type\": \"relu\"}, \n      \"inputs\": [[153, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_cls_score_stride8_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_cls_score_stride8_bias\", \n      
\"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"rpn_cls_score_stride8\", \n      \"attrs\": {\n        \"kernel\": \"(1, 1)\", \n        \"num_filter\": \"4\", \n        \"pad\": \"(0, 0)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[154, 0, 0], [155, 0, 0], [156, 0, 0]]\n    }, \n    {\n      \"op\": \"Reshape\", \n      \"name\": \"rpn_cls_score_reshape_stride8\", \n      \"attrs\": {\"shape\": \"(0, 2, -1, 0)\"}, \n      \"inputs\": [[157, 0, 0]]\n    }, \n    {\n      \"op\": \"SoftmaxActivation\", \n      \"name\": \"rpn_cls_prob_stride8\", \n      \"attrs\": {\"mode\": \"channel\"}, \n      \"inputs\": [[158, 0, 0]]\n    }, \n    {\n      \"op\": \"Reshape\", \n      \"name\": \"rpn_cls_prob_reshape_stride8\", \n      \"attrs\": {\"shape\": \"(0, 4, -1, 0)\"}, \n      \"inputs\": [[159, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_bbox_pred_stride8_weight\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_bbox_pred_stride8_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"rpn_bbox_pred_stride8\", \n      \"attrs\": {\n        \"kernel\": \"(1, 1)\", \n        \"num_filter\": \"8\", \n        \"pad\": \"(0, 0)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[154, 0, 0], [161, 0, 0], [162, 0, 0]]\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_kpoint_pred_stride8_weight\", \n      
\"attrs\": {\n        \"__init__\": \"[\\\"normal\\\", {\\\"sigma\\\": 0.01}]\", \n        \"__lr_mult__\": \"1.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"null\", \n      \"name\": \"rpn_kpoint_pred_stride8_bias\", \n      \"attrs\": {\n        \"__init__\": \"[\\\"constant\\\", {\\\"value\\\": 0.0}]\", \n        \"__lr_mult__\": \"2.0\", \n        \"__wd_mult__\": \"0.0\"\n      }, \n      \"inputs\": []\n    }, \n    {\n      \"op\": \"Convolution\", \n      \"name\": \"rpn_kpoint_pred_stride8\", \n      \"attrs\": {\n        \"kernel\": \"(1, 1)\", \n        \"num_filter\": \"20\", \n        \"pad\": \"(0, 0)\", \n        \"stride\": \"(1, 1)\"\n      }, \n      \"inputs\": [[154, 0, 0], [164, 0, 0], [165, 0, 0]]\n    }\n  ], \n  \"arg_nodes\": [\n    0, \n    1, \n    2, \n    5, \n    6, \n    10, \n    11, \n    14, \n    15, \n    19, \n    20, \n    23, \n    24, \n    27, \n    28, \n    32, \n    33, \n    36, \n    37, \n    40, \n    41, \n    45, \n    46, \n    49, \n    50, \n    53, \n    54, \n    58, \n    59, \n    61, \n    62, \n    65, \n    66, \n    68, \n    69, \n    72, \n    73, \n    77, \n    78, \n    83, \n    84, \n    86, \n    87, \n    89, \n    90, \n    92, \n    93, \n    96, \n    97, \n    99, \n    100, \n    103, \n    104, \n    108, \n    109, \n    114, \n    115, \n    117, \n    118, \n    120, \n    121, \n    124, \n    125, \n    128, \n    132, \n    133, \n    136, \n    137, \n    139, \n    140, \n    143, \n    144, \n    146, \n    147, \n    150, \n    151, \n    155, \n    156, \n    161, \n    162, \n    164, \n    165\n  ], \n  \"node_row_ptr\": [\n    0, \n    1, \n    2, \n    3, \n    4, \n    5, \n    6, \n    7, \n    8, \n    9, \n    10, \n    11, \n    12, \n    13, \n    14, \n    15, \n    16, \n    17, \n    18, \n    19, \n    20, \n    21, \n    22, \n    23, \n    24, \n    25, \n    26, \n    27, \n    28, \n    29, \n    30, \n    31, \n    32, \n    33, \n    
34, \n    35, \n    36, \n    37, \n    38, \n    39, \n    40, \n    41, \n    42, \n    43, \n    44, \n    45, \n    46, \n    47, \n    48, \n    49, \n    50, \n    51, \n    52, \n    53, \n    54, \n    55, \n    56, \n    57, \n    58, \n    59, \n    60, \n    61, \n    62, \n    63, \n    64, \n    65, \n    66, \n    67, \n    68, \n    69, \n    70, \n    71, \n    72, \n    73, \n    74, \n    75, \n    76, \n    77, \n    78, \n    79, \n    80, \n    81, \n    82, \n    83, \n    84, \n    85, \n    86, \n    87, \n    88, \n    89, \n    90, \n    91, \n    92, \n    93, \n    94, \n    95, \n    96, \n    97, \n    98, \n    99, \n    100, \n    101, \n    102, \n    103, \n    104, \n    105, \n    106, \n    107, \n    108, \n    109, \n    110, \n    111, \n    112, \n    113, \n    114, \n    115, \n    116, \n    117, \n    118, \n    119, \n    120, \n    121, \n    122, \n    123, \n    124, \n    125, \n    126, \n    127, \n    128, \n    129, \n    130, \n    131, \n    132, \n    133, \n    134, \n    135, \n    136, \n    137, \n    138, \n    139, \n    140, \n    141, \n    142, \n    143, \n    144, \n    145, \n    146, \n    147, \n    148, \n    149, \n    150, \n    151, \n    152, \n    153, \n    154, \n    155, \n    156, \n    157, \n    158, \n    159, \n    160, \n    161, \n    162, \n    163, \n    164, \n    165, \n    166, \n    167\n  ], \n  \"heads\": [[82, 0, 0], [85, 0, 0], [88, 0, 0], [113, 0, 0], [116, 0, 0], [119, 0, 0], [160, 0, 0], [163, 0, 0], [166, 0, 0]], \n  \"attrs\": {\"mxnet_version\": [\"int\", 10300]}\n}"
  },
  {
    "path": "rcnn/__init__.py",
    "content": ""
  },
  {
    "path": "rcnn/cython/.gitignore",
    "content": "*.c\n*.cpp\n*.so\n"
  },
  {
    "path": "rcnn/cython/__init__.py",
    "content": ""
  },
  {
    "path": "rcnn/cython/anchors.pyx",
    "content": "cimport cython\nimport numpy as np\ncimport numpy as np\n\nDTYPE = np.float32\nctypedef np.float32_t DTYPE_t\n\ndef anchors_cython(int height, int width, int stride, np.ndarray[DTYPE_t, ndim=2] base_anchors):\n    \"\"\"\n    Parameters\n    ----------\n    height: height of plane\n    width:  width of plane\n    stride: stride of the original image\n    base_anchors: (A, 4) a base set of anchors\n    Returns\n    -------\n    all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane\n    \"\"\"\n    cdef unsigned int A = base_anchors.shape[0]\n    cdef np.ndarray[DTYPE_t, ndim=4] all_anchors = np.zeros((height, width, A, 4), dtype=DTYPE)\n    cdef unsigned int iw, ih\n    cdef unsigned int k\n    cdef unsigned int sh\n    cdef unsigned int sw\n    for iw in range(width):\n        sw = iw * stride\n        for ih in range(height):\n            sh = ih * stride\n            for k in range(A):\n                all_anchors[ih, iw, k, 0] = base_anchors[k, 0] + sw\n                all_anchors[ih, iw, k, 1] = base_anchors[k, 1] + sh\n                all_anchors[ih, iw, k, 2] = base_anchors[k, 2] + sw\n                all_anchors[ih, iw, k, 3] = base_anchors[k, 3] + sh\n    return all_anchors"
  },
  {
    "path": "rcnn/cython/bbox.pyx",
    "content": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Sergey Karayev\n# --------------------------------------------------------\n\ncimport cython\nimport numpy as np\ncimport numpy as np\n\nDTYPE = np.float\nctypedef np.float_t DTYPE_t\n\ndef bbox_overlaps_cython(\n        np.ndarray[DTYPE_t, ndim=2] boxes,\n        np.ndarray[DTYPE_t, ndim=2] query_boxes):\n    \"\"\"\n    Parameters\n    ----------\n    boxes: (N, 4) ndarray of float\n    query_boxes: (K, 4) ndarray of float\n    Returns\n    -------\n    overlaps: (N, K) ndarray of overlap between boxes and query_boxes\n    \"\"\"\n    cdef unsigned int N = boxes.shape[0]\n    cdef unsigned int K = query_boxes.shape[0]\n    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)\n    cdef DTYPE_t iw, ih, box_area\n    cdef DTYPE_t ua\n    cdef unsigned int k, n\n    for k in range(K):\n        box_area = (\n            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *\n            (query_boxes[k, 3] - query_boxes[k, 1] + 1)\n        )\n        for n in range(N):\n            iw = (\n                min(boxes[n, 2], query_boxes[k, 2]) -\n                max(boxes[n, 0], query_boxes[k, 0]) + 1\n            )\n            if iw > 0:\n                ih = (\n                    min(boxes[n, 3], query_boxes[k, 3]) -\n                    max(boxes[n, 1], query_boxes[k, 1]) + 1\n                )\n                if ih > 0:\n                    ua = float(\n                        (boxes[n, 2] - boxes[n, 0] + 1) *\n                        (boxes[n, 3] - boxes[n, 1] + 1) +\n                        box_area - iw * ih\n                    )\n                    overlaps[n, k] = iw * ih / ua\n    return overlaps\n"
  },
  {
    "path": "rcnn/cython/cpu_nms.pyx",
    "content": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick\n# --------------------------------------------------------\n\nimport numpy as np\ncimport numpy as np\n\ncdef inline np.float32_t max(np.float32_t a, np.float32_t b):\n    return a if a >= b else b\n\ncdef inline np.float32_t min(np.float32_t a, np.float32_t b):\n    return a if a <= b else b\n\ndef cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):\n    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]\n    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]\n    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]\n    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]\n    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]\n\n    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)\n    cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]\n\n    cdef int ndets = dets.shape[0]\n    cdef np.ndarray[np.int_t, ndim=1] suppressed = \\\n            np.zeros((ndets), dtype=np.int)\n\n    # nominal indices\n    cdef int _i, _j\n    # sorted indices\n    cdef int i, j\n    # temp variables for box i's (the box currently under consideration)\n    cdef np.float32_t ix1, iy1, ix2, iy2, iarea\n    # variables for computing overlap with box j (lower scoring box)\n    cdef np.float32_t xx1, yy1, xx2, yy2\n    cdef np.float32_t w, h\n    cdef np.float32_t inter, ovr\n\n    keep = []\n    for _i in range(ndets):\n        i = order[_i]\n        if suppressed[i] == 1:\n            continue\n        keep.append(i)\n        ix1 = x1[i]\n        iy1 = y1[i]\n        ix2 = x2[i]\n        iy2 = y2[i]\n        iarea = areas[i]\n        for _j in range(_i + 1, ndets):\n            j = order[_j]\n            if suppressed[j] == 1:\n                continue\n            xx1 = max(ix1, x1[j])\n            yy1 = max(iy1, 
y1[j])\n            xx2 = min(ix2, x2[j])\n            yy2 = min(iy2, y2[j])\n            w = max(0.0, xx2 - xx1 + 1)\n            h = max(0.0, yy2 - yy1 + 1)\n            inter = w * h\n            ovr = inter / (iarea + areas[j] - inter)\n            if ovr >= thresh:\n                suppressed[j] = 1\n\n    return keep\n"
  },
  {
    "path": "rcnn/cython/gpu_nms.hpp",
    "content": "void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,\n          int boxes_dim, float nms_overlap_thresh, int device_id);\n"
  },
  {
    "path": "rcnn/cython/gpu_nms.pyx",
    "content": "# --------------------------------------------------------\n# Faster R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick\n# --------------------------------------------------------\n\nimport numpy as np\ncimport numpy as np\n\nassert sizeof(int) == sizeof(np.int32_t)\n\ncdef extern from \"gpu_nms.hpp\":\n    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)\n\ndef gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,\n            np.int32_t device_id=0):\n    cdef int boxes_num = dets.shape[0]\n    cdef int boxes_dim = dets.shape[1]\n    cdef int num_out\n    cdef np.ndarray[np.int32_t, ndim=1] \\\n        keep = np.zeros(boxes_num, dtype=np.int32)\n    cdef np.ndarray[np.float32_t, ndim=1] \\\n        scores = dets[:, 4]\n    cdef np.ndarray[np.int_t, ndim=1] \\\n        order = scores.argsort()[::-1]\n    cdef np.ndarray[np.float32_t, ndim=2] \\\n        sorted_dets = dets[order, :]\n    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)\n    keep = keep[:num_out]\n    return list(order[keep])\n"
  },
  {
    "path": "rcnn/cython/nms_kernel.cu",
    "content": "// ------------------------------------------------------------------\n// Faster R-CNN\n// Copyright (c) 2015 Microsoft\n// Licensed under The MIT License [see fast-rcnn/LICENSE for details]\n// Written by Shaoqing Ren\n// ------------------------------------------------------------------\n\n#include \"gpu_nms.hpp\"\n#include <vector>\n#include <iostream>\n\n#define CUDA_CHECK(condition) \\\n  /* Code block avoids redefinition of cudaError_t error */ \\\n  do { \\\n    cudaError_t error = condition; \\\n    if (error != cudaSuccess) { \\\n      std::cout << cudaGetErrorString(error) << std::endl; \\\n    } \\\n  } while (0)\n\n#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))\nint const threadsPerBlock = sizeof(unsigned long long) * 8;\n\n__device__ inline float devIoU(float const * const a, float const * const b) {\n  float left = max(a[0], b[0]), right = min(a[2], b[2]);\n  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);\n  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);\n  float interS = width * height;\n  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);\n  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);\n  return interS / (Sa + Sb - interS);\n}\n\n__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,\n                           const float *dev_boxes, unsigned long long *dev_mask) {\n  const int row_start = blockIdx.y;\n  const int col_start = blockIdx.x;\n\n  // if (row_start > col_start) return;\n\n  const int row_size =\n        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);\n  const int col_size =\n        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);\n\n  __shared__ float block_boxes[threadsPerBlock * 5];\n  if (threadIdx.x < col_size) {\n    block_boxes[threadIdx.x * 5 + 0] =\n        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];\n    block_boxes[threadIdx.x * 5 + 1] =\n        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) 
* 5 + 1];\n    block_boxes[threadIdx.x * 5 + 2] =\n        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];\n    block_boxes[threadIdx.x * 5 + 3] =\n        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];\n    block_boxes[threadIdx.x * 5 + 4] =\n        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];\n  }\n  __syncthreads();\n\n  if (threadIdx.x < row_size) {\n    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;\n    const float *cur_box = dev_boxes + cur_box_idx * 5;\n    int i = 0;\n    unsigned long long t = 0;\n    int start = 0;\n    if (row_start == col_start) {\n      start = threadIdx.x + 1;\n    }\n    for (i = start; i < col_size; i++) {\n      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {\n        t |= 1ULL << i;\n      }\n    }\n    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);\n    dev_mask[cur_box_idx * col_blocks + col_start] = t;\n  }\n}\n\nvoid _set_device(int device_id) {\n  int current_device;\n  CUDA_CHECK(cudaGetDevice(&current_device));\n  if (current_device == device_id) {\n    return;\n  }\n  // The call to cudaSetDevice must come before any calls to Get, which\n  // may perform initialization using the GPU.\n  CUDA_CHECK(cudaSetDevice(device_id));\n}\n\nvoid _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,\n          int boxes_dim, float nms_overlap_thresh, int device_id) {\n  _set_device(device_id);\n\n  float* boxes_dev = NULL;\n  unsigned long long* mask_dev = NULL;\n\n  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);\n\n  CUDA_CHECK(cudaMalloc(&boxes_dev,\n                        boxes_num * boxes_dim * sizeof(float)));\n  CUDA_CHECK(cudaMemcpy(boxes_dev,\n                        boxes_host,\n                        boxes_num * boxes_dim * sizeof(float),\n                        cudaMemcpyHostToDevice));\n\n  CUDA_CHECK(cudaMalloc(&mask_dev,\n                        boxes_num * col_blocks * sizeof(unsigned 
long long)));\n\n  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),\n              DIVUP(boxes_num, threadsPerBlock));\n  dim3 threads(threadsPerBlock);\n  nms_kernel<<<blocks, threads>>>(boxes_num,\n                                  nms_overlap_thresh,\n                                  boxes_dev,\n                                  mask_dev);\n\n  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);\n  CUDA_CHECK(cudaMemcpy(&mask_host[0],\n                        mask_dev,\n                        sizeof(unsigned long long) * boxes_num * col_blocks,\n                        cudaMemcpyDeviceToHost));\n\n  std::vector<unsigned long long> remv(col_blocks);\n  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);\n\n  int num_to_keep = 0;\n  for (int i = 0; i < boxes_num; i++) {\n    int nblock = i / threadsPerBlock;\n    int inblock = i % threadsPerBlock;\n\n    if (!(remv[nblock] & (1ULL << inblock))) {\n      keep_out[num_to_keep++] = i;\n      unsigned long long *p = &mask_host[0] + i * col_blocks;\n      for (int j = nblock; j < col_blocks; j++) {\n        remv[j] |= p[j];\n      }\n    }\n  }\n  *num_out = num_to_keep;\n\n  CUDA_CHECK(cudaFree(boxes_dev));\n  CUDA_CHECK(cudaFree(mask_dev));\n}\n"
  },
  {
    "path": "rcnn/cython/setup.py",
    "content": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Ross Girshick\n# --------------------------------------------------------\n\nimport os\nfrom os.path import join as pjoin\nfrom setuptools import setup\nfrom distutils.extension import Extension\nfrom Cython.Distutils import build_ext\nimport numpy as np\n\n\ndef find_in_path(name, path):\n    \"Find a file in a search path\"\n    # Adapted fom\n    # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/\n    for dir in path.split(os.pathsep):\n        binpath = pjoin(dir, name)\n        if os.path.exists(binpath):\n            return os.path.abspath(binpath)\n    return None\n\n\ndef locate_cuda():\n    \"\"\"Locate the CUDA environment on the system\n\n    Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'\n    and values giving the absolute path to each directory.\n\n    Starts by looking for the CUDAHOME env variable. If not found, everything\n    is based on finding 'nvcc' in the PATH.\n    \"\"\"\n\n    # first check if the CUDAHOME env variable is in use\n    if 'CUDAHOME' in os.environ:\n        home = os.environ['CUDAHOME']\n        nvcc = pjoin(home, 'bin', 'nvcc')\n    else:\n        # otherwise, search the PATH for NVCC\n        default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')\n        nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)\n        if nvcc is None:\n            raise EnvironmentError('The nvcc binary could not be '\n                'located in your $PATH. 
Either add it to your path, or set $CUDAHOME')\n        home = os.path.dirname(os.path.dirname(nvcc))\n\n    cudaconfig = {'home':home, 'nvcc':nvcc,\n                  'include': pjoin(home, 'include'),\n                  'lib64': pjoin(home, 'lib64')}\n    for k, v in cudaconfig.items():\n        if not os.path.exists(v):\n            raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))\n\n    return cudaconfig\n\n\n# Test if cuda could be foun\ntry:\n    CUDA = locate_cuda()\nexcept EnvironmentError:\n    CUDA = None\n\n\n# Obtain the numpy include directory.  This logic works across numpy versions.\ntry:\n    numpy_include = np.get_include()\nexcept AttributeError:\n    numpy_include = np.get_numpy_include()\n\n\ndef customize_compiler_for_nvcc(self):\n    \"\"\"inject deep into distutils to customize how the dispatch\n    to gcc/nvcc works.\n\n    If you subclass UnixCCompiler, it's not trivial to get your subclass\n    injected in, and still have the right customizations (i.e.\n    distutils.sysconfig.customize_compiler) run on it. So instead of going\n    the OO route, I have this. Note, it's kindof like a wierd functional\n    subclassing going on.\"\"\"\n\n    # tell the compiler it can processes .cu\n    self.src_extensions.append('.cu')\n\n    # save references to the default compiler_so and _comple methods\n    default_compiler_so = self.compiler_so\n    super = self._compile\n\n    # now redefine the _compile method. 
This gets executed for each\n    # object but distutils doesn't have the ability to change compilers\n    # based on source extension: we add it.\n    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):\n        if os.path.splitext(src)[1] == '.cu':\n            # use the cuda for .cu files\n            self.set_executable('compiler_so', CUDA['nvcc'])\n            # use only a subset of the extra_postargs, which are 1-1 translated\n            # from the extra_compile_args in the Extension class\n            postargs = extra_postargs['nvcc']\n        else:\n            postargs = extra_postargs['gcc']\n\n        super(obj, src, ext, cc_args, postargs, pp_opts)\n        # reset the default compiler_so, which we might have changed for cuda\n        self.compiler_so = default_compiler_so\n\n    # inject our redefined _compile method into the class\n    self._compile = _compile\n\n\n# run the customize_compiler\nclass custom_build_ext(build_ext):\n    def build_extensions(self):\n        customize_compiler_for_nvcc(self.compiler)\n        build_ext.build_extensions(self)\n\n\next_modules = [\n    Extension(\n        \"bbox\",\n        [\"bbox.pyx\"],\n        extra_compile_args={'gcc': [\"-Wno-cpp\", \"-Wno-unused-function\"]},\n        include_dirs=[numpy_include]\n    ),\n    Extension(\n        \"anchors\",\n        [\"anchors.pyx\"],\n        extra_compile_args={'gcc': [\"-Wno-cpp\", \"-Wno-unused-function\"]},\n        include_dirs=[numpy_include]\n    ),\n    Extension(\n        \"cpu_nms\",\n        [\"cpu_nms.pyx\"],\n        extra_compile_args={'gcc': [\"-Wno-cpp\", \"-Wno-unused-function\"]},\n        include_dirs = [numpy_include]\n    ),\n]\n\nif CUDA is not None:\n    ext_modules.append(\n        Extension('gpu_nms',\n            ['nms_kernel.cu', 'gpu_nms.pyx'],\n            library_dirs=[CUDA['lib64']],\n            libraries=['cudart'],\n            language='c++',\n            runtime_library_dirs=[CUDA['lib64']],\n            # this syntax 
is specific to this build system\n            # we're only going to use certain compiler args with nvcc and not with\n            # gcc the implementation of this trick is in customize_compiler() below\n            extra_compile_args={'gcc': [\"-Wno-unused-function\"],\n                                'nvcc': ['-arch=sm_35',\n                                         '--ptxas-options=-v',\n                                         '-c',\n                                         '--compiler-options',\n                                         \"'-fPIC'\"]},\n            include_dirs = [numpy_include, CUDA['include']]\n        )\n    )\nelse:\n    print('Skipping GPU_NMS')\n\n\nsetup(\n    name='frcnn_cython',\n    ext_modules=ext_modules,\n    # inject our custom trigger\n    cmdclass={'build_ext': custom_build_ext},\n)\n"
  },
  {
    "path": "rcnn/processing/__init__.py",
    "content": ""
  },
  {
    "path": "rcnn/processing/bbox_regression.py",
    "content": "\"\"\"\nThis file has functions about generating bounding box regression targets\n\"\"\"\n\nimport numpy as np\n\nfrom ..logger import logger\nfrom .bbox_transform import bbox_overlaps, bbox_transform\nfrom rcnn.config import config\n\n\ndef compute_bbox_regression_targets(rois, overlaps, labels):\n    \"\"\"\n    given rois, overlaps, gt labels, compute bounding box regression targets\n    :param rois: roidb[i]['boxes'] k * 4\n    :param overlaps: roidb[i]['max_overlaps'] k * 1\n    :param labels: roidb[i]['max_classes'] k * 1\n    :return: targets[i][class, dx, dy, dw, dh] k * 5\n    \"\"\"\n    # Ensure ROIs are floats\n    rois = rois.astype(np.float, copy=False)\n\n    # Sanity check\n    if len(rois) != len(overlaps):\n        logger.warning('bbox regression: len(rois) != len(overlaps)')\n\n    # Indices of ground-truth ROIs\n    gt_inds = np.where(overlaps == 1)[0]\n    if len(gt_inds) == 0:\n        logger.warning('bbox regression: len(gt_inds) == 0')\n\n    # Indices of examples for which we try to make predictions\n    ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0]\n\n    # Get IoU overlap between each ex ROI and gt ROI\n    ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])\n\n    # Find which gt ROI each ex ROI has max overlap with:\n    # this will be the ex ROI's gt target\n    gt_assignment = ex_gt_overlaps.argmax(axis=1)\n    gt_rois = rois[gt_inds[gt_assignment], :]\n    ex_rois = rois[ex_inds, :]\n\n    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)\n    targets[ex_inds, 0] = labels[ex_inds]\n    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)\n    return targets\n\n\ndef add_bbox_regression_targets(roidb):\n    \"\"\"\n    given roidb, add ['bbox_targets'] and normalize bounding box regression targets\n    :param roidb: roidb to be processed. 
must have gone through imdb.prepare_roidb\n    :return: means, std variances of targets\n    \"\"\"\n    logger.info('bbox regression: add bounding box regression targets')\n    assert len(roidb) > 0\n    assert 'max_classes' in roidb[0]\n\n    num_images = len(roidb)\n    num_classes = roidb[0]['gt_overlaps'].shape[1]\n    for im_i in range(num_images):\n        rois = roidb[im_i]['boxes']\n        max_overlaps = roidb[im_i]['max_overlaps']\n        max_classes = roidb[im_i]['max_classes']\n        roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes)\n\n    if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:\n        # use fixed / precomputed means and stds instead of empirical values\n        means = np.tile(np.array(config.TRAIN.BBOX_MEANS), (num_classes, 1))\n        stds = np.tile(np.array(config.TRAIN.BBOX_STDS), (num_classes, 1))\n    else:\n        # compute mean, std values\n        class_counts = np.zeros((num_classes, 1)) + 1e-14\n        sums = np.zeros((num_classes, 4))\n        squared_sums = np.zeros((num_classes, 4))\n        for im_i in range(num_images):\n            targets = roidb[im_i]['bbox_targets']\n            for cls in range(1, num_classes):\n                cls_indexes = np.where(targets[:, 0] == cls)[0]\n                if cls_indexes.size > 0:\n                    class_counts[cls] += cls_indexes.size\n                    sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0)\n                    squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0)\n\n        means = sums / class_counts\n        # var(x) = E(x^2) - E(x)^2\n        stds = np.sqrt(squared_sums / class_counts - means ** 2)\n\n    # normalized targets\n    for im_i in range(num_images):\n        targets = roidb[im_i]['bbox_targets']\n        for cls in range(1, num_classes):\n            cls_indexes = np.where(targets[:, 0] == cls)[0]\n            roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :]\n            
roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= stds[cls, :]\n\n    return means.ravel(), stds.ravel()\n\n\ndef expand_bbox_regression_targets(bbox_targets_data, num_classes):\n    \"\"\"\n    expand from 5 to 4 * num_classes; only the right class has non-zero bbox regression targets\n    :param bbox_targets_data: [k * 5]\n    :param num_classes: number of classes\n    :return: bbox target processed [k * 4 num_classes]\n    bbox_weights ! only foreground boxes have bbox regression computation!\n    \"\"\"\n    classes = bbox_targets_data[:, 0]\n    bbox_targets = np.zeros((classes.size, 4 * num_classes), dtype=np.float32)\n    bbox_weights = np.zeros(bbox_targets.shape, dtype=np.float32)\n    indexes = np.where(classes > 0)[0]\n    for index in indexes:\n        cls = classes[index]\n        start = int(4 * cls)\n        end = start + 4\n        bbox_targets[index, start:end] = bbox_targets_data[index, 1:]\n        bbox_weights[index, start:end] = config.TRAIN.BBOX_WEIGHTS\n    return bbox_targets, bbox_weights\n\n"
  },
  {
    "path": "rcnn/processing/bbox_transform.py",
    "content": "import numpy as np\nfrom ..cython.bbox import bbox_overlaps_cython\n\n\ndef bbox_overlaps(boxes, query_boxes):\n    return bbox_overlaps_cython(boxes, query_boxes)\n\n\ndef bbox_overlaps_py(boxes, query_boxes):\n    \"\"\"\n    determine overlaps between boxes and query_boxes\n    :param boxes: n * 4 bounding boxes\n    :param query_boxes: k * 4 bounding boxes\n    :return: overlaps: n * k overlaps\n    \"\"\"\n    n_ = boxes.shape[0]\n    k_ = query_boxes.shape[0]\n    overlaps = np.zeros((n_, k_), dtype=np.float)\n    for k in range(k_):\n        query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1)\n        for n in range(n_):\n            iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1\n            if iw > 0:\n                ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1\n                if ih > 0:\n                    box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1)\n                    all_area = float(box_area + query_box_area - iw * ih)\n                    overlaps[n, k] = iw * ih / all_area\n    return overlaps\n\n\ndef clip_boxes(boxes, im_shape):\n    \"\"\"\n    Clip boxes to image boundaries.\n    :param boxes: [N, 4* num_classes]\n    :param im_shape: tuple of 2\n    :return: [N, 4* num_classes]\n    \"\"\"\n    # x1 >= 0\n    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)\n    # y1 >= 0\n    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)\n    # x2 < im_shape[1]\n    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)\n    # y2 < im_shape[0]\n    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)\n    return boxes\n\ndef clip_points(points, im_shape):\n    \"\"\"\n    Clip boxes to image boundaries.\n    :param boxes: [N, 4* num_classes]\n    :param im_shape: tuple of 2\n    
:return: [N, 4* num_classes]\n    \"\"\"\n\n    points[:, 0::10] = np.maximum(np.minimum(points[:, 0::10], im_shape[1] - 1), 0)\n    points[:, 1::10] = np.maximum(np.minimum(points[:, 1::10], im_shape[0] - 1), 0)\n    points[:, 2::10] = np.maximum(np.minimum(points[:, 2::10], im_shape[1] - 1), 0)\n    points[:, 3::10] = np.maximum(np.minimum(points[:, 3::10], im_shape[0] - 1), 0)\n    points[:, 4::10] = np.maximum(np.minimum(points[:, 4::10], im_shape[1] - 1), 0)\n    points[:, 5::10] = np.maximum(np.minimum(points[:, 5::10], im_shape[0] - 1), 0)\n    points[:, 6::10] = np.maximum(np.minimum(points[:, 6::10], im_shape[1] - 1), 0)\n    points[:, 7::10] = np.maximum(np.minimum(points[:, 7::10], im_shape[0] - 1), 0)\n    points[:, 8::10] = np.maximum(np.minimum(points[:, 8::10], im_shape[1] - 1), 0)\n    points[:, 9::10] = np.maximum(np.minimum(points[:, 9::10], im_shape[0] - 1), 0)\n\n    return points\n\ndef nonlinear_transform(ex_rois, gt_rois):\n    \"\"\"\n    compute bounding box regression targets from ex_rois to gt_rois\n    :param ex_rois: [N, 4]\n    :param gt_rois: [N, 4]\n    :return: [N, 4]\n    \"\"\"\n    assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'\n\n    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0\n    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0\n    ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0)\n    ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0)\n\n    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0\n    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0\n    gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0)\n    gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0)\n\n    targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14)\n    targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14)\n    targets_dw = np.log(gt_widths / ex_widths)\n    targets_dh = np.log(gt_heights / ex_heights)\n\n    targets = np.vstack(\n        (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()\n    return 
targets\n\n\ndef nonlinear_pred(boxes, box_deltas):\n    \"\"\"\n    Transform the set of class-agnostic boxes into class-specific boxes\n    by applying the predicted offsets (box_deltas)\n    :param boxes: !important [N 4]\n    :param box_deltas: [N, 4 * num_classes]\n    :return: [N 4 * num_classes]\n    \"\"\"\n    if boxes.shape[0] == 0:\n        return np.zeros((0, box_deltas.shape[1]))\n\n    boxes = boxes.astype(np.float, copy=False)\n    widths = boxes[:, 2] - boxes[:, 0] + 1.0\n    heights = boxes[:, 3] - boxes[:, 1] + 1.0\n    ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)\n    ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)\n\n    dx = box_deltas[:, 0::4]\n    dy = box_deltas[:, 1::4]\n    dw = box_deltas[:, 2::4]\n    dh = box_deltas[:, 3::4]\n\n    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]\n    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]\n    pred_w = np.exp(dw) * widths[:, np.newaxis]\n    pred_h = np.exp(dh) * heights[:, np.newaxis]\n\n    pred_boxes = np.zeros(box_deltas.shape)\n    # x1\n    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0)\n    # y1\n    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0)\n    # x2\n    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0)\n    # y2\n    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0)\n\n    return pred_boxes\n\ndef kpoint_pred(boxes, point_deltas):\n    \"\"\"\n    Transform the set of boxes into five (x, y) keypoints per box\n    by applying the predicted offsets (point_deltas)\n    :param boxes: !important [N 4]\n    :param point_deltas: [N, 10] as (d1x, d1y, ..., d5x, d5y)\n    :return: [N, 10] as (x1, y1, ..., x5, y5)\n    \"\"\"\n    if boxes.shape[0] == 0:\n        return np.zeros((0, point_deltas.shape[1]))\n\n    boxes = boxes.astype(np.float, copy=False)\n    widths = boxes[:, 2] - boxes[:, 0] + 1.0\n    heights = boxes[:, 3] - boxes[:, 1] + 1.0\n    ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)\n    ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)\n\n    d1x = 
point_deltas[:, 0]\n    d1y = point_deltas[:, 1]\n    d2x = point_deltas[:, 2]\n    d2y = point_deltas[:, 3]\n    d3x = point_deltas[:, 4]\n    d3y = point_deltas[:, 5]\n    d4x = point_deltas[:, 6]\n    d4y = point_deltas[:, 7]\n    d5x = point_deltas[:, 8]\n    d5y = point_deltas[:, 9]\n\n\n    pred_points = np.zeros(point_deltas.shape)\n    # x1\n    x = d1x * widths\n    # print(\"aa\", d1x.shape, widths.shape, ctr_x.shape, x.shape)\n    pred_points[:, 0] = d1x * widths + ctr_x\n    pred_points[:, 1] = d1y * heights + ctr_y\n    pred_points[:, 2] = d2x * widths + ctr_x\n    pred_points[:, 3] = d2y * heights + ctr_y\n    pred_points[:, 4] = d3x * widths + ctr_x\n    pred_points[:, 5] = d3y * heights + ctr_y\n    pred_points[:, 6] = d4x * widths + ctr_x\n    pred_points[:, 7] = d4y * heights + ctr_y\n    pred_points[:, 8] = d5x * widths + ctr_x\n    pred_points[:, 9] = d5y * heights + ctr_y\n\n    return pred_points\n\ndef iou_transform(ex_rois, gt_rois):\n    \"\"\" return bbox targets, IoU loss uses gt_rois as gt \"\"\"\n    assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'\n    return gt_rois\n\n\ndef iou_pred(boxes, box_deltas):\n    \"\"\"\n    Transform the set of class-agnostic boxes into class-specific boxes\n    by applying the predicted offsets (box_deltas)\n    :param boxes: !important [N 4]\n    :param box_deltas: [N, 4 * num_classes]\n    :return: [N 4 * num_classes]\n    \"\"\"\n    if boxes.shape[0] == 0:\n        return np.zeros((0, box_deltas.shape[1]))\n\n    boxes = boxes.astype(np.float, copy=False)\n    x1 = boxes[:, 0]\n    y1 = boxes[:, 1]\n    x2 = boxes[:, 2]\n    y2 = boxes[:, 3]\n\n    dx1 = box_deltas[:, 0::4]\n    dy1 = box_deltas[:, 1::4]\n    dx2 = box_deltas[:, 2::4]\n    dy2 = box_deltas[:, 3::4]\n\n    pred_boxes = np.zeros(box_deltas.shape)\n    # x1\n    pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis]\n    # y1\n    pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis]\n    # x2\n    pred_boxes[:, 2::4] = dx2 + 
x2[:, np.newaxis]\n    # y2\n    pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis]\n\n    return pred_boxes\n\n\n# define bbox_transform and bbox_pred\nbbox_transform = nonlinear_transform\nbbox_pred = nonlinear_pred\n"
  },
  {
    "path": "rcnn/processing/generate_anchor.py",
    "content": "\"\"\"\nGenerate base anchors on index 0\n\"\"\"\nfrom __future__ import print_function\nimport sys\n#from builtins import range\nimport numpy as np\nfrom ..cython.anchors import anchors_cython\n\n\ndef anchors_plane(feat_h, feat_w, stride, base_anchor):\n    return anchors_cython(feat_h, feat_w, stride, base_anchor)\n\ndef generate_anchors(base_size=16, ratios=[0.5, 1, 2],\n                     scales=2 ** np.arange(3, 6)):\n    \"\"\"\n    Generate anchor (reference) windows by enumerating aspect ratios X\n    scales wrt a reference (0, 0, 15, 15) window.\n    \"\"\"\n\n    base_anchor = np.array([1, 1, base_size, base_size]) - 1\n    ratio_anchors = _ratio_enum(base_anchor, ratios)\n    anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)\n                         for i in range(ratio_anchors.shape[0])])\n    return anchors\n\ndef generate_anchors_fpn(base_size=[64,32,16,8,4], ratios=[0.5, 1, 2],\n                     scales=8):\n    \"\"\"\n    Generate anchor (reference) windows by enumerating aspect ratios X\n    scales wrt a reference (0, 0, 15, 15) window.\n    \"\"\"\n    anchors = []\n    _ratios = ratios.reshape( (len(base_size), -1) )\n    _scales = scales.reshape( (len(base_size), -1) )\n    for i,bs in enumerate(base_size):\n      __ratios = _ratios[i]\n      __scales = _scales[i]\n      #print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr)\n      r = generate_anchors(bs, __ratios, __scales)\n      #print('anchors_fpn', r.shape, file=sys.stderr)\n      anchors.append(r)\n\n    return anchors\n\ndef _whctrs(anchor):\n    \"\"\"\n    Return width, height, x center, and y center for an anchor (window).\n    \"\"\"\n\n    w = anchor[2] - anchor[0] + 1\n    h = anchor[3] - anchor[1] + 1\n    x_ctr = anchor[0] + 0.5 * (w - 1)\n    y_ctr = anchor[1] + 0.5 * (h - 1)\n    return w, h, x_ctr, y_ctr\n\n\ndef _mkanchors(ws, hs, x_ctr, y_ctr):\n    \"\"\"\n    Given a vector of widths (ws) and heights (hs) around a center\n    
(x_ctr, y_ctr), output a set of anchors (windows).\n    \"\"\"\n\n    ws = ws[:, np.newaxis]\n    hs = hs[:, np.newaxis]\n    anchors = np.hstack((x_ctr - 0.5 * (ws - 1),\n                         y_ctr - 0.5 * (hs - 1),\n                         x_ctr + 0.5 * (ws - 1),\n                         y_ctr + 0.5 * (hs - 1)))\n    return anchors\n\n\ndef _ratio_enum(anchor, ratios):\n    \"\"\"\n    Enumerate a set of anchors for each aspect ratio wrt an anchor.\n    \"\"\"\n\n    w, h, x_ctr, y_ctr = _whctrs(anchor)\n    size = w * h\n    size_ratios = size / ratios\n    ws = np.round(np.sqrt(size_ratios))\n    hs = np.round(ws * ratios)\n    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)\n    return anchors\n\n\ndef _scale_enum(anchor, scales):\n    \"\"\"\n    Enumerate a set of anchors for each scale wrt an anchor.\n    \"\"\"\n\n    w, h, x_ctr, y_ctr = _whctrs(anchor)\n    ws = w * scales\n    hs = h * scales\n    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)\n    return anchors\n"
  },
  {
    "path": "rcnn/processing/nms.py",
    "content": "import numpy as np\nfrom ..cython.cpu_nms import cpu_nms\ntry:\n    from ..cython.gpu_nms import gpu_nms\nexcept ImportError:\n    gpu_nms = None\n\n\ndef py_nms_wrapper(thresh):\n    def _nms(dets):\n        return nms(dets, thresh)\n    return _nms\n\n\ndef cpu_nms_wrapper(thresh):\n    def _nms(dets):\n        return cpu_nms(dets, thresh)\n    return _nms\n\n\ndef gpu_nms_wrapper(thresh, device_id):\n    def _nms(dets):\n        return gpu_nms(dets[:,0:5], thresh, device_id)\n    if gpu_nms is not None:\n        return _nms\n    else:\n        return cpu_nms_wrapper(thresh)\n\n\ndef nms(dets, thresh):\n    \"\"\"\n    greedily select boxes with high confidence and overlap with current maximum <= thresh\n    rule out overlap >= thresh\n    :param dets: [[x1, y1, x2, y2 score]]\n    :param thresh: retain overlap < thresh\n    :return: indexes to keep\n    \"\"\"\n    x1 = dets[:, 0]\n    y1 = dets[:, 1]\n    x2 = dets[:, 2]\n    y2 = dets[:, 3]\n    scores = dets[:, 4]\n\n    areas = (x2 - x1 + 1) * (y2 - y1 + 1)\n    order = scores.argsort()[::-1]\n\n    keep = []\n    while order.size > 0:\n        i = order[0]\n        keep.append(i)\n        xx1 = np.maximum(x1[i], x1[order[1:]])\n        yy1 = np.maximum(y1[i], y1[order[1:]])\n        xx2 = np.minimum(x2[i], x2[order[1:]])\n        yy2 = np.minimum(y2[i], y2[order[1:]])\n\n        w = np.maximum(0.0, xx2 - xx1 + 1)\n        h = np.maximum(0.0, yy2 - yy1 + 1)\n        inter = w * h\n        ovr = inter / (areas[i] + areas[order[1:]] - inter)\n\n        inds = np.where(ovr <= thresh)[0]\n        order = order[inds + 1]\n\n    return keep\n"
  },
  {
    "path": "ssha_detector.py",
    "content": "from __future__ import print_function\nimport sys\nimport cv2\nimport mxnet as mx\nfrom mxnet import ndarray as nd\nimport numpy as np\nimport numpy.random as npr\nfrom distutils.util import strtobool\n\nfrom rcnn.processing.bbox_transform import nonlinear_pred, clip_boxes, kpoint_pred, clip_points\nfrom rcnn.processing.generate_anchor import generate_anchors_fpn, anchors_plane\nfrom rcnn.processing.nms import gpu_nms_wrapper\n\n\nclass SSHDetector:\n    def __init__(self, prefix, epoch, ctx_id=0, test_mode=False):\n        self.ctx_id = ctx_id\n        self.ctx = mx.gpu(self.ctx_id)\n        self.fpn_keys = []\n        fpn_stride = []\n        fpn_base_size = []\n        self._feat_stride_fpn = [32, 16, 8]\n\n        for s in self._feat_stride_fpn:\n            self.fpn_keys.append('stride%s' % s)\n            fpn_stride.append(int(s))\n            fpn_base_size.append(16)\n\n        self._scales = np.array([32, 16, 8, 4, 2, 1])\n        self._ratios = np.array([1.0] * len(self._feat_stride_fpn))\n        self._anchors_fpn = dict(\n            zip(self.fpn_keys, generate_anchors_fpn(base_size=fpn_base_size, scales=self._scales, ratios=self._ratios)))\n        self._num_anchors = dict(zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()]))\n        self._rpn_pre_nms_top_n = 1000\n        # self._rpn_post_nms_top_n = rpn_post_nms_top_n\n        # self.score_threshold = 0.05\n        self.nms_threshold = 0.3\n        self._bbox_pred = nonlinear_pred\n        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)\n        # mx.viz.plot_network(sym).view()\n        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)\n        self.pixel_means = np.array([103.939, 116.779, 123.68])  # BGR\n\n        if not test_mode:\n            image_size = (640, 640)\n            self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None)\n            self.model.bind(data_shapes=[('data', (1, 3, 
image_size[0], image_size[1]))], for_training=False)\n            self.model.set_params(arg_params, aux_params)\n        else:\n            from rcnn.core.module import MutableModule\n            image_size = (2400, 2400)\n            data_shape = [('data', (1, 3, image_size[0], image_size[1]))]\n            self.model = MutableModule(symbol=sym, data_names=['data'], label_names=None,\n                                       context=self.ctx, max_data_shapes=data_shape)\n            self.model.bind(data_shape, None, for_training=False)\n            self.model.set_params(arg_params, aux_params)\n\n    def detect(self, img, threshold=0.5, scales=[1.0]):\n        proposals_list = []\n        proposals_kp_list = []\n        scores_list = []\n\n        for im_scale in scales:\n\n            if im_scale != 1.0:\n                im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)\n            else:\n                im = img\n            im = im.astype(np.float32)\n            # self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)\n            im_info = [im.shape[0], im.shape[1], im_scale]\n            im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))\n            for i in range(3):\n                im_tensor[0, i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i]\n            data = nd.array(im_tensor)\n            print(\"data.shape: \", data.shape)\n            db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)])\n            self.model.forward(db, is_train=False)\n            net_out = self.model.get_outputs()\n            # print(\"net_out: \", net_out)\n            pre_nms_topN = self._rpn_pre_nms_top_n\n            # post_nms_topN = self._rpn_post_nms_top_n\n            # min_size_dict = self._rpn_min_size_fpn\n\n            for s in self._feat_stride_fpn:\n                if len(scales) > 1 and s == 32 and im_scale == scales[-1]:\n                    
continue\n                _key = 'stride%s' % s\n                stride = int(s)\n                idx = 0\n                if s == 16:\n                    idx = 3\n                elif s == 8:\n                    idx = 6\n                print('getting', im_scale, stride, idx, len(net_out), data.shape, file=sys.stderr)\n\n                # print(\"net_out\", net_out)\n                scores = net_out[idx].asnumpy()\n                # print(scores.shape)\n                idx += 1\n                # print('scores',stride, scores.shape, file=sys.stderr)\n                scores = scores[:, self._num_anchors['stride%s' % s]:, :, :]\n                bbox_deltas = net_out[idx].asnumpy()\n                idx += 1\n\n                # if DEBUG:\n                #    print 'im_size: ({}, {})'.format(im_info[0], im_info[1])\n                #    print 'scale: {}'.format(im_info[2])\n\n                _height, _width = int(im_info[0] / stride), int(im_info[1] / stride)\n                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]\n\n                # kpoint\n                kpoint_deltas = net_out[idx].asnumpy()\n\n\n                A = self._num_anchors['stride%s' % s]\n                K = height * width\n\n                anchors = anchors_plane(height, width, stride, self._anchors_fpn['stride%s' % s].astype(np.float32))\n                # print((height, width), (_height, _width), anchors.shape, bbox_deltas.shape, scores.shape, file=sys.stderr)\n                anchors = anchors.reshape((K * A, 4))\n                # print('pre', bbox_deltas.shape, height, width)\n                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))\n                # print('after', bbox_deltas.shape, height, width)\n                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))\n\n                kpoint_deltas = self._clip_pad(kpoint_deltas, (height, width))\n                kpoint_deltas = kpoint_deltas.transpose((0, 2, 3, 1)).reshape((-1, 10))\n\n  
              scores = self._clip_pad(scores, (height, width))\n                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))\n\n                # print(anchors.shape, bbox_deltas.shape, A, K, file=sys.stderr)\n                proposals = self._bbox_pred(anchors, bbox_deltas)\n                # proposals = anchors\n\n                proposals = clip_boxes(proposals, im_info[:2])\n\n                proposals_kp = kpoint_pred(anchors, kpoint_deltas)\n\n                proposals_kp = clip_points(proposals_kp, im_info[:2])\n\n                # keep = self._filter_boxes(proposals, min_size_dict['stride%s'%s] * im_info[2])\n                # proposals = proposals[keep, :]\n                # scores = scores[keep]\n                # print('333', proposals.shape)\n\n                scores_ravel = scores.ravel()\n                order = scores_ravel.argsort()[::-1]\n                if pre_nms_topN > 0:\n                    order = order[:pre_nms_topN]\n                proposals = proposals[order, :]\n                proposals_kp = proposals_kp[order, :]\n                scores = scores[order]\n\n                proposals /= im_scale\n                proposals_kp /= im_scale\n\n                proposals_list.append(proposals)\n                proposals_kp_list.append(proposals_kp)\n                scores_list.append(scores)\n\n        proposals = np.vstack(proposals_list)\n        proposals_kp = np.vstack(proposals_kp_list)\n        scores = np.vstack(scores_list)\n        scores_ravel = scores.ravel()\n        order = scores_ravel.argsort()[::-1]\n        # if config.TEST.SCORE_THRESH>0.0:\n        #  _count = np.sum(scores_ravel>config.TEST.SCORE_THRESH)\n        #  order = order[:_count]\n        # if pre_nms_topN > 0:\n        #    order = order[:pre_nms_topN]\n        proposals = proposals[order, :]\n        proposals_kp = proposals_kp[order, :]\n        scores = scores[order]\n\n        det = np.hstack((proposals, scores, proposals_kp)).astype(np.float32)\n\n 
       # if np.shape(det)[0] == 0:\n        #    print(\"Something wrong with the input image(resolution is too low?), generate fake proposals for it.\")\n        #    proposals = np.array([[1.0, 1.0, 2.0, 2.0]]*post_nms_topN, dtype=np.float32)\n        #    scores = np.array([[0.9]]*post_nms_topN, dtype=np.float32)\n        #    det = np.array([[1.0, 1.0, 2.0, 2.0, 0.9]]*post_nms_topN, dtype=np.float32)\n\n        if self.nms_threshold < 1.0:\n            keep = self.nms(det)\n            det = det[keep, :]\n        if threshold > 0.0:\n            keep = np.where(det[:, 4] >= threshold)[0]\n            det = det[keep, :]\n        return det\n\n    @staticmethod\n    def _filter_boxes(boxes, min_size):\n        \"\"\" Remove all boxes with any side smaller than min_size \"\"\"\n        ws = boxes[:, 2] - boxes[:, 0] + 1\n        hs = boxes[:, 3] - boxes[:, 1] + 1\n        keep = np.where((ws >= min_size) & (hs >= min_size))[0]\n        return keep\n\n    @staticmethod\n    def _clip_pad(tensor, pad_shape):\n        \"\"\"\n        Clip boxes of the pad area.\n        :param tensor: [n, c, H, W]\n        :param pad_shape: [h, w]\n        :return: [n, c, h, w]\n        \"\"\"\n        H, W = tensor.shape[2:]\n        h, w = pad_shape\n\n        if h < H or w < W:\n            tensor = tensor[:, :, :h, :w].copy()\n\n        return tensor\n"
  },
  {
    "path": "test_kpoint.py",
    "content": "import cv2\nimport sys\nimport numpy as np\nimport datetime\n#sys.path.append('.')\nfrom ssha_detector import SSHDetector\n\nscales = [1200, 1600]\n# scales = [200, 600]\nt = 2\ndetector = SSHDetector('./kmodel/e2e', 0)\n\n\n\nf = '../sample-images/t1.jpg'\nf = 'test_image/test_2.jpg'\nif len(sys.argv)>1:\n  f = sys.argv[1]\nimg = cv2.imread(f)\nim_shape = img.shape\nprint(im_shape)\ntarget_size = scales[0]\nmax_size = scales[1]\nim_size_min = np.min(im_shape[0:2])\nim_size_max = np.max(im_shape[0:2])\n# cv2.copyMakeBorder()\nimg = cv2.copyMakeBorder(img, 5, 5, 5, 5, borderType=cv2.BORDER_CONSTANT, value=[0,0,0])\nif im_size_min>target_size or im_size_max>max_size:\n  im_scale = float(target_size) / float(im_size_min)\n  # prevent bigger axis from being more than max_size:\n  if np.round(im_scale * im_size_max) > max_size:\n      im_scale = float(max_size) / float(im_size_max)\n  img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)\n  print('resize to', img.shape)\n# for i in xrange(t-1): #warmup\n#   faces = detector.detect(img)\ntimea = datetime.datetime.now()\nfaces = detector.detect(img, threshold=0.8)\ntimeb = datetime.datetime.now()\nfor num in range(faces.shape[0]):\n  bbox = faces[num, 0:4]\n  cv2.rectangle(img, (bbox[0],bbox[1]),(bbox[2], bbox[3]), (0,255, 0), 2)\n  kpoint = faces[num, 5:15]\n  for knum in range(5):\n      cv2.circle(img, (kpoint[2*knum], kpoint[2*knum+1]), 1, [0,0,255], 2)\n\ncv2.imwrite(\"res.jpg\", img[5:-5,5:-5,:])\ndiff = timeb - timea\nprint('detection uses', diff.total_seconds(), 'seconds')\nprint('find', faces.shape[0], 'faces')\n"
  }
]