Repository: YYuanAnyVision/mxnet_mtcnn_face_detection Branch: master Commit: b56065418b63 Files: 21 Total size: 98.5 KB Directory structure: gitextract_cj2cvrn1/ ├── .gitignore ├── README.md ├── helper.py ├── main.py ├── model/ │ ├── det1-0001.params │ ├── det1-symbol.json │ ├── det1.caffemodel │ ├── det1.prototxt │ ├── det2-0001.params │ ├── det2-symbol.json │ ├── det2.caffemodel │ ├── det2.prototxt │ ├── det3-0001.params │ ├── det3-symbol.json │ ├── det3.caffemodel │ ├── det3.prototxt │ ├── det4-0001.params │ ├── det4-symbol.json │ ├── det4.caffemodel │ └── det4.prototxt └── mtcnn_detector.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ *.pyc caffe_converter .idea ================================================ FILE: README.md ================================================ # MTCNN_face_detection_and_alignment ## About This is a python/mxnet implementation of [Zhang](https://kpzhang93.github.io/)'s work ****. it's fast and accurate, see [link](https://github.com/kpzhang93/MTCNN_face_detection_alignment). It should have **almost** the same output with the original work, for mxnet fans and those can't afford matlab :) [中文blog](https://pangyupo.github.io/2016/10/22/mxnet-mtcnn/) ## Requirement - opencv ​ I use cv2 for image io and resize(much faster than skimage), the input image's channel is acutally BGR - mxnet ​ **please update to the newest version, we need 'full' mode in Pooling operation** Only tested on Linux and Mac ## Test run: ``python main.py`` you can change `ctx` to `mx.gpu(0)` for faster detection --- update 20161028 --- by setting ``num_worker=4`` ``accurate_landmark=False`` we can reduce the detection time by 1/4-1/3, the bboxes are still the same, but we skip the last landmark fine-tune stage( mtcnn_v1 ). --- update 20161207 --- add function `extract_face_chips`, examples: ![1](http://7vikw0.com1.z0.glb.clouddn.com/chip_0.png) ![2](http://7vikw0.com1.z0.glb.clouddn.com/chip_3.png) ![3](http://7vikw0.com1.z0.glb.clouddn.com/chip_2.png) ![4](http://7vikw0.com1.z0.glb.clouddn.com/chip_1.png) see `mtcnn_detector.py` for the details about the parameters. this function use [dlib](http://dlib.net/)'s align strategy, which works well on profile images :) ## Results ![big4](http://7xsc78.com1.z0.glb.clouddn.com/face_mtcnn.png) ## License MIT LICENSE ## Reference K. Zhang and Z. Zhang and Z. Li and Y. Qiao Joint, Face Detection and Alignment Using Multitask Cascaded Convolutional Networks, IEEE Signal Processing Letters ================================================ FILE: helper.py ================================================ # coding: utf-8 # YuanYang import math import cv2 import numpy as np def nms(boxes, overlap_threshold, mode='Union'): """ non max suppression Parameters: ---------- box: numpy array n x 5 input bbox array overlap_threshold: float number threshold of overlap mode: float number how to compute overlap ratio, 'Union' or 'Min' Returns: ------- index array of the selected bbox """ # if there are no boxes, return an empty list if len(boxes) == 0: return [] # if the bounding boxes integers, convert them to floats if boxes.dtype.kind == "i": boxes = boxes.astype("float") # initialize the list of picked indexes pick = [] # grab the coordinates of the bounding boxes x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)] area = (x2 - x1 + 1) * (y2 - y1 + 1) idxs = np.argsort(score) # keep looping while some indexes still remain in the indexes list while len(idxs) > 0: # grab the last index in the indexes list and add the index value to the list of picked indexes last = len(idxs) - 1 i = idxs[last] pick.append(i) xx1 = np.maximum(x1[i], x1[idxs[:last]]) yy1 = np.maximum(y1[i], y1[idxs[:last]]) xx2 = np.minimum(x2[i], x2[idxs[:last]]) yy2 = np.minimum(y2[i], y2[idxs[:last]]) # compute the width and height of the bounding box w = np.maximum(0, xx2 - xx1 + 1) h = np.maximum(0, yy2 - yy1 + 1) inter = w * h if mode == 'Min': overlap = inter / np.minimum(area[i], area[idxs[:last]]) else: overlap = inter / (area[i] + area[idxs[:last]] - inter) # delete all indexes from the index list that have idxs = np.delete(idxs, np.concatenate(([last], np.where(overlap > overlap_threshold)[0]))) return pick def adjust_input(in_data): """ adjust the input from (h, w, c) to ( 1, c, h, w) for network input Parameters: ---------- in_data: numpy array of shape (h, w, c) input data Returns: ------- out_data: numpy array of shape (1, c, h, w) reshaped array """ if in_data.dtype is not np.dtype('float32'): out_data = in_data.astype(np.float32) else: out_data = in_data out_data = out_data.transpose((2,0,1)) out_data = np.expand_dims(out_data, 0) out_data = (out_data - 127.5)*0.0078125 return out_data def generate_bbox(map, reg, scale, threshold): """ generate bbox from feature map Parameters: ---------- map: numpy array , n x m x 1 detect score for each position reg: numpy array , n x m x 4 bbox scale: float number scale of this detection threshold: float number detect threshold Returns: ------- bbox array """ stride = 2 cellsize = 12 t_index = np.where(map>threshold) # find nothing if t_index[0].size == 0: return np.array([]) dx1, dy1, dx2, dy2 = [reg[0, i, t_index[0], t_index[1]] for i in range(4)] reg = np.array([dx1, dy1, dx2, dy2]) score = map[t_index[0], t_index[1]] boundingbox = np.vstack([np.round((stride*t_index[1]+1)/scale), np.round((stride*t_index[0]+1)/scale), np.round((stride*t_index[1]+1+cellsize)/scale), np.round((stride*t_index[0]+1+cellsize)/scale), score, reg]) return boundingbox.T def detect_first_stage(img, net, scale, threshold): """ run PNet for first stage Parameters: ---------- img: numpy array, bgr order input image scale: float number how much should the input image scale net: PNet worker Returns: ------- total_boxes : bboxes """ height, width, _ = img.shape hs = int(math.ceil(height * scale)) ws = int(math.ceil(width * scale)) im_data = cv2.resize(img, (ws,hs)) # adjust for the network input input_buf = adjust_input(im_data) output = net.predict(input_buf) boxes = generate_bbox(output[1][0,1,:,:], output[0], scale, threshold) if boxes.size == 0: return None # nms pick = nms(boxes[:,0:5], 0.5, mode='Union') boxes = boxes[pick] return boxes def detect_first_stage_warpper( args ): return detect_first_stage(*args) ================================================ FILE: main.py ================================================ # coding: utf-8 import mxnet as mx from mtcnn_detector import MtcnnDetector import cv2 import os import time detector = MtcnnDetector(model_folder='model', ctx=mx.cpu(0), num_worker = 4 , accurate_landmark = False) img = cv2.imread('test2.jpg') # run detector results = detector.detect_face(img) if results is not None: total_boxes = results[0] points = results[1] # extract aligned face chips chips = detector.extract_image_chips(img, points, 144, 0.37) for i, chip in enumerate(chips): cv2.imshow('chip_'+str(i), chip) cv2.imwrite('chip_'+str(i)+'.png', chip) draw = img.copy() for b in total_boxes: cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (255, 255, 255)) for p in points: for i in range(5): cv2.circle(draw, (p[i], p[i + 5]), 1, (0, 0, 255), 2) cv2.imshow("detection result", draw) cv2.waitKey(0) # -------------- # test on camera # -------------- ''' camera = cv2.VideoCapture(0) while True: grab, frame = camera.read() img = cv2.resize(frame, (320,180)) t1 = time.time() results = detector.detect_face(img) print 'time: ',time.time() - t1 if results is None: continue total_boxes = results[0] points = results[1] draw = img.copy() for b in total_boxes: cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (255, 255, 255)) for p in points: for i in range(5): cv2.circle(draw, (p[i], p[i + 5]), 1, (255, 0, 0), 2) cv2.imshow("detection result", draw) cv2.waitKey(30) ''' ================================================ FILE: model/det1-symbol.json ================================================ { "nodes": [ { "op": "null", "param": {}, "name": "data", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "10", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv1", "inputs": [[0, 0], [1, 0], [2, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu1_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu1", "inputs": [[3, 0], [4, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(2,2)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool1", "inputs": [[5, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "16", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv2", "inputs": [[6, 0], [7, 0], [8, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu2_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu2", "inputs": [[9, 0], [10, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "32", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv3", "inputs": [[11, 0], [12, 0], [13, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu3_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu3", "inputs": [[14, 0], [15, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv4_2_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv4_2_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(1,1)", "no_bias": "False", "num_filter": "4", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv4_2", "inputs": [[16, 0], [17, 0], [18, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv4_1_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv4_1_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(1,1)", "no_bias": "False", "num_filter": "2", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv4_1", "inputs": [[16, 0], [20, 0], [21, 0]], "backward_source_id": -1 }, { "op": "SoftmaxActivation", "param": {"mode": "channel"}, "name": "prob1", "inputs": [[22, 0]], "backward_source_id": -1 } ], "arg_nodes": [ 0, 1, 2, 4, 7, 8, 10, 12, 13, 15, 17, 18, 20, 21 ], "heads": [[19, 0], [23, 0]] } ================================================ FILE: model/det1.prototxt ================================================ name: "PNet" input: "data" input_dim: 1 input_dim: 3 input_dim: 12 input_dim: 12 layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 10 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "PReLU1" type: "PReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 16 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "PReLU2" type: "PReLU" bottom: "conv2" top: "conv2" } layer { name: "conv3" type: "Convolution" bottom: "conv2" top: "conv3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 32 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "PReLU3" type: "PReLU" bottom: "conv3" top: "conv3" } layer { name: "conv4-1" type: "Convolution" bottom: "conv3" top: "conv4-1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 2 kernel_size: 1 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "conv4-2" type: "Convolution" bottom: "conv3" top: "conv4-2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 4 kernel_size: 1 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prob1" type: "Softmax" bottom: "conv4-1" top: "prob1" } ================================================ FILE: model/det2-symbol.json ================================================ { "nodes": [ { "op": "null", "param": {}, "name": "data", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "28", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv1", "inputs": [[0, 0], [1, 0], [2, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu1_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu1", "inputs": [[3, 0], [4, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool1", "inputs": [[5, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "48", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv2", "inputs": [[6, 0], [7, 0], [8, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu2_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu2", "inputs": [[9, 0], [10, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool2", "inputs": [[11, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(2,2)", "no_bias": "False", "num_filter": "64", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv3", "inputs": [[12, 0], [13, 0], [14, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu3_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu3", "inputs": [[15, 0], [16, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv4_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv4_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "128" }, "name": "conv4", "inputs": [[17, 0], [18, 0], [19, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu4_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu4", "inputs": [[20, 0], [21, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv5_2_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv5_2_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "4" }, "name": "conv5_2", "inputs": [[22, 0], [23, 0], [24, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv5_1_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv5_1_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "2" }, "name": "conv5_1", "inputs": [[22, 0], [26, 0], [27, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prob1_label", "inputs": [], "backward_source_id": -1 }, { "op": "SoftmaxOutput", "param": { "grad_scale": "1", "ignore_label": "-1", "multi_output": "False", "normalization": "null", "use_ignore": "False" }, "name": "prob1", "inputs": [[28, 0], [29, 0]], "backward_source_id": -1 } ], "arg_nodes": [ 0, 1, 2, 4, 7, 8, 10, 13, 14, 16, 18, 19, 21, 23, 24, 26, 27, 29 ], "heads": [[25, 0], [30, 0]] } ================================================ FILE: model/det2.prototxt ================================================ name: "RNet" input: "data" input_dim: 1 input_dim: 3 input_dim: 24 input_dim: 24 ########################## ###################### layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 28 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu1" type: "PReLU" bottom: "conv1" top: "conv1" propagate_down: true } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 48 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu2" type: "PReLU" bottom: "conv2" top: "conv2" propagate_down: true } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } #################################### ################################## layer { name: "conv3" type: "Convolution" bottom: "pool2" top: "conv3" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 64 kernel_size: 2 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu3" type: "PReLU" bottom: "conv3" top: "conv3" propagate_down: true } ############################### ############################### layer { name: "conv4" type: "InnerProduct" bottom: "conv3" top: "conv4" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } inner_product_param { num_output: 128 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu4" type: "PReLU" bottom: "conv4" top: "conv4" } layer { name: "conv5-1" type: "InnerProduct" bottom: "conv4" top: "conv5-1" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } inner_product_param { num_output: 2 #kernel_size: 1 #stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "conv5-2" type: "InnerProduct" bottom: "conv4" top: "conv5-2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { num_output: 4 #kernel_size: 1 #stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prob1" type: "Softmax" bottom: "conv5-1" top: "prob1" } ================================================ FILE: model/det3-symbol.json ================================================ { "nodes": [ { "op": "null", "param": {}, "name": "data", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "32", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv1", "inputs": [[0, 0], [1, 0], [2, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu1_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu1", "inputs": [[3, 0], [4, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool1", "inputs": [[5, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "64", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv2", "inputs": [[6, 0], [7, 0], [8, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu2_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu2", "inputs": [[9, 0], [10, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool2", "inputs": [[11, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "64", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv3", "inputs": [[12, 0], [13, 0], [14, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu3_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu3", "inputs": [[15, 0], [16, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(2,2)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool3", "inputs": [[17, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv4_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv4_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(2,2)", "no_bias": "False", "num_filter": "128", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv4", "inputs": [[18, 0], [19, 0], [20, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu4_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu4", "inputs": [[21, 0], [22, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv5_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv5_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "256" }, "name": "conv5", "inputs": [[23, 0], [24, 0], [25, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu5_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu5", "inputs": [[26, 0], [27, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv6_3_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv6_3_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "10" }, "name": "conv6_3", "inputs": [[28, 0], [29, 0], [30, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv6_2_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv6_2_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "4" }, "name": "conv6_2", "inputs": [[28, 0], [32, 0], [33, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv6_1_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv6_1_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "2" }, "name": "conv6_1", "inputs": [[28, 0], [35, 0], [36, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prob1_label", "inputs": [], "backward_source_id": -1 }, { "op": "SoftmaxOutput", "param": { "grad_scale": "1", "ignore_label": "-1", "multi_output": "False", "normalization": "null", "use_ignore": "False" }, "name": "prob1", "inputs": [[37, 0], [38, 0]], "backward_source_id": -1 } ], "arg_nodes": [ 0, 1, 2, 4, 7, 8, 10, 13, 14, 16, 19, 20, 22, 24, 25, 27, 29, 30, 32, 33, 35, 36, 38 ], "heads": [[31, 0], [34, 0], [39, 0]] } ================================================ FILE: model/det3.prototxt ================================================ name: "ONet" input: "data" input_dim: 1 input_dim: 3 input_dim: 48 input_dim: 48 ################################## layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 32 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu1" type: "PReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 64 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu2" type: "PReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv3" type: "Convolution" bottom: "pool2" top: "conv3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 64 kernel_size: 3 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu3" type: "PReLU" bottom: "conv3" top: "conv3" } layer { name: "pool3" type: "Pooling" bottom: "conv3" top: "pool3" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv4" type: "Convolution" bottom: "pool3" top: "conv4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 128 kernel_size: 2 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu4" type: "PReLU" bottom: "conv4" top: "conv4" } layer { name: "conv5" type: "InnerProduct" bottom: "conv4" top: "conv5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { #kernel_size: 3 num_output: 256 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "drop5" type: "Dropout" bottom: "conv5" top: "conv5" dropout_param { dropout_ratio: 0.25 } } layer { name: "prelu5" type: "PReLU" bottom: "conv5" top: "conv5" } layer { name: "conv6-1" type: "InnerProduct" bottom: "conv5" top: "conv6-1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { #kernel_size: 1 num_output: 2 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "conv6-2" type: "InnerProduct" bottom: "conv5" top: "conv6-2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { #kernel_size: 1 num_output: 4 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "conv6-3" type: "InnerProduct" bottom: "conv5" top: "conv6-3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { #kernel_size: 1 num_output: 10 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prob1" type: "Softmax" bottom: "conv6-1" top: "prob1" } ================================================ FILE: model/det4-symbol.json ================================================ { "nodes": [ { "op": "null", "param": {}, "name": "data", "inputs": [], "backward_source_id": -1 }, { "op": "SliceChannel", "param": { "axis": "1", "num_outputs": "5", "squeeze_axis": "False" }, "name": "slice", "inputs": [[0, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_1_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_1_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "28", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv1_1", "inputs": [[1, 0], [2, 0], [3, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu1_1_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu1_1", "inputs": [[4, 0], [5, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool1_1", "inputs": [[6, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_1_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_1_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "48", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv2_1", "inputs": [[7, 0], [8, 0], [9, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu2_1_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu2_1", "inputs": [[10, 0], [11, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool2_1", "inputs": [[12, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_1_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_1_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(2,2)", "no_bias": "False", "num_filter": "64", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv3_1", "inputs": [[13, 0], [14, 0], [15, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu3_1_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu3_1", "inputs": [[16, 0], [17, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_2_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_2_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "28", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv1_2", "inputs": [[1, 1], [19, 0], [20, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu1_2_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu1_2", "inputs": [[21, 0], [22, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool1_2", "inputs": [[23, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_2_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_2_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "48", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv2_2", "inputs": [[24, 0], [25, 0], [26, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu2_2_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu2_2", "inputs": [[27, 0], [28, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool2_2", "inputs": [[29, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_2_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_2_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(2,2)", "no_bias": "False", "num_filter": "64", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv3_2", "inputs": [[30, 0], [31, 0], [32, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu3_2_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu3_2", "inputs": [[33, 0], [34, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_3_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_3_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "28", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv1_3", "inputs": [[1, 2], [36, 0], [37, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu1_3_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu1_3", "inputs": [[38, 0], [39, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool1_3", "inputs": [[40, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_3_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_3_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "48", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv2_3", "inputs": [[41, 0], [42, 0], [43, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu2_3_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu2_3", "inputs": [[44, 0], [45, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool2_3", "inputs": [[46, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_3_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_3_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(2,2)", "no_bias": "False", "num_filter": "64", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv3_3", "inputs": [[47, 0], [48, 0], [49, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu3_3_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu3_3", "inputs": [[50, 0], [51, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_4_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_4_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "28", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv1_4", "inputs": [[1, 3], [53, 0], [54, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu1_4_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu1_4", "inputs": [[55, 0], [56, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool1_4", "inputs": [[57, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_4_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_4_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "48", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv2_4", "inputs": [[58, 0], [59, 0], [60, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu2_4_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu2_4", "inputs": [[61, 0], [62, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool2_4", "inputs": [[63, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_4_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_4_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(2,2)", "no_bias": "False", "num_filter": "64", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv3_4", "inputs": [[64, 0], [65, 0], [66, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu3_4_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu3_4", "inputs": [[67, 0], [68, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_5_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv1_5_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "28", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv1_5", "inputs": [[1, 4], [70, 0], [71, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu1_5_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu1_5", "inputs": [[72, 0], [73, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool1_5", "inputs": [[74, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_5_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv2_5_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(3,3)", "no_bias": "False", "num_filter": "48", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv2_5", "inputs": [[75, 0], [76, 0], [77, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu2_5_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu2_5", "inputs": [[78, 0], [79, 0]], "backward_source_id": -1 }, { "op": "Pooling", "param": { "global_pool": "False", "kernel": "(3,3)", "pad": "(0,0)", "pool_type": "max", "pooling_convention": "full", "stride": "(2,2)" }, "name": "pool2_5", "inputs": [[80, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_5_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "conv3_5_bias", "inputs": [], "backward_source_id": -1 }, { "op": "Convolution", "param": { "cudnn_off": "False", "cudnn_tune": "off", "dilate": "(1,1)", "kernel": "(2,2)", "no_bias": "False", "num_filter": "64", "num_group": "1", "pad": "(0,0)", "stride": "(1,1)", "workspace": "1024" }, "name": "conv3_5", "inputs": [[81, 0], [82, 0], [83, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu3_5_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu3_5", "inputs": [[84, 0], [85, 0]], "backward_source_id": -1 }, { "op": "Concat", "param": { "dim": "1", "num_args": "5" }, "name": "concat", "inputs": [[18, 0], [35, 0], [52, 0], [69, 0], [86, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc4_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc4_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "256" }, "name": "fc4", "inputs": [[87, 0], [88, 0], [89, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu4_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu4", "inputs": [[90, 0], [91, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc4_1_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc4_1_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "64" }, "name": "fc4_1", "inputs": [[92, 0], [93, 0], [94, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu4_1_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu4_1", "inputs": [[95, 0], [96, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc5_1_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc5_1_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "2" }, "name": "fc5_1", "inputs": [[97, 0], [98, 0], [99, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc4_2_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc4_2_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "64" }, "name": "fc4_2", "inputs": [[92, 0], [101, 0], [102, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu4_2_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu4_2", "inputs": [[103, 0], [104, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc5_2_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc5_2_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "2" }, "name": "fc5_2", "inputs": [[105, 0], [106, 0], [107, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc4_3_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc4_3_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "64" }, "name": "fc4_3", "inputs": [[92, 0], [109, 0], [110, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu4_3_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu4_3", "inputs": [[111, 0], [112, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc5_3_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc5_3_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "2" }, "name": "fc5_3", "inputs": [[113, 0], [114, 0], [115, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc4_4_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc4_4_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "64" }, "name": "fc4_4", "inputs": [[92, 0], [117, 0], [118, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu4_4_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu4_4", "inputs": [[119, 0], [120, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc5_4_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc5_4_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "2" }, "name": "fc5_4", "inputs": [[121, 0], [122, 0], [123, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc4_5_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc4_5_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "64" }, "name": "fc4_5", "inputs": [[92, 0], [125, 0], [126, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "prelu4_5_gamma", "inputs": [], "backward_source_id": -1 }, { "op": "LeakyReLU", "param": { "act_type": "prelu", "lower_bound": "0.125", "slope": "0.25", "upper_bound": "0.334" }, "name": "prelu4_5", "inputs": [[127, 0], [128, 0]], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc5_5_weight", "inputs": [], "backward_source_id": -1 }, { "op": "null", "param": {}, "name": "fc5_5_bias", "inputs": [], "backward_source_id": -1 }, { "op": "FullyConnected", "param": { "no_bias": "False", "num_hidden": "2" }, "name": "fc5_5", "inputs": [[129, 0], [130, 0], [131, 0]], "backward_source_id": -1 } ], "arg_nodes": [ 0, 2, 3, 5, 8, 9, 11, 14, 15, 17, 19, 20, 22, 25, 26, 28, 31, 32, 34, 36, 37, 39, 42, 43, 45, 48, 49, 51, 53, 54, 56, 59, 60, 62, 65, 66, 68, 70, 71, 73, 76, 77, 79, 82, 83, 85, 88, 89, 91, 93, 94, 96, 98, 99, 101, 102, 104, 106, 107, 109, 110, 112, 114, 115, 117, 118, 120, 122, 123, 125, 126, 128, 130, 131 ], "heads": [[100, 0], [108, 0], [116, 0], [124, 0], [132, 0]] } ================================================ FILE: model/det4.prototxt ================================================ name: "LNet" input: "data" input_dim: 1 input_dim: 15 input_dim: 24 input_dim: 24 layer { name: "slicer_data" type: "Slice" bottom: "data" top: "data241" top: "data242" top: "data243" top: "data244" top: "data245" slice_param { axis: 1 slice_point: 3 slice_point: 6 slice_point: 9 slice_point: 12 } } layer { name: "conv1_1" type: "Convolution" bottom: "data241" top: "conv1_1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 28 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu1_1" type: "PReLU" bottom: "conv1_1" top: "conv1_1" } layer { name: "pool1_1" type: "Pooling" bottom: "conv1_1" top: "pool1_1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv2_1" type: "Convolution" bottom: "pool1_1" top: "conv2_1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 48 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu2_1" type: "PReLU" bottom: "conv2_1" top: "conv2_1" } layer { name: "pool2_1" type: "Pooling" bottom: "conv2_1" top: "pool2_1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv3_1" type: "Convolution" bottom: "pool2_1" top: "conv3_1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 64 kernel_size: 2 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu3_1" type: "PReLU" bottom: "conv3_1" top: "conv3_1" } ########################## layer { name: "conv1_2" type: "Convolution" bottom: "data242" top: "conv1_2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 28 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu1_2" type: "PReLU" bottom: "conv1_2" top: "conv1_2" } layer { name: "pool1_2" type: "Pooling" bottom: "conv1_2" top: "pool1_2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv2_2" type: "Convolution" bottom: "pool1_2" top: "conv2_2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 48 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu2_2" type: "PReLU" bottom: "conv2_2" top: "conv2_2" } layer { name: "pool2_2" type: "Pooling" bottom: "conv2_2" top: "pool2_2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv3_2" type: "Convolution" bottom: "pool2_2" top: "conv3_2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 64 kernel_size: 2 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu3_2" type: "PReLU" bottom: "conv3_2" top: "conv3_2" } ########################## ########################## layer { name: "conv1_3" type: "Convolution" bottom: "data243" top: "conv1_3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 28 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu1_3" type: "PReLU" bottom: "conv1_3" top: "conv1_3" } layer { name: "pool1_3" type: "Pooling" bottom: "conv1_3" top: "pool1_3" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv2_3" type: "Convolution" bottom: "pool1_3" top: "conv2_3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 48 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu2_3" type: "PReLU" bottom: "conv2_3" top: "conv2_3" } layer { name: "pool2_3" type: "Pooling" bottom: "conv2_3" top: "pool2_3" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv3_3" type: "Convolution" bottom: "pool2_3" top: "conv3_3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 64 kernel_size: 2 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu3_3" type: "PReLU" bottom: "conv3_3" top: "conv3_3" } ########################## ########################## layer { name: "conv1_4" type: "Convolution" bottom: "data244" top: "conv1_4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 28 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu1_4" type: "PReLU" bottom: "conv1_4" top: "conv1_4" } layer { name: "pool1_4" type: "Pooling" bottom: "conv1_4" top: "pool1_4" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv2_4" type: "Convolution" bottom: "pool1_4" top: "conv2_4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 48 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu2_4" type: "PReLU" bottom: "conv2_4" top: "conv2_4" } layer { name: "pool2_4" type: "Pooling" bottom: "conv2_4" top: "pool2_4" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv3_4" type: "Convolution" bottom: "pool2_4" top: "conv3_4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 64 kernel_size: 2 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu3_4" type: "PReLU" bottom: "conv3_4" top: "conv3_4" } ########################## ########################## layer { name: "conv1_5" type: "Convolution" bottom: "data245" top: "conv1_5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 28 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu1_5" type: "PReLU" bottom: "conv1_5" top: "conv1_5" } layer { name: "pool1_5" type: "Pooling" bottom: "conv1_5" top: "pool1_5" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv2_5" type: "Convolution" bottom: "pool1_5" top: "conv2_5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 48 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu2_5" type: "PReLU" bottom: "conv2_5" top: "conv2_5" } layer { name: "pool2_5" type: "Pooling" bottom: "conv2_5" top: "pool2_5" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv3_5" type: "Convolution" bottom: "pool2_5" top: "conv3_5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 64 kernel_size: 2 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu3_5" type: "PReLU" bottom: "conv3_5" top: "conv3_5" } ########################## layer { name: "concat" bottom: "conv3_1" bottom: "conv3_2" bottom: "conv3_3" bottom: "conv3_4" bottom: "conv3_5" top: "conv3" type: "Concat" concat_param { axis: 1 } } ########################## layer { name: "fc4" type: "InnerProduct" bottom: "conv3" top: "fc4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { num_output: 256 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu4" type: "PReLU" bottom: "fc4" top: "fc4" } ############################ layer { name: "fc4_1" type: "InnerProduct" bottom: "fc4" top: "fc4_1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { num_output: 64 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu4_1" type: "PReLU" bottom: "fc4_1" top: "fc4_1" } layer { name: "fc5_1" type: "InnerProduct" bottom: "fc4_1" top: "fc5_1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { num_output: 2 weight_filler { type: "xavier" #type: "constant" #value: 0 } bias_filler { type: "constant" value: 0 } } } ######################### layer { name: "fc4_2" type: "InnerProduct" bottom: "fc4" top: "fc4_2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { num_output: 64 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu4_2" type: "PReLU" bottom: "fc4_2" top: "fc4_2" } layer { name: "fc5_2" type: "InnerProduct" bottom: "fc4_2" top: "fc5_2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { num_output: 2 weight_filler { type: "xavier" #type: "constant" #value: 0 } bias_filler { type: "constant" value: 0 } } } ######################### layer { name: "fc4_3" type: "InnerProduct" bottom: "fc4" top: "fc4_3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { num_output: 64 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu4_3" type: "PReLU" bottom: "fc4_3" top: "fc4_3" } layer { name: "fc5_3" type: "InnerProduct" bottom: "fc4_3" top: "fc5_3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { num_output: 2 weight_filler { type: "xavier" #type: "constant" #value: 0 } bias_filler { type: "constant" value: 0 } } } ######################### layer { name: "fc4_4" type: "InnerProduct" bottom: "fc4" top: "fc4_4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { num_output: 64 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu4_4" type: "PReLU" bottom: "fc4_4" top: "fc4_4" } layer { name: "fc5_4" type: "InnerProduct" bottom: "fc4_4" top: "fc5_4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { num_output: 2 weight_filler { type: "xavier" #type: "constant" #value: 0 } bias_filler { type: "constant" value: 0 } } } ######################### layer { name: "fc4_5" type: "InnerProduct" bottom: "fc4" top: "fc4_5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { num_output: 64 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu4_5" type: "PReLU" bottom: "fc4_5" top: "fc4_5" } layer { name: "fc5_5" type: "InnerProduct" bottom: "fc4_5" top: "fc5_5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { num_output: 2 weight_filler { type: "xavier" #type: "constant" #value: 0 } bias_filler { type: "constant" value: 0 } } } ######################### ================================================ FILE: mtcnn_detector.py ================================================ # coding: utf-8 import os import mxnet as mx import numpy as np import math import cv2 from multiprocessing import Pool from itertools import repeat from itertools import izip from helper import nms, adjust_input, generate_bbox, detect_first_stage_warpper class MtcnnDetector(object): """ Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Neural Networks see https://github.com/kpzhang93/MTCNN_face_detection_alignment this is a mxnet version """ def __init__(self, model_folder='.', minsize = 20, threshold = [0.6, 0.7, 0.8], factor = 0.709, num_worker = 1, accurate_landmark = False, ctx=mx.cpu()): """ Initialize the detector Parameters: ---------- model_folder : string path for the models minsize : float number minimal face to detect threshold : float number detect threshold for 3 stages factor: float number scale factor for image pyramid num_worker: int number number of processes we use for first stage accurate_landmark: bool use accurate landmark localization or not """ self.num_worker = num_worker self.accurate_landmark = accurate_landmark # load 4 models from folder models = ['det1', 'det2', 'det3','det4'] models = [ os.path.join(model_folder, f) for f in models] self.PNets = [] for i in range(num_worker): workner_net = mx.model.FeedForward.load(models[0], 1, ctx=ctx) self.PNets.append(workner_net) self.Pool = Pool(num_worker) self.RNet = mx.model.FeedForward.load(models[1], 1, ctx=ctx) self.ONet = mx.model.FeedForward.load(models[2], 1, ctx=ctx) self.LNet = mx.model.FeedForward.load(models[3], 1, ctx=ctx) self.minsize = float(minsize) self.factor = float(factor) self.threshold = threshold def convert_to_square(self, bbox): """ convert bbox to square Parameters: ---------- bbox: numpy array , shape n x 5 input bbox Returns: ------- square bbox """ square_bbox = bbox.copy() h = bbox[:, 3] - bbox[:, 1] + 1 w = bbox[:, 2] - bbox[:, 0] + 1 max_side = np.maximum(h,w) square_bbox[:, 0] = bbox[:, 0] + w*0.5 - max_side*0.5 square_bbox[:, 1] = bbox[:, 1] + h*0.5 - max_side*0.5 square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1 square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1 return square_bbox def calibrate_box(self, bbox, reg): """ calibrate bboxes Parameters: ---------- bbox: numpy array, shape n x 5 input bboxes reg: numpy array, shape n x 4 bboxex adjustment Returns: ------- bboxes after refinement """ w = bbox[:, 2] - bbox[:, 0] + 1 w = np.expand_dims(w, 1) h = bbox[:, 3] - bbox[:, 1] + 1 h = np.expand_dims(h, 1) reg_m = np.hstack([w, h, w, h]) aug = reg_m * reg bbox[:, 0:4] = bbox[:, 0:4] + aug return bbox def pad(self, bboxes, w, h): """ pad the the bboxes, alse restrict the size of it Parameters: ---------- bboxes: numpy array, n x 5 input bboxes w: float number width of the input image h: float number height of the input image Returns : ------s dy, dx : numpy array, n x 1 start point of the bbox in target image edy, edx : numpy array, n x 1 end point of the bbox in target image y, x : numpy array, n x 1 start point of the bbox in original image ex, ex : numpy array, n x 1 end point of the bbox in original image tmph, tmpw: numpy array, n x 1 height and width of the bbox """ tmpw, tmph = bboxes[:, 2] - bboxes[:, 0] + 1, bboxes[:, 3] - bboxes[:, 1] + 1 num_box = bboxes.shape[0] dx , dy= np.zeros((num_box, )), np.zeros((num_box, )) edx, edy = tmpw.copy()-1, tmph.copy()-1 x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3] tmp_index = np.where(ex > w-1) edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index] ex[tmp_index] = w - 1 tmp_index = np.where(ey > h-1) edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index] ey[tmp_index] = h - 1 tmp_index = np.where(x < 0) dx[tmp_index] = 0 - x[tmp_index] x[tmp_index] = 0 tmp_index = np.where(y < 0) dy[tmp_index] = 0 - y[tmp_index] y[tmp_index] = 0 return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] return_list = [item.astype(np.int32) for item in return_list] return return_list def slice_index(self, number): """ slice the index into (n,n,m), m < n Parameters: ---------- number: int number number """ def chunks(l, n): """Yield successive n-sized chunks from l.""" for i in range(0, len(l), n): yield l[i:i + n] num_list = range(number) return list(chunks(num_list, self.num_worker)) def detect_face(self, img): """ detect face over img Parameters: ---------- img: numpy array, bgr order of shape (1, 3, n, m) input image Retures: ------- bboxes: numpy array, n x 5 (x1,y2,x2,y2,score) bboxes points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5) landmarks """ # check input MIN_DET_SIZE = 12 if img is None: return None # only works for color image if len(img.shape) != 3: return None # detected boxes total_boxes = [] height, width, _ = img.shape minl = min( height, width) # get all the valid scales scales = [] m = MIN_DET_SIZE/self.minsize minl *= m factor_count = 0 while minl > MIN_DET_SIZE: scales.append(m*self.factor**factor_count) minl *= self.factor factor_count += 1 ############################################# # first stage ############################################# #for scale in scales: # return_boxes = self.detect_first_stage(img, scale, 0) # if return_boxes is not None: # total_boxes.append(return_boxes) sliced_index = self.slice_index(len(scales)) total_boxes = [] for batch in sliced_index: local_boxes = self.Pool.map( detect_first_stage_warpper, \ izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) ) total_boxes.extend(local_boxes) # remove the Nones total_boxes = [ i for i in total_boxes if i is not None] if len(total_boxes) == 0: return None total_boxes = np.vstack(total_boxes) if total_boxes.size == 0: return None # merge the detection from first stage pick = nms(total_boxes[:, 0:5], 0.7, 'Union') total_boxes = total_boxes[pick] bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1 bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1 # refine the bboxes total_boxes = np.vstack([total_boxes[:, 0]+total_boxes[:, 5] * bbw, total_boxes[:, 1]+total_boxes[:, 6] * bbh, total_boxes[:, 2]+total_boxes[:, 7] * bbw, total_boxes[:, 3]+total_boxes[:, 8] * bbh, total_boxes[:, 4] ]) total_boxes = total_boxes.T total_boxes = self.convert_to_square(total_boxes) total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4]) ############################################# # second stage ############################################# num_box = total_boxes.shape[0] # pad the bbox [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height) # (3, 24, 24) is the input shape for RNet input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32) for i in range(num_box): tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8) tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :] input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24))) output = self.RNet.predict(input_buf) # filter the total_boxes with threshold passed = np.where(output[1][:, 1] > self.threshold[1]) total_boxes = total_boxes[passed] if total_boxes.size == 0: return None total_boxes[:, 4] = output[1][passed, 1].reshape((-1,)) reg = output[0][passed] # nms pick = nms(total_boxes, 0.7, 'Union') total_boxes = total_boxes[pick] total_boxes = self.calibrate_box(total_boxes, reg[pick]) total_boxes = self.convert_to_square(total_boxes) total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4]) ############################################# # third stage ############################################# num_box = total_boxes.shape[0] # pad the bbox [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height) # (3, 48, 48) is the input shape for ONet input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32) for i in range(num_box): tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32) tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :] input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48))) output = self.ONet.predict(input_buf) # filter the total_boxes with threshold passed = np.where(output[2][:, 1] > self.threshold[2]) total_boxes = total_boxes[passed] if total_boxes.size == 0: return None total_boxes[:, 4] = output[2][passed, 1].reshape((-1,)) reg = output[1][passed] points = output[0][passed] # compute landmark points bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1 bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1 points[:, 0:5] = np.expand_dims(total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5] points[:, 5:10] = np.expand_dims(total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10] # nms total_boxes = self.calibrate_box(total_boxes, reg) pick = nms(total_boxes, 0.7, 'Min') total_boxes = total_boxes[pick] points = points[pick] if not self.accurate_landmark: return total_boxes, points ############################################# # extended stage ############################################# num_box = total_boxes.shape[0] patchw = np.maximum(total_boxes[:, 2]-total_boxes[:, 0]+1, total_boxes[:, 3]-total_boxes[:, 1]+1) patchw = np.round(patchw*0.25) # make it even patchw[np.where(np.mod(patchw,2) == 1)] += 1 input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32) for i in range(5): x, y = points[:, i], points[:, i+5] x, y = np.round(x-0.5*patchw), np.round(y-0.5*patchw) [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(np.vstack([x, y, x+patchw-1, y+patchw-1]).T, width, height) for j in range(num_box): tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32) tmpim[dy[j]:edy[j]+1, dx[j]:edx[j]+1, :] = img[y[j]:ey[j]+1, x[j]:ex[j]+1, :] input_buf[j, i*3:i*3+3, :, :] = adjust_input(cv2.resize(tmpim, (24, 24))) output = self.LNet.predict(input_buf) pointx = np.zeros((num_box, 5)) pointy = np.zeros((num_box, 5)) for k in range(5): # do not make a large movement tmp_index = np.where(np.abs(output[k]-0.5) > 0.35) output[k][tmp_index[0]] = 0.5 pointx[:, k] = np.round(points[:, k] - 0.5*patchw) + output[k][:, 0]*patchw pointy[:, k] = np.round(points[:, k+5] - 0.5*patchw) + output[k][:, 1]*patchw points = np.hstack([pointx, pointy]) points = points.astype(np.int32) return total_boxes, points def list2colmatrix(self, pts_list): """ convert list to column matrix Parameters: ---------- pts_list: input list Retures: ------- colMat: """ assert len(pts_list) > 0 colMat = [] for i in range(len(pts_list)): colMat.append(pts_list[i][0]) colMat.append(pts_list[i][1]) colMat = np.matrix(colMat).transpose() return colMat def find_tfrom_between_shapes(self, from_shape, to_shape): """ find transform between shapes Parameters: ---------- from_shape: to_shape: Retures: ------- tran_m: tran_b: """ assert from_shape.shape[0] == to_shape.shape[0] and from_shape.shape[0] % 2 == 0 sigma_from = 0.0 sigma_to = 0.0 cov = np.matrix([[0.0, 0.0], [0.0, 0.0]]) # compute the mean and cov from_shape_points = from_shape.reshape(from_shape.shape[0]/2, 2) to_shape_points = to_shape.reshape(to_shape.shape[0]/2, 2) mean_from = from_shape_points.mean(axis=0) mean_to = to_shape_points.mean(axis=0) for i in range(from_shape_points.shape[0]): temp_dis = np.linalg.norm(from_shape_points[i] - mean_from) sigma_from += temp_dis * temp_dis temp_dis = np.linalg.norm(to_shape_points[i] - mean_to) sigma_to += temp_dis * temp_dis cov += (to_shape_points[i].transpose() - mean_to.transpose()) * (from_shape_points[i] - mean_from) sigma_from = sigma_from / to_shape_points.shape[0] sigma_to = sigma_to / to_shape_points.shape[0] cov = cov / to_shape_points.shape[0] # compute the affine matrix s = np.matrix([[1.0, 0.0], [0.0, 1.0]]) u, d, vt = np.linalg.svd(cov) if np.linalg.det(cov) < 0: if d[1] < d[0]: s[1, 1] = -1 else: s[0, 0] = -1 r = u * s * vt c = 1.0 if sigma_from != 0: c = 1.0 / sigma_from * np.trace(np.diag(d) * s) tran_b = mean_to.transpose() - c * r * mean_from.transpose() tran_m = c * r return tran_m, tran_b def extract_image_chips(self, img, points, desired_size=256, padding=0): """ crop and align face Parameters: ---------- img: numpy array, bgr order of shape (1, 3, n, m) input image points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5) desired_size: default 256 padding: default 0 Retures: ------- crop_imgs: list, n cropped and aligned faces """ crop_imgs = [] for p in points: shape =[] for k in range(len(p)/2): shape.append(p[k]) shape.append(p[k+5]) if padding > 0: padding = padding else: padding = 0 # average positions of face points mean_face_shape_x = [0.224152, 0.75610125, 0.490127, 0.254149, 0.726104] mean_face_shape_y = [0.2119465, 0.2119465, 0.628106, 0.780233, 0.780233] from_points = [] to_points = [] for i in range(len(shape)/2): x = (padding + mean_face_shape_x[i]) / (2 * padding + 1) * desired_size y = (padding + mean_face_shape_y[i]) / (2 * padding + 1) * desired_size to_points.append([x, y]) from_points.append([shape[2*i], shape[2*i+1]]) # convert the points to Mat from_mat = self.list2colmatrix(from_points) to_mat = self.list2colmatrix(to_points) # compute the similar transfrom tran_m, tran_b = self.find_tfrom_between_shapes(from_mat, to_mat) probe_vec = np.matrix([1.0, 0.0]).transpose() probe_vec = tran_m * probe_vec scale = np.linalg.norm(probe_vec) angle = 180.0 / math.pi * math.atan2(probe_vec[1, 0], probe_vec[0, 0]) from_center = [(shape[0]+shape[2])/2.0, (shape[1]+shape[3])/2.0] to_center = [0, 0] to_center[1] = desired_size * 0.4 to_center[0] = desired_size * 0.5 ex = to_center[0] - from_center[0] ey = to_center[1] - from_center[1] rot_mat = cv2.getRotationMatrix2D((from_center[0], from_center[1]), -1*angle, scale) rot_mat[0][2] += ex rot_mat[1][2] += ey chips = cv2.warpAffine(img, rot_mat, (desired_size, desired_size)) crop_imgs.append(chips) return crop_imgs