Repository: YYuanAnyVision/mxnet_mtcnn_face_detection
Branch: master
Commit: b56065418b63
Files: 21
Total size: 98.5 KB
Directory structure:
gitextract_cj2cvrn1/
├── .gitignore
├── README.md
├── helper.py
├── main.py
├── model/
│ ├── det1-0001.params
│ ├── det1-symbol.json
│ ├── det1.caffemodel
│ ├── det1.prototxt
│ ├── det2-0001.params
│ ├── det2-symbol.json
│ ├── det2.caffemodel
│ ├── det2.prototxt
│ ├── det3-0001.params
│ ├── det3-symbol.json
│ ├── det3.caffemodel
│ ├── det3.prototxt
│ ├── det4-0001.params
│ ├── det4-symbol.json
│ ├── det4.caffemodel
│ └── det4.prototxt
└── mtcnn_detector.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.pyc
caffe_converter
.idea
================================================
FILE: README.md
================================================
# MTCNN_face_detection_and_alignment
## About
This is a python/mxnet implementation of [Zhang](https://kpzhang93.github.io/)'s work **<Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Neural Networks>**. It is fast and accurate; see the original project [here](https://github.com/kpzhang93/MTCNN_face_detection_alignment).
It should produce **almost** the same output as the original work; it is meant for mxnet fans and those who can't afford matlab :)
[中文blog](https://pangyupo.github.io/2016/10/22/mxnet-mtcnn/)
## Requirement
- opencv
I use cv2 for image io and resize (much faster than skimage); the input image's channel order is actually BGR
- mxnet
**please update to the newest version, we need 'full' mode in Pooling operation**
Only tested on Linux and Mac
## Test
run:
``python main.py``
you can change `ctx` to `mx.gpu(0)` for faster detection
--- update 20161028 ---
By setting ``num_worker=4`` and ``accurate_landmark=False`` we can reduce the detection time by 1/4-1/3. The bboxes stay the same, but the last landmark fine-tuning stage (mtcnn_v1) is skipped.
--- update 20161207 ---
Added the function `extract_image_chips` (called in `main.py`); examples:




See `mtcnn_detector.py` for details about the parameters. This function uses [dlib](http://dlib.net/)'s alignment strategy, which works well on profile images :)
## Results

## License
MIT LICENSE
## Reference
K. Zhang, Z. Zhang, Z. Li and Y. Qiao, "Joint Face Detection and Alignment Using Multitask Cascaded Convolutional Networks", IEEE Signal Processing Letters
================================================
FILE: helper.py
================================================
# coding: utf-8
# YuanYang
import math
import cv2
import numpy as np
def nms(boxes, overlap_threshold, mode='Union'):
    """
        greedy non max suppression

    Parameters:
    ----------
        boxes: numpy array n x 5
            one box per row, columns are (x1, y1, x2, y2, score)
        overlap_threshold: float number
            a remaining box is dropped when its overlap with the box
            just picked is strictly greater than this value
        mode: string
            how to compute the overlap ratio: 'Min' divides the
            intersection by the smaller of the two areas, any other
            value divides it by the union of the two areas
    Returns:
    -------
        list with the row indexes of the selected boxes, ordered from
        the highest score to the lowest (empty list if there are no boxes)
    """
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # do all the arithmetic in floating point; this covers signed AND
    # unsigned integers, because with an unsigned dtype
    # `xx2 - xx1 + 1` wraps around for boxes that do not intersect and
    # produces a bogus (large) overlap
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")

    # grab the coordinates and the score of every box
    x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]

    # coordinates are treated as inclusive pixel indexes, hence the +1
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    # ascending sort: the best remaining box is always the last entry
    idxs = np.argsort(score)

    pick = []
    while len(idxs) > 0:
        # take the best remaining box and keep it
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]

        # intersection rectangle between the picked box and all the others
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # width and height are clamped at 0 for disjoint boxes
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        inter = w * h

        if mode == 'Min':
            overlap = inter / np.minimum(area[i], area[rest])
        else:
            overlap = inter / (area[i] + area[rest] - inter)

        # drop the picked box and every box overlapping it too much
        idxs = rest[np.logical_not(overlap > overlap_threshold)]

    return pick
def adjust_input(in_data):
    """
        adjust the input from (h, w, c) to ( 1, c, h, w) for network input
        and normalise the pixel values

    Parameters:
    ----------
        in_data: numpy array of shape (h, w, c)
            input data
    Returns:
    -------
        out_data: numpy array of shape (1, c, h, w)
            float32 copy of the input, channel first, with a leading
            batch axis and values mapped through (x - 127.5) * 0.0078125
    """
    # copy=False converts only when the dtype differs from float32; this
    # replaces an identity (`is not`) test between dtype objects
    out_data = in_data.astype(np.float32, copy=False)

    # (h, w, c) -> (c, h, w) -> (1, c, h, w)
    out_data = np.expand_dims(out_data.transpose((2, 0, 1)), 0)

    # the subtraction allocates a new array, so the caller's data is
    # never modified; 0.0078125 == 1 / 128
    out_data = (out_data - 127.5) * 0.0078125
    return out_data
def generate_bbox(map, reg, scale, threshold):
    """
        turn the positions of a score map that pass the threshold into boxes

    Parameters:
    ----------
        map: numpy array
            detect score for each position, indexed as map[row, col]
        reg: numpy array
            bbox regression values, indexed as reg[0, k, row, col]
            for k in 0..3
        scale: float number
            scale of this detection, box coordinates are divided by it
        threshold: float number
            only positions whose score is greater than this are kept
    Returns:
    -------
        numpy array with one row per kept position and 9 columns:
        x1, y1, x2, y2, score, followed by the 4 regression values;
        an empty array (np.array([])) when no position passes
    """
    stride = 2
    cellsize = 12

    hits = np.where(map > threshold)
    rows, cols = hits[0], hits[1]

    # find nothing
    if rows.size == 0:
        return np.array([])

    # 4 x k block holding the regression values of the kept positions
    offsets = np.array([reg[0, k, rows, cols] for k in range(4)])
    scores = map[rows, cols]

    # top-left corner of each cell on the score map grid
    left = stride * cols + 1
    top = stride * rows + 1

    stacked = np.vstack([
        np.round(left / scale),
        np.round(top / scale),
        np.round((left + cellsize) / scale),
        np.round((top + cellsize) / scale),
        scores,
        offsets,
    ])

    # one row per box
    return stacked.T
def detect_first_stage(img, net, scale, threshold):
    """
        run PNet for first stage on one scale of the image

    Parameters:
    ----------
        img: numpy array, bgr order
            input image, its shape must unpack as (height, width, channels)
        net: PNet
            worker, only its predict() method is used
        scale: float number
            how much should the input image scale
        threshold: float number
            detect threshold handed to generate_bbox
    Returns:
    -------
        total_boxes : bboxes kept by nms (overlap 0.5, 'Union' mode),
        or None when generate_bbox returns an empty array
    """
    src_h, src_w, _ = img.shape

    # cv2.resize expects the target size as (width, height)
    target_size = (int(math.ceil(src_w * scale)),
                   int(math.ceil(src_h * scale)))
    resized = cv2.resize(img, target_size)

    # adjust for the network input and run the network
    net_out = net.predict(adjust_input(resized))

    # net_out[1][0, 1] is used as the score map, net_out[0] as the regression
    candidates = generate_bbox(net_out[1][0, 1, :, :], net_out[0],
                               scale, threshold)
    if candidates.size == 0:
        return None

    # nms on the (x1, y1, x2, y2, score) columns only
    keep = nms(candidates[:, 0:5], 0.5, mode='Union')
    return candidates[keep]
def detect_first_stage_warpper(args):
    """
        single-argument adapter around detect_first_stage

    Parameters:
    ----------
        args: sequence
            positional arguments of detect_first_stage, in order
            (img, net, scale, threshold)
    Returns:
    -------
        whatever detect_first_stage returns for these arguments
    """
    outcome = detect_first_stage(*args)
    return outcome
================================================
FILE: main.py
================================================
# coding: utf-8
import mxnet as mx
from mtcnn_detector import MtcnnDetector
import cv2
import os
import time
# Demo script: detect faces in 'test2.jpg', save/show the aligned face
# chips and display the image with boxes and landmarks drawn on it.
detector = MtcnnDetector(model_folder='model', ctx=mx.cpu(0), num_worker=4, accurate_landmark=False)

img = cv2.imread('test2.jpg')
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than inside the detector
if img is None:
    raise IOError("could not read image 'test2.jpg'")

# run detector
results = detector.detect_face(img)

if results is not None:

    total_boxes = results[0]
    points = results[1]

    # extract aligned face chips
    chips = detector.extract_image_chips(img, points, 144, 0.37)
    for i, chip in enumerate(chips):
        cv2.imshow('chip_'+str(i), chip)
        cv2.imwrite('chip_'+str(i)+'.png', chip)

    draw = img.copy()
    for b in total_boxes:
        cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (255, 255, 255))

    for p in points:
        for i in range(5):
            # p[i] is paired with p[i + 5] as the (x, y) of landmark i;
            # cast to int like the rectangle corners above, OpenCV drawing
            # functions need integer pixel coordinates
            cv2.circle(draw, (int(p[i]), int(p[i + 5])), 1, (0, 0, 255), 2)

    cv2.imshow("detection result", draw)
    cv2.waitKey(0)
# --------------
# test on camera
# --------------
'''
camera = cv2.VideoCapture(0)
while True:
grab, frame = camera.read()
img = cv2.resize(frame, (320,180))
t1 = time.time()
results = detector.detect_face(img)
print 'time: ',time.time() - t1
if results is None:
continue
total_boxes = results[0]
points = results[1]
draw = img.copy()
for b in total_boxes:
cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (255, 255, 255))
for p in points:
for i in range(5):
cv2.circle(draw, (p[i], p[i + 5]), 1, (255, 0, 0), 2)
cv2.imshow("detection result", draw)
cv2.waitKey(30)
'''
================================================
FILE: model/det1-symbol.json
================================================
{
"nodes": [
{
"op": "null",
"param": {},
"name": "data",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "10",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv1",
"inputs": [[0, 0], [1, 0], [2, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu1_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu1",
"inputs": [[3, 0], [4, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(2,2)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool1",
"inputs": [[5, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "16",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv2",
"inputs": [[6, 0], [7, 0], [8, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu2_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu2",
"inputs": [[9, 0], [10, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "32",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv3",
"inputs": [[11, 0], [12, 0], [13, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu3_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu3",
"inputs": [[14, 0], [15, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(1,1)",
"no_bias": "False",
"num_filter": "4",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv4_2",
"inputs": [[16, 0], [17, 0], [18, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(1,1)",
"no_bias": "False",
"num_filter": "2",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv4_1",
"inputs": [[16, 0], [20, 0], [21, 0]],
"backward_source_id": -1
},
{
"op": "SoftmaxActivation",
"param": {"mode": "channel"},
"name": "prob1",
"inputs": [[22, 0]],
"backward_source_id": -1
}
],
"arg_nodes": [
0,
1,
2,
4,
7,
8,
10,
12,
13,
15,
17,
18,
20,
21
],
"heads": [[19, 0], [23, 0]]
}
================================================
FILE: model/det1.prototxt
================================================
name: "PNet"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 12
input_dim: 12
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 10
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "PReLU1"
type: "PReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "PReLU2"
type: "PReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "conv3"
type: "Convolution"
bottom: "conv2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 32
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "PReLU3"
type: "PReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4-1"
type: "Convolution"
bottom: "conv3"
top: "conv4-1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 2
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4-2"
type: "Convolution"
bottom: "conv3"
top: "conv4-2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prob1"
type: "Softmax"
bottom: "conv4-1"
top: "prob1"
}
================================================
FILE: model/det2-symbol.json
================================================
{
"nodes": [
{
"op": "null",
"param": {},
"name": "data",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "28",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv1",
"inputs": [[0, 0], [1, 0], [2, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu1_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu1",
"inputs": [[3, 0], [4, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool1",
"inputs": [[5, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "48",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv2",
"inputs": [[6, 0], [7, 0], [8, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu2_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu2",
"inputs": [[9, 0], [10, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool2",
"inputs": [[11, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(2,2)",
"no_bias": "False",
"num_filter": "64",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv3",
"inputs": [[12, 0], [13, 0], [14, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu3_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu3",
"inputs": [[15, 0], [16, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "128"
},
"name": "conv4",
"inputs": [[17, 0], [18, 0], [19, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu4_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu4",
"inputs": [[20, 0], [21, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv5_2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv5_2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "4"
},
"name": "conv5_2",
"inputs": [[22, 0], [23, 0], [24, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv5_1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv5_1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "2"
},
"name": "conv5_1",
"inputs": [[22, 0], [26, 0], [27, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prob1_label",
"inputs": [],
"backward_source_id": -1
},
{
"op": "SoftmaxOutput",
"param": {
"grad_scale": "1",
"ignore_label": "-1",
"multi_output": "False",
"normalization": "null",
"use_ignore": "False"
},
"name": "prob1",
"inputs": [[28, 0], [29, 0]],
"backward_source_id": -1
}
],
"arg_nodes": [
0,
1,
2,
4,
7,
8,
10,
13,
14,
16,
18,
19,
21,
23,
24,
26,
27,
29
],
"heads": [[25, 0], [30, 0]]
}
================================================
FILE: model/det2.prototxt
================================================
name: "RNet"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 24
input_dim: 24
##########################
######################
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 28
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu1"
type: "PReLU"
bottom: "conv1"
top: "conv1"
propagate_down: true
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 48
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu2"
type: "PReLU"
bottom: "conv2"
top: "conv2"
propagate_down: true
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
####################################
##################################
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 64
kernel_size: 2
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu3"
type: "PReLU"
bottom: "conv3"
top: "conv3"
propagate_down: true
}
###############################
###############################
layer {
name: "conv4"
type: "InnerProduct"
bottom: "conv3"
top: "conv4"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
inner_product_param {
num_output: 128
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu4"
type: "PReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "conv5-1"
type: "InnerProduct"
bottom: "conv4"
top: "conv5-1"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
inner_product_param {
num_output: 2
#kernel_size: 1
#stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv5-2"
type: "InnerProduct"
bottom: "conv4"
top: "conv5-2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 4
#kernel_size: 1
#stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prob1"
type: "Softmax"
bottom: "conv5-1"
top: "prob1"
}
================================================
FILE: model/det3-symbol.json
================================================
{
"nodes": [
{
"op": "null",
"param": {},
"name": "data",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "32",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv1",
"inputs": [[0, 0], [1, 0], [2, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu1_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu1",
"inputs": [[3, 0], [4, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool1",
"inputs": [[5, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "64",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv2",
"inputs": [[6, 0], [7, 0], [8, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu2_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu2",
"inputs": [[9, 0], [10, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool2",
"inputs": [[11, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "64",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv3",
"inputs": [[12, 0], [13, 0], [14, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu3_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu3",
"inputs": [[15, 0], [16, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(2,2)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool3",
"inputs": [[17, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(2,2)",
"no_bias": "False",
"num_filter": "128",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv4",
"inputs": [[18, 0], [19, 0], [20, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu4_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu4",
"inputs": [[21, 0], [22, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv5_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv5_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "256"
},
"name": "conv5",
"inputs": [[23, 0], [24, 0], [25, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu5_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu5",
"inputs": [[26, 0], [27, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv6_3_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv6_3_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "10"
},
"name": "conv6_3",
"inputs": [[28, 0], [29, 0], [30, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv6_2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv6_2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "4"
},
"name": "conv6_2",
"inputs": [[28, 0], [32, 0], [33, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv6_1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv6_1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "2"
},
"name": "conv6_1",
"inputs": [[28, 0], [35, 0], [36, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prob1_label",
"inputs": [],
"backward_source_id": -1
},
{
"op": "SoftmaxOutput",
"param": {
"grad_scale": "1",
"ignore_label": "-1",
"multi_output": "False",
"normalization": "null",
"use_ignore": "False"
},
"name": "prob1",
"inputs": [[37, 0], [38, 0]],
"backward_source_id": -1
}
],
"arg_nodes": [
0,
1,
2,
4,
7,
8,
10,
13,
14,
16,
19,
20,
22,
24,
25,
27,
29,
30,
32,
33,
35,
36,
38
],
"heads": [[31, 0], [34, 0], [39, 0]]
}
================================================
FILE: model/det3.prototxt
================================================
name: "ONet"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 48
input_dim: 48
##################################
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 32
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu1"
type: "PReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 64
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu2"
type: "PReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 64
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu3"
type: "PReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4"
type: "Convolution"
bottom: "pool3"
top: "conv4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 128
kernel_size: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu4"
type: "PReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "conv5"
type: "InnerProduct"
bottom: "conv4"
top: "conv5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
#kernel_size: 3
num_output: 256
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "drop5"
type: "Dropout"
bottom: "conv5"
top: "conv5"
dropout_param {
dropout_ratio: 0.25
}
}
layer {
name: "prelu5"
type: "PReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "conv6-1"
type: "InnerProduct"
bottom: "conv5"
top: "conv6-1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
#kernel_size: 1
num_output: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6-2"
type: "InnerProduct"
bottom: "conv5"
top: "conv6-2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
#kernel_size: 1
num_output: 4
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6-3"
type: "InnerProduct"
bottom: "conv5"
top: "conv6-3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
#kernel_size: 1
num_output: 10
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prob1"
type: "Softmax"
bottom: "conv6-1"
top: "prob1"
}
================================================
FILE: model/det4-symbol.json
================================================
{
"nodes": [
{
"op": "null",
"param": {},
"name": "data",
"inputs": [],
"backward_source_id": -1
},
{
"op": "SliceChannel",
"param": {
"axis": "1",
"num_outputs": "5",
"squeeze_axis": "False"
},
"name": "slice",
"inputs": [[0, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "28",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv1_1",
"inputs": [[1, 0], [2, 0], [3, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu1_1_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu1_1",
"inputs": [[4, 0], [5, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool1_1",
"inputs": [[6, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "48",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv2_1",
"inputs": [[7, 0], [8, 0], [9, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu2_1_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu2_1",
"inputs": [[10, 0], [11, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool2_1",
"inputs": [[12, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(2,2)",
"no_bias": "False",
"num_filter": "64",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv3_1",
"inputs": [[13, 0], [14, 0], [15, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu3_1_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu3_1",
"inputs": [[16, 0], [17, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "28",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv1_2",
"inputs": [[1, 1], [19, 0], [20, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu1_2_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu1_2",
"inputs": [[21, 0], [22, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool1_2",
"inputs": [[23, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "48",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv2_2",
"inputs": [[24, 0], [25, 0], [26, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu2_2_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu2_2",
"inputs": [[27, 0], [28, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool2_2",
"inputs": [[29, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(2,2)",
"no_bias": "False",
"num_filter": "64",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv3_2",
"inputs": [[30, 0], [31, 0], [32, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu3_2_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu3_2",
"inputs": [[33, 0], [34, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_3_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_3_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "28",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv1_3",
"inputs": [[1, 2], [36, 0], [37, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu1_3_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu1_3",
"inputs": [[38, 0], [39, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool1_3",
"inputs": [[40, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_3_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_3_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "48",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv2_3",
"inputs": [[41, 0], [42, 0], [43, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu2_3_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu2_3",
"inputs": [[44, 0], [45, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool2_3",
"inputs": [[46, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_3_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_3_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(2,2)",
"no_bias": "False",
"num_filter": "64",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv3_3",
"inputs": [[47, 0], [48, 0], [49, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu3_3_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu3_3",
"inputs": [[50, 0], [51, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_4_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_4_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "28",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv1_4",
"inputs": [[1, 3], [53, 0], [54, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu1_4_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu1_4",
"inputs": [[55, 0], [56, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool1_4",
"inputs": [[57, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_4_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_4_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "48",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv2_4",
"inputs": [[58, 0], [59, 0], [60, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu2_4_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu2_4",
"inputs": [[61, 0], [62, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool2_4",
"inputs": [[63, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_4_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_4_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(2,2)",
"no_bias": "False",
"num_filter": "64",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv3_4",
"inputs": [[64, 0], [65, 0], [66, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu3_4_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu3_4",
"inputs": [[67, 0], [68, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_5_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_5_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "28",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv1_5",
"inputs": [[1, 4], [70, 0], [71, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu1_5_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu1_5",
"inputs": [[72, 0], [73, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool1_5",
"inputs": [[74, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_5_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_5_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "48",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv2_5",
"inputs": [[75, 0], [76, 0], [77, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu2_5_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu2_5",
"inputs": [[78, 0], [79, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool2_5",
"inputs": [[80, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_5_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_5_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(2,2)",
"no_bias": "False",
"num_filter": "64",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv3_5",
"inputs": [[81, 0], [82, 0], [83, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu3_5_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu3_5",
"inputs": [[84, 0], [85, 0]],
"backward_source_id": -1
},
{
"op": "Concat",
"param": {
"dim": "1",
"num_args": "5"
},
"name": "concat",
"inputs": [[18, 0], [35, 0], [52, 0], [69, 0], [86, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc4_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc4_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "256"
},
"name": "fc4",
"inputs": [[87, 0], [88, 0], [89, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu4_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu4",
"inputs": [[90, 0], [91, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc4_1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc4_1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "64"
},
"name": "fc4_1",
"inputs": [[92, 0], [93, 0], [94, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu4_1_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu4_1",
"inputs": [[95, 0], [96, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc5_1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc5_1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "2"
},
"name": "fc5_1",
"inputs": [[97, 0], [98, 0], [99, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc4_2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc4_2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "64"
},
"name": "fc4_2",
"inputs": [[92, 0], [101, 0], [102, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu4_2_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu4_2",
"inputs": [[103, 0], [104, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc5_2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc5_2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "2"
},
"name": "fc5_2",
"inputs": [[105, 0], [106, 0], [107, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc4_3_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc4_3_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "64"
},
"name": "fc4_3",
"inputs": [[92, 0], [109, 0], [110, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu4_3_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu4_3",
"inputs": [[111, 0], [112, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc5_3_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc5_3_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "2"
},
"name": "fc5_3",
"inputs": [[113, 0], [114, 0], [115, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc4_4_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc4_4_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "64"
},
"name": "fc4_4",
"inputs": [[92, 0], [117, 0], [118, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu4_4_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu4_4",
"inputs": [[119, 0], [120, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc5_4_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc5_4_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "2"
},
"name": "fc5_4",
"inputs": [[121, 0], [122, 0], [123, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc4_5_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc4_5_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "64"
},
"name": "fc4_5",
"inputs": [[92, 0], [125, 0], [126, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu4_5_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu4_5",
"inputs": [[127, 0], [128, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc5_5_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "fc5_5_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "2"
},
"name": "fc5_5",
"inputs": [[129, 0], [130, 0], [131, 0]],
"backward_source_id": -1
}
],
"arg_nodes": [
0,
2,
3,
5,
8,
9,
11,
14,
15,
17,
19,
20,
22,
25,
26,
28,
31,
32,
34,
36,
37,
39,
42,
43,
45,
48,
49,
51,
53,
54,
56,
59,
60,
62,
65,
66,
68,
70,
71,
73,
76,
77,
79,
82,
83,
85,
88,
89,
91,
93,
94,
96,
98,
99,
101,
102,
104,
106,
107,
109,
110,
112,
114,
115,
117,
118,
120,
122,
123,
125,
126,
128,
130,
131
],
"heads": [[100, 0], [108, 0], [116, 0], [124, 0], [132, 0]]
}
================================================
FILE: model/det4.prototxt
================================================
name: "LNet"
input: "data"
input_dim: 1
input_dim: 15
input_dim: 24
input_dim: 24
layer {
name: "slicer_data"
type: "Slice"
bottom: "data"
top: "data241"
top: "data242"
top: "data243"
top: "data244"
top: "data245"
slice_param {
axis: 1
slice_point: 3
slice_point: 6
slice_point: 9
slice_point: 12
}
}
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data241"
top: "conv1_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 28
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu1_1"
type: "PReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "pool1_1"
type: "Pooling"
bottom: "conv1_1"
top: "pool1_1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1_1"
top: "conv2_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 48
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu2_1"
type: "PReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "pool2_1"
type: "Pooling"
bottom: "conv2_1"
top: "pool2_1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2_1"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 64
kernel_size: 2
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu3_1"
type: "PReLU"
bottom: "conv3_1"
top: "conv3_1"
}
##########################
layer {
name: "conv1_2"
type: "Convolution"
bottom: "data242"
top: "conv1_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 28
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu1_2"
type: "PReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1_2"
type: "Pooling"
bottom: "conv1_2"
top: "pool1_2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "pool1_2"
top: "conv2_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 48
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu2_2"
type: "PReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2_2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2_2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "pool2_2"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 64
kernel_size: 2
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu3_2"
type: "PReLU"
bottom: "conv3_2"
top: "conv3_2"
}
##########################
##########################
layer {
name: "conv1_3"
type: "Convolution"
bottom: "data243"
top: "conv1_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 28
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu1_3"
type: "PReLU"
bottom: "conv1_3"
top: "conv1_3"
}
layer {
name: "pool1_3"
type: "Pooling"
bottom: "conv1_3"
top: "pool1_3"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv2_3"
type: "Convolution"
bottom: "pool1_3"
top: "conv2_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 48
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu2_3"
type: "PReLU"
bottom: "conv2_3"
top: "conv2_3"
}
layer {
name: "pool2_3"
type: "Pooling"
bottom: "conv2_3"
top: "pool2_3"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "pool2_3"
top: "conv3_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 64
kernel_size: 2
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu3_3"
type: "PReLU"
bottom: "conv3_3"
top: "conv3_3"
}
##########################
##########################
layer {
name: "conv1_4"
type: "Convolution"
bottom: "data244"
top: "conv1_4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 28
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu1_4"
type: "PReLU"
bottom: "conv1_4"
top: "conv1_4"
}
layer {
name: "pool1_4"
type: "Pooling"
bottom: "conv1_4"
top: "pool1_4"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv2_4"
type: "Convolution"
bottom: "pool1_4"
top: "conv2_4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 48
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu2_4"
type: "PReLU"
bottom: "conv2_4"
top: "conv2_4"
}
layer {
name: "pool2_4"
type: "Pooling"
bottom: "conv2_4"
top: "pool2_4"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv3_4"
type: "Convolution"
bottom: "pool2_4"
top: "conv3_4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 64
kernel_size: 2
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu3_4"
type: "PReLU"
bottom: "conv3_4"
top: "conv3_4"
}
##########################
##########################
layer {
name: "conv1_5"
type: "Convolution"
bottom: "data245"
top: "conv1_5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 28
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu1_5"
type: "PReLU"
bottom: "conv1_5"
top: "conv1_5"
}
layer {
name: "pool1_5"
type: "Pooling"
bottom: "conv1_5"
top: "pool1_5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv2_5"
type: "Convolution"
bottom: "pool1_5"
top: "conv2_5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 48
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu2_5"
type: "PReLU"
bottom: "conv2_5"
top: "conv2_5"
}
layer {
name: "pool2_5"
type: "Pooling"
bottom: "conv2_5"
top: "pool2_5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv3_5"
type: "Convolution"
bottom: "pool2_5"
top: "conv3_5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 64
kernel_size: 2
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu3_5"
type: "PReLU"
bottom: "conv3_5"
top: "conv3_5"
}
##########################
layer {
name: "concat"
bottom: "conv3_1"
bottom: "conv3_2"
bottom: "conv3_3"
bottom: "conv3_4"
bottom: "conv3_5"
top: "conv3"
type: "Concat"
concat_param {
axis: 1
}
}
##########################
layer {
name: "fc4"
type: "InnerProduct"
bottom: "conv3"
top: "fc4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 256
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu4"
type: "PReLU"
bottom: "fc4"
top: "fc4"
}
############################
layer {
name: "fc4_1"
type: "InnerProduct"
bottom: "fc4"
top: "fc4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 64
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu4_1"
type: "PReLU"
bottom: "fc4_1"
top: "fc4_1"
}
layer {
name: "fc5_1"
type: "InnerProduct"
bottom: "fc4_1"
top: "fc5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
#type: "constant"
#value: 0
}
bias_filler {
type: "constant"
value: 0
}
}
}
#########################
layer {
name: "fc4_2"
type: "InnerProduct"
bottom: "fc4"
top: "fc4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 64
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu4_2"
type: "PReLU"
bottom: "fc4_2"
top: "fc4_2"
}
layer {
name: "fc5_2"
type: "InnerProduct"
bottom: "fc4_2"
top: "fc5_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
#type: "constant"
#value: 0
}
bias_filler {
type: "constant"
value: 0
}
}
}
#########################
layer {
name: "fc4_3"
type: "InnerProduct"
bottom: "fc4"
top: "fc4_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 64
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu4_3"
type: "PReLU"
bottom: "fc4_3"
top: "fc4_3"
}
layer {
name: "fc5_3"
type: "InnerProduct"
bottom: "fc4_3"
top: "fc5_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
#type: "constant"
#value: 0
}
bias_filler {
type: "constant"
value: 0
}
}
}
#########################
layer {
name: "fc4_4"
type: "InnerProduct"
bottom: "fc4"
top: "fc4_4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 64
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu4_4"
type: "PReLU"
bottom: "fc4_4"
top: "fc4_4"
}
layer {
name: "fc5_4"
type: "InnerProduct"
bottom: "fc4_4"
top: "fc5_4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
#type: "constant"
#value: 0
}
bias_filler {
type: "constant"
value: 0
}
}
}
#########################
layer {
name: "fc4_5"
type: "InnerProduct"
bottom: "fc4"
top: "fc4_5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 64
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu4_5"
type: "PReLU"
bottom: "fc4_5"
top: "fc4_5"
}
# fc5_5: InnerProduct layer that reads the activated "fc4_5" blob and
# produces 2 outputs into blob "fc5_5".
# NOTE(review): presumably the 2 outputs are the (x, y) offsets of one
# landmark consumed by the landmark-refinement stage — confirm against caller.
layer {
name: "fc5_5"
type: "InnerProduct"
bottom: "fc4_5"
top: "fc5_5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
# alternative initialisation left disabled by the author:
#type: "constant"
#value: 0
}
bias_filler {
type: "constant"
value: 0
}
}
}
#########################
================================================
FILE: mtcnn_detector.py
================================================
# coding: utf-8
import math
import os
from itertools import repeat
from multiprocessing import Pool

try:
    from itertools import izip
except ImportError:
    # Python 3 removed itertools.izip; the built-in zip is already lazy there.
    izip = zip

import mxnet as mx
import numpy as np
import cv2

from helper import nms, adjust_input, generate_bbox, detect_first_stage_warpper
class MtcnnDetector(object):
"""
Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Neural Networks
see https://github.com/kpzhang93/MTCNN_face_detection_alignment
this is a mxnet version
"""
def __init__(self,
             model_folder='.',
             minsize = 20,
             threshold = [0.6, 0.7, 0.8],
             factor = 0.709,
             num_worker = 1,
             accurate_landmark = False,
             ctx=mx.cpu()):
    """
        Initialize the detector: load the four networks and start the worker pool

        Parameters:
        ----------
            model_folder : string
                folder holding the det1..det4 checkpoints
                (loaded as <model_folder>/detN with epoch 1)
            minsize : float number
                minimal face to detect
            threshold : sequence of 3 float numbers
                detect threshold for the 3 stages; the values are copied,
                so later changes to the passed sequence are not seen
            factor: float number
                scale factor for image pyramid
            num_worker: int number
                number of processes we use for first stage
                (one PNet instance is loaded per worker)
            accurate_landmark: bool
                use accurate landmark localization or not
            ctx:
                mxnet context the networks are loaded on
    """
    self.num_worker = num_worker
    self.accurate_landmark = accurate_landmark

    # checkpoint prefixes of the 4 models inside model_folder
    model_names = ['det1', 'det2', 'det3', 'det4']
    prefixes = [os.path.join(model_folder, name) for name in model_names]

    # the first stage runs in parallel, so every worker gets its own PNet
    self.PNets = [mx.model.FeedForward.load(prefixes[0], 1, ctx=ctx)
                  for _ in range(num_worker)]

    # NOTE(review): the pool is never closed by this class; callers that create
    # many detectors may want to terminate self.Pool explicitly.
    self.Pool = Pool(num_worker)

    self.RNet = mx.model.FeedForward.load(prefixes[1], 1, ctx=ctx)
    self.ONet = mx.model.FeedForward.load(prefixes[2], 1, ctx=ctx)
    self.LNet = mx.model.FeedForward.load(prefixes[3], 1, ctx=ctx)

    self.minsize = float(minsize)
    self.factor = float(factor)
    # copy: the default is a single list object shared by every call, so
    # storing it directly would let one instance's edits leak into the others
    self.threshold = list(threshold)
def convert_to_square(self, bbox):
    """
        Grow every box to a square around its own centre

        The side of the square is the longer of the box's width and height.
        The input array is left untouched; a modified copy is returned.

        Parameters:
        ----------
            bbox: numpy array , shape n x 5
                input bbox (x1, y1, x2, y2, score)

        Returns:
        -------
            numpy array of the same shape holding the square boxes
    """
    left, top = bbox[:, 0], bbox[:, 1]
    width = bbox[:, 2] - left + 1
    height = bbox[:, 3] - top + 1
    side = np.maximum(height, width)

    squared = bbox.copy()
    # shift the top-left corner so that the centre stays where it was
    squared[:, 0] = left + width*0.5 - side*0.5
    squared[:, 1] = top + height*0.5 - side*0.5
    # bottom-right corner follows from the new corner and the side length
    squared[:, 2] = squared[:, 0] + side - 1
    squared[:, 3] = squared[:, 1] + side - 1
    return squared
def calibrate_box(self, bbox, reg):
    """
        Apply regression offsets to the boxes

        Each offset is expressed as a fraction of the box size: the x offsets
        are scaled by the box width and the y offsets by the box height.
        The coordinates are updated in place and the same array is returned.

        Parameters:
        ----------
            bbox: numpy array, shape n x 5
                input bboxes (modified in place)
            reg:  numpy array, shape n x 4
                bboxes adjustment for (x1, y1, x2, y2)

        Returns:
        -------
            bbox, after refinement
    """
    widths = (bbox[:, 2] - bbox[:, 0] + 1)[:, np.newaxis]
    heights = (bbox[:, 3] - bbox[:, 1] + 1)[:, np.newaxis]

    # per-box scale for the four coordinates: (w, h, w, h)
    scale = np.hstack([widths, heights, widths, heights])
    bbox[:, 0:4] = bbox[:, 0:4] + scale * reg
    return bbox
def pad(self, bboxes, w, h):
    """
        Compute the copy windows needed to cut boxes out of an image,
        clipping every box to the image borders

        NOTE: the clipping is written back into ``bboxes`` itself (its first
        four columns are modified in place through column views).

        Parameters:
        ----------
            bboxes: numpy array, n x 5
                input bboxes (x1, y1, x2, y2, ...); clipped in place
            w: float number
                width of the input image
            h: float number
                height of the input image

        Returns :
        -------
            list of int32 numpy arrays, each of length n, in this order:
            dy, edy, dx, edx : start / end point of the bbox in target image
            y, ey, x, ex     : start / end point of the bbox in original image
            tmpw, tmph       : width and height of the (unclipped) bbox
    """
    # column views: writing to them updates bboxes
    x, y = bboxes[:, 0], bboxes[:, 1]
    ex, ey = bboxes[:, 2], bboxes[:, 3]

    # box size before any clipping
    tmpw = ex - x + 1
    tmph = ey - y + 1

    count = bboxes.shape[0]
    dx = np.zeros((count, ))
    dy = np.zeros((count, ))
    edx = tmpw - 1
    edy = tmph - 1

    # boxes sticking out on the right: shorten the target window, clip to w-1
    beyond_right = ex > w-1
    edx[beyond_right] = tmpw[beyond_right] + w - 2 - ex[beyond_right]
    ex[beyond_right] = w - 1

    # boxes sticking out at the bottom
    beyond_bottom = ey > h-1
    edy[beyond_bottom] = tmph[beyond_bottom] + h - 2 - ey[beyond_bottom]
    ey[beyond_bottom] = h - 1

    # boxes starting left of the image: the target window starts further in
    before_left = x < 0
    dx[before_left] = 0 - x[before_left]
    x[before_left] = 0

    # boxes starting above the image
    before_top = y < 0
    dy[before_top] = 0 - y[before_top]
    y[before_top] = 0

    ordered = (dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph)
    return [values.astype(np.int32) for values in ordered]
def slice_index(self, number):
    """
        Split the indices 0 .. number-1 into consecutive groups of at most
        self.num_worker items; only the last group may be shorter

        Parameters:
        ----------
            number: int number
                how many indices to split

        Returns:
        -------
            list of index groups, in ascending order
    """
    group_size = self.num_worker
    indices = range(number)
    return [indices[start:start + group_size]
            for start in range(0, len(indices), group_size)]
def detect_face(self, img):
    """
        Detect faces in img by running the cascade PNet -> RNet -> ONet,
        optionally followed by the LNet landmark refinement
        (when self.accurate_landmark is set)

        Parameters:
        ----------
            img: numpy array, bgr order, 3-dimensional (height, width, channel)
                input image

        Returns:
        -------
            None if img is None, is not 3-dimensional, or no box survives a stage;
            otherwise the tuple (bboxes, points):

            bboxes: numpy array, n x 5 (x1,y1,x2,y2,score)
                bboxes
            points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
                landmarks; converted to int32 only when accurate_landmark is set
    """

    # check input
    # side length the pyramid is scaled against
    # NOTE(review): presumably the PNet input size — confirm against det1.prototxt
    MIN_DET_SIZE = 12

    if img is None:
        return None

    # only works for color image
    if len(img.shape) != 3:
        return None

    # detected boxes
    total_boxes = []

    height, width, _ = img.shape
    minl = min( height, width)

    # get all the valid scales: start at the scale that maps a minsize face to
    # MIN_DET_SIZE, then shrink by self.factor until the shorter image side
    # would fall to MIN_DET_SIZE or below
    scales = []
    m = MIN_DET_SIZE/self.minsize
    minl *= m
    factor_count = 0
    while minl > MIN_DET_SIZE:
        scales.append(m*self.factor**factor_count)
        minl *= self.factor
        factor_count += 1

    #############################################
    # first stage
    #############################################
    # sequential variant kept by the author for reference:
    #for scale in scales:
    #    return_boxes = self.detect_first_stage(img, scale, 0)
    #    if return_boxes is not None:
    #        total_boxes.append(return_boxes)

    # process the scales in groups of at most num_worker, one PNet per scale
    sliced_index = self.slice_index(len(scales))
    total_boxes = []
    for batch in sliced_index:
        # NOTE(review): izip only exists in Python 2's itertools
        local_boxes = self.Pool.map( detect_first_stage_warpper, \
                izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) )
        total_boxes.extend(local_boxes)

    # remove the Nones
    total_boxes = [ i for i in total_boxes if i is not None]

    if len(total_boxes) == 0:
        return None

    total_boxes = np.vstack(total_boxes)

    if total_boxes.size == 0:
        return None

    # merge the detection from first stage
    pick = nms(total_boxes[:, 0:5], 0.7, 'Union')
    total_boxes = total_boxes[pick]

    bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
    bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1

    # refine the bboxes: columns 5..8 are used as offsets relative to the
    # box width / height; only the 5 columns (x1, y1, x2, y2, score) are kept
    total_boxes = np.vstack([total_boxes[:, 0]+total_boxes[:, 5] * bbw,
                             total_boxes[:, 1]+total_boxes[:, 6] * bbh,
                             total_boxes[:, 2]+total_boxes[:, 7] * bbw,
                             total_boxes[:, 3]+total_boxes[:, 8] * bbh,
                             total_boxes[:, 4]
                             ])

    # vstack produced 5 x n; transpose back to n x 5
    total_boxes = total_boxes.T
    total_boxes = self.convert_to_square(total_boxes)
    total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

    #############################################
    # second stage
    #############################################
    num_box = total_boxes.shape[0]

    # pad the bbox
    # (self.pad also clips total_boxes to the image in place)
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
    # (3, 24, 24) is the input shape for RNet
    input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)

    for i in range(num_box):
        # zero canvas of the unclipped box size; the part of the box that lies
        # inside the image is copied in, the rest stays zero
        # NOTE(review): this canvas is uint8 while the third stage uses float32 —
        # confirm whether the difference is intended
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
        input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))

    output = self.RNet.predict(input_buf)

    # filter the total_boxes with threshold
    # (output[1][:, 1] is used as the score, output[0] as the box regression)
    passed = np.where(output[1][:, 1] > self.threshold[1])
    total_boxes = total_boxes[passed]

    if total_boxes.size == 0:
        return None

    total_boxes[:, 4] = output[1][passed, 1].reshape((-1,))
    reg = output[0][passed]

    # nms
    pick = nms(total_boxes, 0.7, 'Union')
    total_boxes = total_boxes[pick]
    total_boxes = self.calibrate_box(total_boxes, reg[pick])
    total_boxes = self.convert_to_square(total_boxes)
    total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

    #############################################
    # third stage
    #############################################
    num_box = total_boxes.shape[0]

    # pad the bbox
    # (self.pad also clips total_boxes to the image in place)
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
    # (3, 48, 48) is the input shape for ONet
    input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)

    for i in range(num_box):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
        tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
        input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))

    output = self.ONet.predict(input_buf)

    # filter the total_boxes with threshold
    # (output[2][:, 1] is used as the score, output[1] as the box regression,
    #  output[0] as the landmark positions relative to the box)
    passed = np.where(output[2][:, 1] > self.threshold[2])
    total_boxes = total_boxes[passed]

    if total_boxes.size == 0:
        return None

    total_boxes[:, 4] = output[2][passed, 1].reshape((-1,))
    reg = output[1][passed]
    points = output[0][passed]

    # compute landmark points: scale the relative positions by the box size
    # and offset them by the box corner (x in columns 0..4, y in columns 5..9)
    bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
    bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
    points[:, 0:5] = np.expand_dims(total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
    points[:, 5:10] = np.expand_dims(total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

    # nms
    total_boxes = self.calibrate_box(total_boxes, reg)
    pick = nms(total_boxes, 0.7, 'Min')
    total_boxes = total_boxes[pick]
    points = points[pick]

    # without the refinement stage the ONet landmarks are returned as they are
    if not self.accurate_landmark:
        return total_boxes, points

    #############################################
    # extended stage
    #############################################
    num_box = total_boxes.shape[0]
    # side of the square patch cut around each landmark: a quarter of the
    # longer box side
    patchw = np.maximum(total_boxes[:, 2]-total_boxes[:, 0]+1, total_boxes[:, 3]-total_boxes[:, 1]+1)
    patchw = np.round(patchw*0.25)

    # make it even
    patchw[np.where(np.mod(patchw,2) == 1)] += 1

    # 5 landmarks x 3 colour channels = 15 input channels per face
    input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
    for i in range(5):
        # top-left corner of the patch centred on landmark i
        x, y = points[:, i], points[:, i+5]
        x, y = np.round(x-0.5*patchw), np.round(y-0.5*patchw)
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(np.vstack([x, y, x+patchw-1, y+patchw-1]).T,
                                                                width,
                                                                height)
        for j in range(num_box):
            # the patch is square, so tmpw is used for both dimensions
            tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
            tmpim[dy[j]:edy[j]+1, dx[j]:edx[j]+1, :] = img[y[j]:ey[j]+1, x[j]:ex[j]+1, :]
            input_buf[j, i*3:i*3+3, :, :] = adjust_input(cv2.resize(tmpim, (24, 24)))

    output = self.LNet.predict(input_buf)

    pointx = np.zeros((num_box, 5))
    pointy = np.zeros((num_box, 5))

    for k in range(5):
        # do not make a large movement: if either coordinate of a face's
        # prediction is further than 0.35 from 0.5, reset that face's whole
        # row to 0.5
        tmp_index = np.where(np.abs(output[k]-0.5) > 0.35)
        output[k][tmp_index[0]] = 0.5

        # map the patch-relative prediction back to image coordinates
        pointx[:, k] = np.round(points[:, k] - 0.5*patchw) + output[k][:, 0]*patchw
        pointy[:, k] = np.round(points[:, k+5] - 0.5*patchw) + output[k][:, 1]*patchw

    points = np.hstack([pointx, pointy])
    points = points.astype(np.int32)

    return total_boxes, points
def list2colmatrix(self, pts_list):
    """
        Flatten a list of 2-D points into one column matrix

        Parameters:
        ----------
            pts_list:
                non-empty list of points, each indexable as (x, y)

        Returns:
        -------
            colMat:
                numpy matrix of shape (2 * len(pts_list), 1) laid out as
                x0, y0, x1, y1, ...
    """
    assert len(pts_list) > 0

    flattened = []
    for point in pts_list:
        flattened.extend((point[0], point[1]))

    # np.matrix of a flat list is a row; transpose it into a column
    return np.matrix(flattened).transpose()
def find_tfrom_between_shapes(self, from_shape, to_shape):
    """
        Find the similarity transform (scale * rotation, plus translation)
        that maps from_shape onto to_shape in the least-squares sense

        NOTE(review): the computation looks like Umeyama's closed-form
        solution (SVD of the cross-covariance) — confirm if exactness matters.

        Parameters:
        ----------
            from_shape:
                numpy matrix, shape (2n, 1), laid out as x0, y0, x1, y1, ...
            to_shape:
                numpy matrix of the same shape and layout
                (both must be np.matrix: ``*`` is used as matrix product)

        Returns:
        -------
            tran_m:
                2 x 2 matrix, scale times rotation
            tran_b:
                2 x 1 matrix, translation, so that
                to_point ~= tran_m * from_point + tran_b
    """
    assert from_shape.shape[0] == to_shape.shape[0] and from_shape.shape[0] % 2 == 0

    # floor division keeps the point count an int; plain "/" gives a float on
    # Python 3, which reshape() rejects
    num_points = from_shape.shape[0] // 2

    sigma_from = 0.0
    sigma_to = 0.0
    cov = np.matrix([[0.0, 0.0], [0.0, 0.0]])

    # compute the mean and cov
    from_shape_points = from_shape.reshape(num_points, 2)
    to_shape_points = to_shape.reshape(num_points, 2)
    mean_from = from_shape_points.mean(axis=0)
    mean_to = to_shape_points.mean(axis=0)

    for i in range(num_points):
        from_dev = from_shape_points[i] - mean_from
        to_dev = to_shape_points[i] - mean_to
        temp_dis = np.linalg.norm(from_dev)
        sigma_from += temp_dis * temp_dis
        temp_dis = np.linalg.norm(to_dev)
        sigma_to += temp_dis * temp_dis
        # outer product (2 x 1) * (1 x 2) accumulates the cross-covariance
        cov += to_dev.transpose() * from_dev

    sigma_from = sigma_from / num_points
    sigma_to = sigma_to / num_points
    cov = cov / num_points

    # compute the affine matrix
    s = np.matrix([[1.0, 0.0], [0.0, 1.0]])
    u, d, vt = np.linalg.svd(cov)

    # a negative determinant would yield a reflection; flip the axis that
    # belongs to the smaller singular value so that r stays a rotation
    if np.linalg.det(cov) < 0:
        if d[1] < d[0]:
            s[1, 1] = -1
        else:
            s[0, 0] = -1
    r = u * s * vt

    # scale; left at 1 when all source points coincide (zero variance)
    c = 1.0
    if sigma_from != 0:
        c = 1.0 / sigma_from * np.trace(np.diag(d) * s)

    tran_b = mean_to.transpose() - c * r * mean_from.transpose()
    tran_m = c * r

    return tran_m, tran_b
def extract_image_chips(self, img, points, desired_size=256, padding=0):
    """
        Crop and align faces: every face is rotated and scaled so that its
        landmarks match a fixed mean face layout, then cut out as a
        desired_size x desired_size chip

        Parameters:
        ----------
            img: numpy array
                input image (passed unchanged to cv2.warpAffine)
            points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
                landmarks of the n faces
            desired_size: default 256
                side length of every output chip
            padding: default 0
                relative border added around the mean face layout;
                values that are not > 0 are treated as 0

        Returns:
        -------
            crop_imgs: list, n
                cropped and aligned faces
    """
    padding = padding if padding > 0 else 0

    # average positions of face points, relative to an unpadded chip
    mean_face_shape_x = [0.224152, 0.75610125, 0.490127, 0.254149, 0.726104]
    mean_face_shape_y = [0.2119465, 0.2119465, 0.628106, 0.780233, 0.780233]

    # target landmark positions inside the chip; the same for every face
    to_points = [[(padding + mean_x) / (2 * padding + 1) * desired_size,
                  (padding + mean_y) / (2 * padding + 1) * desired_size]
                 for mean_x, mean_y in zip(mean_face_shape_x, mean_face_shape_y)]

    # the first two landmarks' midpoint is moved to this position in the chip
    to_center = [desired_size * 0.5, desired_size * 0.4]

    crop_imgs = []
    for p in points:
        # floor division: range() needs an int ("/" gives a float on Python 3)
        num_landmarks = len(p) // 2
        # pair up the x half and the y half of the landmark vector
        from_points = [[p[k], p[k+5]] for k in range(num_landmarks)]

        # convert the points to Mat
        from_mat = self.list2colmatrix(from_points)
        to_mat = self.list2colmatrix(to_points)

        # compute the similar transfrom
        tran_m, tran_b = self.find_tfrom_between_shapes(from_mat, to_mat)

        # push a unit x vector through the transform to read off the
        # rotation angle and the scale
        probe_vec = np.matrix([1.0, 0.0]).transpose()
        probe_vec = tran_m * probe_vec

        scale = np.linalg.norm(probe_vec)
        angle = 180.0 / math.pi * math.atan2(probe_vec[1, 0], probe_vec[0, 0])

        # midpoint of the first two landmarks
        from_center = [(from_points[0][0] + from_points[1][0]) / 2.0,
                       (from_points[0][1] + from_points[1][1]) / 2.0]

        ex = to_center[0] - from_center[0]
        ey = to_center[1] - from_center[1]

        # rotate / scale about the landmark midpoint, then translate it to
        # its target position
        rot_mat = cv2.getRotationMatrix2D((from_center[0], from_center[1]), -1*angle, scale)
        rot_mat[0][2] += ex
        rot_mat[1][2] += ey

        chips = cv2.warpAffine(img, rot_mat, (desired_size, desired_size))
        crop_imgs.append(chips)

    return crop_imgs
gitextract_cj2cvrn1/ ├── .gitignore ├── README.md ├── helper.py ├── main.py ├── model/ │ ├── det1-0001.params │ ├── det1-symbol.json │ ├── det1.caffemodel │ ├── det1.prototxt │ ├── det2-0001.params │ ├── det2-symbol.json │ ├── det2.caffemodel │ ├── det2.prototxt │ ├── det3-0001.params │ ├── det3-symbol.json │ ├── det3.caffemodel │ ├── det3.prototxt │ ├── det4-0001.params │ ├── det4-symbol.json │ ├── det4.caffemodel │ └── det4.prototxt └── mtcnn_detector.py
SYMBOL INDEX (15 symbols across 2 files)
FILE: helper.py
function nms (line 8) | def nms(boxes, overlap_threshold, mode='Union'):
function adjust_input (line 69) | def adjust_input(in_data):
function generate_bbox (line 92) | def generate_bbox(map, reg, scale, threshold):
function detect_first_stage (line 132) | def detect_first_stage(img, net, scale, threshold):
function detect_first_stage_warpper (line 167) | def detect_first_stage_warpper( args ):
FILE: mtcnn_detector.py
class MtcnnDetector (line 12) | class MtcnnDetector(object):
method __init__ (line 18) | def __init__(self,
method convert_to_square (line 68) | def convert_to_square(self, bbox):
method calibrate_box (line 92) | def calibrate_box(self, bbox, reg):
method pad (line 118) | def pad(self, bboxes, w, h):
method slice_index (line 173) | def slice_index(self, number):
method detect_face (line 189) | def detect_face(self, img):
method list2colmatrix (line 392) | def list2colmatrix(self, pts_list):
method find_tfrom_between_shapes (line 412) | def find_tfrom_between_shapes(self, from_shape, to_shape):
method extract_image_chips (line 466) | def extract_image_chips(self, img, points, desired_size=256, padding=0):
Condensed preview — 21 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (113K chars).
[
{
"path": ".gitignore",
"chars": 28,
"preview": "*.pyc\ncaffe_converter\n.idea\n"
},
{
"path": "README.md",
"chars": 1826,
"preview": "# MTCNN_face_detection_and_alignment\n\n## About\n\n This is a python/mxnet implementation of [Zhang](https://kpzhang93.git"
},
{
"path": "helper.py",
"chars": 4695,
"preview": "# coding: utf-8\n# YuanYang\nimport math\nimport cv2\nimport numpy as np\n\n\ndef nms(boxes, overlap_threshold, mode='Union'):\n"
},
{
"path": "main.py",
"chars": 1611,
"preview": "# coding: utf-8\nimport mxnet as mx\nfrom mtcnn_detector import MtcnnDetector\nimport cv2\nimport os\nimport time\n\ndetector ="
},
{
"path": "model/det1-symbol.json",
"chars": 5706,
"preview": "{\n \"nodes\": [\n {\n \"op\": \"null\", \n \"param\": {}, \n \"name\": \"data\", \n \"inputs\": [], \n \"backwar"
},
{
"path": "model/det1.prototxt",
"chars": 2353,
"preview": "name: \"PNet\"\ninput: \"data\"\ninput_dim: 1\ninput_dim: 3\ninput_dim: 12\ninput_dim: 12\n\nlayer {\n name: \"conv1\"\n type: \"Convo"
},
{
"path": "model/det2-symbol.json",
"chars": 6833,
"preview": "{\n \"nodes\": [\n {\n \"op\": \"null\", \n \"param\": {}, \n \"name\": \"data\", \n \"inputs\": [], \n \"backwar"
},
{
"path": "model/det2.prototxt",
"chars": 3159,
"preview": "name: \"RNet\"\ninput: \"data\"\ninput_dim: 1\ninput_dim: 3\ninput_dim: 24\ninput_dim: 24\n\n\n##########################\n##########"
},
{
"path": "model/det3-symbol.json",
"chars": 8870,
"preview": "{\n \"nodes\": [\n {\n \"op\": \"null\", \n \"param\": {}, \n \"name\": \"data\", \n \"inputs\": [], \n \"backwar"
},
{
"path": "model/det3.prototxt",
"chars": 3931,
"preview": "name: \"ONet\"\ninput: \"data\"\ninput_dim: 1\ninput_dim: 3\ninput_dim: 48\ninput_dim: 48\n##################################\nlaye"
},
{
"path": "model/det4-symbol.json",
"chars": 30068,
"preview": "{\n \"nodes\": [\n {\n \"op\": \"null\", \n \"param\": {}, \n \"name\": \"data\", \n \"inputs\": [], \n \"backwar"
},
{
"path": "model/det4.prototxt",
"chars": 13509,
"preview": "name: \"LNet\"\ninput: \"data\"\ninput_dim: 1\ninput_dim: 15\ninput_dim: 24\ninput_dim: 24\n\nlayer {\n name: \"slicer_data\"\n type:"
},
{
"path": "mtcnn_detector.py",
"chars": 18237,
"preview": "# coding: utf-8\nimport os\nimport mxnet as mx\nimport numpy as np\nimport math\nimport cv2\nfrom multiprocessing import Pool\n"
}
]
// ... and 8 more files (download for full content)
About this extraction
This page contains the full source code of the YYuanAnyVision/mxnet_mtcnn_face_detection GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 21 files (98.5 KB), approximately 34.4k tokens, and a symbol index with 15 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.