Full Code of szad670401/Fast-MTCNN for AI

master b2b48e8efb82 cached

12 files

35.9 KB

11.8k tokens

5 symbols

1 requests

Download .txt

Repository: szad670401/Fast-MTCNN
Branch: master
Commit: b2b48e8efb82
Files: 12
Total size: 35.9 KB

Directory structure:
gitextract_fjo1u_o6/

├── README.md
├── model/
│   ├── det1.prototxt
│   ├── det1_.caffemodel
│   ├── det1_.prototxt
│   ├── det1_half.caffemodel
│   ├── det2.prototxt
│   ├── det2_half.caffemodel
│   ├── det3-half.caffemodel
│   ├── det3-half.prototxt
│   ├── det3.caffemodel
│   └── det3.prototxt
└── mtcnn_opencv.cpp

================================================
FILE CONTENTS
================================================

================================================
FILE: README.md
================================================
# Fast-MTCNN

A casual experiment in retraining the MTCNN PNet and ONet to make them a little faster. It achieves 100+ fps (1920*1080 input, minSize 60) on an Intel i7 6700K (st), but the accuracy is not very good.

## Dependencies

+ OpenCV 3.4.1 only

The demo is based on the [OpenCV](https://github.com/opencv/opencv) DNN module. My computer with an Intel i7 6700K (st) achieves 100+ fps (1920*1080, minSize 60) with OpenCV 3.4.1 compiled against OpenBLAS. If you want better performance, you can compile OpenCV with the [Intel MKL-DNN Inference Engine package](https://github.com/opencv/opencv/wiki/Intel%27s-Deep-Learning-Inference-Engine-backend) to accelerate inference.

### Demo Image

![Screen Shot 2018-05-25 at 2.25.02 AM](images/test.png)

### TODO
+ Optimize PNet, RNet and ONet with modern network design (bottleneck, depthwise conv, inverted residual block, ...).
+ Benchmark on FDDB.
+ Share computation to keep detection fast as the number of detected faces increases.


## Author

+ Jack Yu


================================================
FILE: model/det1.prototxt
================================================
name: "PNet"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 12
input_dim: 12

layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 10
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "PReLU1"
  type: "PReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 16
    kernel_size: 3
    stride: 1
     weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "PReLU2"
  type: "PReLU"
  bottom: "conv2"
  top: "conv2"
}

layer {
  name: "conv3"
  type: "Convolution"
  bottom: "conv2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 32
    kernel_size: 3
    stride: 1
     weight_filler {
      type: "xavier"
    }
    bias_filler {
	  type: "constant"
      value: 0
    }
  }
}
layer {
  name: "PReLU3"
  type: "PReLU"
  bottom: "conv3"
  top: "conv3"
}


layer {
  name: "conv4-1"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4-1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 2
    kernel_size: 1
    stride: 1
     weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layer {
  name: "conv4-2"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4-2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 4
    kernel_size: 1
    stride: 1
     weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "prob1"
  type: "Softmax"
  bottom: "conv4-1"
  top: "prob1"
}


================================================
FILE: model/det1_.prototxt
================================================
input: "data"
input_dim: 1
input_dim: 3
input_dim: 12
input_dim: 12
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  convolution_param {
    num_output: 10
    bias_term: true
    pad_h: 0
    pad_w: 0
    kernel_h: 3
    kernel_w: 3
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "batch_normalization_5"
  type: "BatchNorm"
  bottom: "conv1"
  top: "batch_normalization_5"
  batch_norm_param {
    moving_average_fraction: 0.990000009537
    eps: 0.0010000000475
  }
}
layer {
  name: "batch_normalization_5_scale"
  type: "Scale"
  bottom: "batch_normalization_5"
  top: "batch_normalization_5"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "prelu1"
  type: "PReLU"
  bottom: "batch_normalization_5"
  top: "prelu1"
}
layer {
  name: "max_pooling2d_3"
  type: "Pooling"
  bottom: "prelu1"
  top: "max_pooling2d_3"
  pooling_param {
    pool: MAX
    kernel_h: 2
    kernel_w: 2
    stride_h: 2
    stride_w: 2
    pad_h: 0
    pad_w: 0
  }
}
layer {
  name: "conv2_"
  type: "Convolution"
  bottom: "max_pooling2d_3"
  top: "conv2_"
  convolution_param {
    num_output: 14
    bias_term: true
    pad_h: 0
    pad_w: 0
    kernel_h: 3
    kernel_w: 3
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "batch_normalization_6"
  type: "BatchNorm"
  bottom: "conv2_"
  top: "batch_normalization_6"
  batch_norm_param {
    moving_average_fraction: 0.990000009537
    eps: 0.0010000000475
  }
}
layer {
  name: "batch_normalization_6_scale"
  type: "Scale"
  bottom: "batch_normalization_6"
  top: "batch_normalization_6"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "prelu2"
  type: "PReLU"
  bottom: "batch_normalization_6"
  top: "prelu2"
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "prelu2"
  top: "conv3"
  convolution_param {
    num_output: 16
    bias_term: true
    pad_h: 0
    pad_w: 0
    kernel_h: 3
    kernel_w: 3
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "batch_normalization_7"
  type: "BatchNorm"
  bottom: "conv3"
  top: "batch_normalization_7"
  batch_norm_param {
    moving_average_fraction: 0.990000009537
    eps: 0.0010000000475
  }
}
layer {
  name: "batch_normalization_7_scale"
  type: "Scale"
  bottom: "batch_normalization_7"
  top: "batch_normalization_7"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "prelu3"
  type: "PReLU"
  bottom: "batch_normalization_7"
  top: "prelu3"
}
layer {
  name: "classifier1"
  type: "Convolution"
  bottom: "prelu3"
  top: "classifier1"
  convolution_param {
    num_output: 2
    bias_term: true
    pad_h: 0
    pad_w: 0
    kernel_h: 1
    kernel_w: 1
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "prob1"
  type: "Softmax"
  bottom: "classifier1"
  top: "prob1"
}
layer {
  name: "conv4-2"
  type: "Convolution"
  bottom: "prelu3"
  top: "conv4-2"
  convolution_param {
    num_output: 4
    bias_term: true
    pad_h: 0
    pad_w: 0
    kernel_h: 1
    kernel_w: 1
    stride_h: 1
    stride_w: 1
  }
}


================================================
FILE: model/det2.prototxt
================================================
name: "RNet"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 24
input_dim: 24

layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 28
    kernel_size: 3
    stride: 1
     weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "prelu1"
  type: "PReLU"
  bottom: "conv1"
  top: "conv1"
  propagate_down: true
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 48
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "prelu2"
  type: "PReLU"
  bottom: "conv2"
  top: "conv2"
  propagate_down: true
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
####################################

##################################
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 2
    stride: 1
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "prelu3"
  type: "PReLU"
  bottom: "conv3"
  top: "conv3"
  propagate_down: true
}
###############################

###############################

layer {
  name: "conv4"
  type: "InnerProduct"
  bottom: "conv3"
  top: "conv4"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  inner_product_param {
    num_output: 128
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "prelu4"
  type: "PReLU"
  bottom: "conv4"
  top: "conv4"
}

layer {
  name: "conv5-1"
  type: "InnerProduct"
  bottom: "conv4"
  top: "conv5-1"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  inner_product_param {
    num_output: 2
    #kernel_size: 1
    #stride: 1
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv5-2"
  type: "InnerProduct"
  bottom: "conv4"
  top: "conv5-2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  inner_product_param {
    num_output: 4
    #kernel_size: 1
    #stride: 1
     weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "prob1"
  type: "Softmax"
  bottom: "conv5-1"
  top: "prob1"
}

================================================
FILE: model/det3-half.prototxt
================================================
name: "ONet"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 48
input_dim: 48

##################################
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  convolution_param {
    num_output: 16
    kernel_size: 3
    stride: 1
     weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "prelu1"
  type: "PReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  convolution_param {
    num_output: 32
    kernel_size: 3
    stride: 1
     weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layer {
  name: "prelu2"
  type: "PReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  convolution_param {
	num_output: 32
	kernel_size: 3
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "prelu3"
  type: "PReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "pool3"
  top: "conv4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  convolution_param {
	num_output: 64
	kernel_size: 2
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "prelu4"
  type: "PReLU"
  bottom: "conv4"
  top: "conv4"
}


layer {
  name: "conv5"
  type: "InnerProduct"
  bottom: "conv4"
  top: "conv5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  inner_product_param {
	#kernel_size: 3
	num_output: 128
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "prelu5"
 type: "PReLU"
  # type: "TanH"
  bottom: "conv5"
  top: "conv5"
}


layer {
  name: "conv6-1"
  type: "InnerProduct"
  bottom: "conv5"
  top: "conv6-1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  inner_product_param {
    #kernel_size: 1
	num_output: 2
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv6-2"
  type: "InnerProduct"
  bottom: "conv5"
  top: "conv6-2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  inner_product_param {
  	#kernel_size: 1
	num_output: 4
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv6-3"
  type: "InnerProduct"
  bottom: "conv5"
  top: "conv6-3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  inner_product_param {
  	#kernel_size: 1
	num_output: 10
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
  
layer {
  name: "prob1"
  type: "Softmax"
  bottom: "conv6-1"
  top: "prob1"
}


================================================
FILE: model/det3.prototxt
================================================
name: "ONet"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 48
input_dim: 48

##################################
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  convolution_param {
    num_output: 32
    kernel_size: 3
    stride: 1
     weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "prelu1"
  type: "PReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    stride: 1
     weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layer {
  name: "prelu2"
  type: "PReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  convolution_param {
	num_output: 64
	kernel_size: 3
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "prelu3"
  type: "PReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "pool3"
  top: "conv4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  convolution_param {
	num_output: 128
	kernel_size: 2
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "prelu4"
  type: "PReLU"
  bottom: "conv4"
  top: "conv4"
}


layer {
  name: "conv5"
  type: "InnerProduct"
  bottom: "conv4"
  top: "conv5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  inner_product_param {
	#kernel_size: 3
	num_output: 256
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layer {
  name: "drop5"
  type: "Dropout"
  bottom: "conv5"
  top: "conv5"
  dropout_param {
    dropout_ratio: 0.25
  }
}
layer {
  name: "prelu5"
 type: "PReLU"
  # type: "TanH"
  bottom: "conv5"
  top: "conv5"
}


layer {
  name: "conv6-1"
  type: "InnerProduct"
  bottom: "conv5"
  top: "conv6-1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  inner_product_param {
    #kernel_size: 1
	num_output: 2
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv6-2"
  type: "InnerProduct"
  bottom: "conv5"
  top: "conv6-2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  inner_product_param {
  	#kernel_size: 1
	num_output: 4
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "conv6-3"
  type: "InnerProduct"
  bottom: "conv5"
  top: "conv6-3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 1
  }
  inner_product_param {
  	#kernel_size: 1
	num_output: 10
    weight_filler {
      type: "xavier"
	}
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
  
layer {
  name: "prob1"
  type: "Softmax"
  bottom: "conv6-1"
  top: "prob1"
}


================================================
FILE: mtcnn_opencv.cpp
================================================
//Created by Jack Yu
#include <fstream>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
using namespace std;
using namespace cv;

// Step, in network-input pixels, between neighbouring PNet output cells
// (GenerateBBox multiplies the cell index by this value).
const float pnet_stride = 2;
// Side length, in network-input pixels, of the window a PNet output cell is
// mapped back to in GenerateBBox.
const float pnet_cell_size = 12;
// NOTE(review): not referenced in the visible part of this file - presumably an
// upper bound on PNet candidates; confirm before relying on it.
const int pnet_max_detect_num = 5000;
// Mean and scale factor passed to blobFromImages in NextStage
// (0.0078125 == 1/128).
const float mean_val = 127.5f;
const float std_val = 0.0078125f;
// Mini-batch size.
// NOTE(review): not referenced in the visible part of this file; NextStage
// currently sends all crops through the network in a single batch.
const int step_size = 128;


// Axis-aligned rectangle in image pixel coordinates together with the
// detection score assigned by the most recent network stage.
struct FaceBox {
    float xmin;
    float ymin;
    float xmax;
    float ymax;
    float score;
};

// Everything the pipeline tracks for one face candidate.
struct FaceInfo {
    float bbox_reg[4];       // box regression offsets, applied by BBoxRegression
    float landmark_reg[10];  // not written by the visible code
    float landmark[10];      // five (x, y) pairs in image coordinates (filled in stage 3)
    FaceBox bbox;            // current rectangle and score
};



class MTCNN {
public:
    MTCNN(const string& proto_model_dir);
    vector<FaceInfo> Detect_mtcnn(const cv::Mat& img, const int min_size, const float* threshold, const float factor, const int stage);
//protected:
    vector<FaceInfo> ProposalNet(const cv::Mat& img, int min_size, float threshold, float factor);
    vector<FaceInfo> NextStage(const cv::Mat& image, vector<FaceInfo> &pre_stage_res, int input_w, int input_h, int stage_num, const float threshold);
    void BBoxRegression(vector<FaceInfo>& bboxes);
    void BBoxPadSquare(vector<FaceInfo>& bboxes, int width, int height);
    void BBoxPad(vector<FaceInfo>& bboxes, int width, int height);
    void GenerateBBox(Mat* confidence, Mat* reg_box, float scale, float thresh);
    std::vector<FaceInfo> NMS(std::vector<FaceInfo>& bboxes, float thresh, char methodType);
    float IoU(float xmin, float ymin, float xmax, float ymax, float xmin_, float ymin_, float xmax_, float ymax_, bool is_iom = false);



//    std::shared_ptr<dnn::Net> PNet_;
//    std::shared_ptr<dnn::Net> ONet_;
//    std::shared_ptr<dnn::Net> RNet_;
public:
    dnn::Net PNet_;
    dnn::Net RNet_;
    dnn::Net ONet_;

    std::vector<FaceInfo> candidate_boxes_;
    std::vector<FaceInfo> total_boxes_;
};


// Loads the PNet, RNet and ONet Caffe models stored in proto_model_dir.
// The file names are appended verbatim to the directory path.
MTCNN::MTCNN(const string& proto_model_dir) {
    // Joins the directory with both file names and parses the Caffe network.
    auto load_net = [&proto_model_dir](const char* prototxt, const char* weights) {
        return cv::dnn::readNetFromCaffe(proto_model_dir + prototxt, proto_model_dir + weights);
    };

    // PNet uses the det1_ files; det1.prototxt with det1_half.caffemodel is
    // the alternative that was previously wired in here.
    PNet_ = load_net("/det1_.prototxt", "/det1_.caffemodel");
    RNet_ = load_net("/det2.prototxt", "/det2_half.caffemodel");
    ONet_ = load_net("/det3-half.prototxt", "/det3-half.caffemodel");
}

// Ordering predicate for std::sort: places higher-scoring boxes first.
bool CompareBBox(const FaceInfo & a, const FaceInfo & b) {
    const float score_a = a.bbox.score;
    const float score_b = b.bbox.score;
    return score_a > score_b;
}



// Overlap ratio between rectangle (xmin, ymin, xmax, ymax) and rectangle
// (xmin_, ymin_, xmax_, ymax_). Coordinates are inclusive, hence the "+ 1"
// in every width/height.
//
// is_iom == false : intersection / union
// is_iom == true  : intersection / area of the smaller rectangle
//
// Returns 0 when the rectangles do not overlap.
float MTCNN::IoU(float xmin, float ymin, float xmax, float ymax,
                 float xmin_, float ymin_, float xmax_, float ymax_, bool is_iom) {
    const float inter_w = std::min(xmax, xmax_) - std::max(xmin, xmin_) + 1;
    const float inter_h = std::min(ymax, ymax_) - std::max(ymin, ymin_) + 1;
    if (inter_w <= 0 || inter_h <= 0)
        return 0;

    const float inter_area = inter_w*inter_h;
    const float area_first = (xmax - xmin + 1)*(ymax - ymin + 1);
    const float area_second = (xmax_ - xmin_ + 1)*(ymax_ - ymin_ + 1);

    const float denominator = is_iom
        ? std::min(area_first, area_second)
        : (area_first + area_second - inter_area);
    return inter_area / denominator;
}
// Applies each box's stored regression offsets in place. The four offsets are
// fractions of the current box width (x edges) and height (y edges).
void MTCNN::BBoxRegression(vector<FaceInfo>& bboxes) {
    for (FaceInfo &face : bboxes) {
        FaceBox &box = face.bbox;
        const float *offsets = face.bbox_reg;

        // Size is measured before any edge moves, so all four edges are
        // shifted relative to the same original extent.
        const float box_w = box.xmax - box.xmin + 1;
        const float box_h = box.ymax - box.ymin + 1;

        box.xmin += offsets[0] * box_w;
        box.ymin += offsets[1] * box_h;
        box.xmax += offsets[2] * box_w;
        box.ymax += offsets[3] * box_h;
    }
}
// Clamps every box to the image rectangle [0, width-1] x [0, height-1] and
// rounds the coordinates to whole pixels, in place.
void MTCNN::BBoxPad(vector<FaceInfo>& bboxes, int width, int height) {
    const float last_col = width - 1.f;
    const float last_row = height - 1.f;

    for (FaceInfo &face : bboxes) {
        FaceBox &box = face.bbox;
        box.xmin = std::round(std::max(box.xmin, 0.f));
        box.ymin = std::round(std::max(box.ymin, 0.f));
        box.xmax = std::round(std::min(box.xmax, last_col));
        box.ymax = std::round(std::min(box.ymax, last_row));
    }
}
// Grows every box to a square whose side is the longer of its two sides,
// keeps it centred on the original box, then clamps it to the image and
// rounds to whole pixels. Boxes are modified in place.
void MTCNN::BBoxPadSquare(vector<FaceInfo>& bboxes, int width, int height) {
    const float last_col = width - 1.f;
    const float last_row = height - 1.f;

    for (FaceInfo &face : bboxes) {
        FaceBox &box = face.bbox;
        const float box_w = box.xmax - box.xmin + 1;
        const float box_h = box.ymax - box.ymin + 1;
        const float side = std::max(box_w, box_h);

        // Shift the top-left corner so the square stays centred, clamped at 0.
        box.xmin = std::round(std::max(box.xmin + (box_w - side)*0.5f, 0.f));
        box.ymin = std::round(std::max(box.ymin + (box_h - side)*0.5f, 0.f));

        // The bottom-right corner is derived from the already clamped and
        // rounded top-left corner.
        box.xmax = std::round(std::min(box.xmin + side - 1, last_col));
        box.ymax = std::round(std::min(box.ymin + side - 1, last_row));
    }
}
// Converts one PNet output map into candidate face boxes expressed in
// original-image coordinates. The result replaces candidate_boxes_.
//
// confidence : PNet probability blob; size[2] is the map height, size[3] the
//              map width, and the second H*W plane is the one that is read.
// reg_box    : PNet regression blob holding four consecutive H*W planes.
// scale      : pyramid scale the network input was resized by; box
//              coordinates are divided by it.
// thresh     : minimum face probability for a cell to become a candidate.
//
// The plane that is read is treated as the "not a face" probability: a cell
// is kept when that value is <= 1 - thresh. The stored score is therefore
// 1 - value, so that a higher score means a more confident face, which is
// what the score-descending sort in NMS expects.
void MTCNN::GenerateBBox(Mat* confidence, Mat* reg_box,
                         float scale, float thresh) {
    candidate_boxes_.clear();

    // Nothing to do without both blobs, and size[2]/size[3] are only valid
    // for blobs with at least four dimensions.
    if (confidence == nullptr || reg_box == nullptr)
        return;
    if (confidence->empty() || reg_box->empty() || confidence->dims < 4)
        return;

    const int feature_map_w_ = confidence->size[3];
    const int feature_map_h_ = confidence->size[2];
    const int spatical_size = feature_map_w_*feature_map_h_;

    // Skip the first plane; the second plane is the one being thresholded.
    const float* confidence_data = reinterpret_cast<const float*>(confidence->data) + spatical_size;
    const float* reg_data = reinterpret_cast<const float*>(reg_box->data);

    for (int i = 0; i < spatical_size; i++) {
        const float non_face_prob = confidence_data[i];
        if (non_face_prob <= 1 - thresh) {
            const int y = i / feature_map_w_;
            const int x = i - feature_map_w_ * y;

            // Value-initialise so the landmark fields are not left as garbage.
            FaceInfo faceInfo = {};
            FaceBox &faceBox = faceInfo.bbox;

            // Map the output cell back to its window in the original image.
            faceBox.xmin = (float)(x * pnet_stride) / scale;
            faceBox.ymin = (float)(y * pnet_stride) / scale;
            faceBox.xmax = (float)(x * pnet_stride + pnet_cell_size - 1.f) / scale;
            faceBox.ymax = (float)(y * pnet_stride + pnet_cell_size - 1.f) / scale;

            faceInfo.bbox_reg[0] = reg_data[i];
            faceInfo.bbox_reg[1] = reg_data[i + spatical_size];
            faceInfo.bbox_reg[2] = reg_data[i + 2 * spatical_size];
            faceInfo.bbox_reg[3] = reg_data[i + 3 * spatical_size];

            // Store the face probability, not the non-face probability.
            faceBox.score = 1.f - non_face_prob;
            candidate_boxes_.push_back(faceInfo);
        }
    }
}
std::vector<FaceInfo> MTCNN::NMS(std::vector<FaceInfo>& bboxes,
                                 float thresh, char methodType) {
    std::vector<FaceInfo> bboxes_nms;
    if (bboxes.size() == 0) {
        return bboxes_nms;
    }
    std::sort(bboxes.begin(), bboxes.end(), CompareBBox);

    int32_t select_idx = 0;
    int32_t num_bbox = static_cast<int32_t>(bboxes.size());
    std::vector<int32_t> mask_merged(num_bbox, 0);
    bool all_merged = false;

    while (!all_merged) {
        while (select_idx < num_bbox && mask_merged[select_idx] == 1)
            select_idx++;
        if (select_idx == num_bbox) {
            all_merged = true;
            continue;
        }

        bboxes_nms.push_back(bboxes[select_idx]);
        mask_merged[select_idx] = 1;

        FaceBox select_bbox = bboxes[select_idx].bbox;
        float area1 = static_cast<float>((select_bbox.xmax - select_bbox.xmin + 1) * (select_bbox.ymax - select_bbox.ymin + 1));
        float x1 = static_cast<float>(select_bbox.xmin);
        float y1 = static_cast<float>(select_bbox.ymin);
        float x2 = static_cast<float>(select_bbox.xmax);
        float y2 = static_cast<float>(select_bbox.ymax);

        select_idx++;
//#pragma omp parallel for num_threads(threads_num)
        for (int32_t i = select_idx; i < num_bbox; i++) {
            if (mask_merged[i] == 1)
                continue;

            FaceBox & bbox_i = bboxes[i].bbox;
            float x = std::max<float>(x1, static_cast<float>(bbox_i.xmin));
            float y = std::max<float>(y1, static_cast<float>(bbox_i.ymin));
            float w = std::min<float>(x2, static_cast<float>(bbox_i.xmax)) - x + 1;
            float h = std::min<float>(y2, static_cast<float>(bbox_i.ymax)) - y + 1;
            if (w <= 0 || h <= 0)
                continue;

            float area2 = static_cast<float>((bbox_i.xmax - bbox_i.xmin + 1) * (bbox_i.ymax - bbox_i.ymin + 1));
            float area_intersect = w * h;

            switch (methodType) {
                case 'u':
                    if (static_cast<float>(area_intersect) / (area1 + area2 - area_intersect) > thresh)
                        mask_merged[i] = 1;
                    break;
                case 'm':
                    if (static_cast<float>(area_intersect) / std::min(area1, area2) > thresh)
                        mask_merged[i] = 1;
                    break;
                default:
                    break;
            }
        }
    }
    return bboxes_nms;
}

// Runs the second (RNet) or third (ONet) cascade stage on the boxes produced
// by the previous stage.
//
// image         : full input image the boxes refer to.
// pre_stage_res : boxes from the previous stage (only read here).
// input_w/_h    : network input size each crop is resized to.
// stage_num     : 2 selects RNet, 3 selects ONet; any other value yields an
//                 empty result.
// threshold     : minimum face probability for a box to be returned.
//
// Returns one FaceInfo per accepted box, carrying the previous-stage
// rectangle, the new score and the new regression offsets; for stage 3 the
// five landmarks are also filled in, as image coordinates.
//
// Boxes whose crop falls completely outside the image, or has zero area after
// clipping, are skipped instead of making cv::resize throw.
vector<FaceInfo> MTCNN::NextStage(const cv::Mat& image, vector<FaceInfo> &pre_stage_res, int input_w, int input_h, int stage_num, const float threshold) {
    vector<FaceInfo> res;
    if (pre_stage_res.empty() || image.empty())
        return res;
    if (stage_num != 2 && stage_num != 3)
        return res;

    // Crop and resize every usable box. source_index[j] remembers which
    // previous-stage box produced batch row j, because skipped boxes make the
    // two index spaces diverge.
    const Rect image_bounds(0, 0, image.cols, image.rows);
    std::vector<cv::Mat> inputs;
    std::vector<int> source_index;
    inputs.reserve(pre_stage_res.size());
    source_index.reserve(pre_stage_res.size());

    for (int n = 0; n < (int)pre_stage_res.size(); ++n) {
        const FaceBox &box = pre_stage_res[n].bbox;
        Rect crop(Point((int)box.xmin, (int)box.ymin), Point((int)box.xmax, (int)box.ymax));
        crop &= image_bounds;
        if (crop.width <= 0 || crop.height <= 0)
            continue;

        Mat roi;
        resize(image(crop), roi, Size(input_w, input_h));
        inputs.push_back(roi);
        source_index.push_back(n);
    }
    if (inputs.empty())
        return res;

    // blobFromImages subtracts mean_val and then multiplies by std_val.
    Mat blob_input = dnn::blobFromImages(inputs, std_val, cv::Size(), cv::Scalar(mean_val, mean_val, mean_val), false);

    std::vector< Mat > targets_blobs;
    const Mat* confidence = nullptr;
    const Mat* reg_box = nullptr;
    const Mat* reg_landmark = nullptr;

    if (stage_num == 2) {
        RNet_.setInput(blob_input, "data");
        const std::vector< String > targets_node{"conv5-2", "prob1"};
        RNet_.forward(targets_blobs, targets_node);
        reg_box = &targets_blobs[0];
        confidence = &targets_blobs[1];
    } else {
        ONet_.setInput(blob_input, "data");
        const std::vector< String > targets_node{"conv6-2", "conv6-3", "prob1"};
        ONet_.forward(targets_blobs, targets_node);
        reg_box = &targets_blobs[0];
        reg_landmark = &targets_blobs[1];
        confidence = &targets_blobs[2];
    }

    const float* confidence_data = (const float*)confidence->data;
    const float* reg_data = (const float*)reg_box->data;
    const float* landmark_data = reg_landmark ? (const float*)reg_landmark->data : nullptr;

    const int batch_size = (int)inputs.size();
    for (int j = 0; j < batch_size; ++j) {
        // Each batch row holds two probabilities; the second one is compared
        // against the threshold and stored as the score.
        const float face_prob = confidence_data[2 * j + 1];
        if (face_prob >= threshold) {
            const FaceInfo &source = pre_stage_res[source_index[j]];

            // Value-initialise so fields this stage does not write are zero.
            FaceInfo info = {};
            info.bbox = source.bbox;
            info.bbox.score = face_prob;
            for (int i = 0; i < 4; ++i) {
                info.bbox_reg[i] = reg_data[4 * j + i];
            }
            if (landmark_data) {
                // Landmarks are read as interleaved (x, y) pairs, relative to
                // the box; scale by the box size and offset by its corner.
                const float w = info.bbox.xmax - info.bbox.xmin + 1.f;
                const float h = info.bbox.ymax - info.bbox.ymin + 1.f;
                for (int i = 0; i < 5; ++i) {
                    info.landmark[2 * i] = landmark_data[10 * j + 2 * i] * w + info.bbox.xmin;
                    info.landmark[2 * i + 1] = landmark_data[10 * j + 2 * i + 1] * h + info.bbox.ymin;
                }
            }
            res.push_back(info);
        }
    }
    return res;
}

// First cascade stage: runs PNet over an image pyramid and returns the merged
// face proposals.
//
// img       : input image.
// minSize   : smallest face size to look for, in pixels; sets the first
//             pyramid scale to 12 / minSize.
// threshold : forwarded to GenerateBBox as the PNet acceptance threshold.
// factor    : multiplier applied to the scale from one pyramid level to the
//             next; must be below 1 so the pyramid shrinks.
//
// Returns the proposals after per-level NMS (0.5), cross-level NMS (0.7), box
// regression and squaring. An empty vector is returned for an empty image,
// for minSize <= 0, or for a factor that is not below 1; without that check
// the scale loop below would never terminate.
//
// Side effects: overwrites total_boxes_ and candidate_boxes_.
vector<FaceInfo> MTCNN::ProposalNet(const cv::Mat& img, int minSize, float threshold, float factor) {
    vector<FaceInfo> res_boxes;
    total_boxes_.clear();

    // !(factor < 1) also rejects NaN.
    if (img.empty() || minSize <= 0 || !(factor < 1.f))
        return res_boxes;

    const int width = img.cols;
    const int height = img.rows;

    // Build the list of pyramid scales: keep shrinking until the shorter
    // image side drops below the 12-pixel PNet window.
    float scale = 12.f / minSize;
    float minWH = std::min(height, width) * scale;
    std::vector<float> scales;
    while (minWH >= 12) {
        scales.push_back(scale);
        minWH *= factor;
        scale *= factor;
    }

    cv::Mat resized;
    for (size_t i = 0; i < scales.size(); i++) {
        const int ws = (int)std::ceil(width * scales[i]);
        const int hs = (int)std::ceil(height * scales[i]);
        cv::resize(img, resized, cv::Size(ws, hs), 0, 0, cv::INTER_LINEAR);

        // PNet input is scaled by 1/255 with no mean subtraction; this differs
        // from the mean_val/std_val normalisation NextStage uses.
        // NOTE(review): presumably matches how the det1_ model was trained - confirm.
        cv::Mat inputBlob = cv::dnn::blobFromImage(resized, 1/255.0, cv::Size(), cv::Scalar(0,0,0), false);

        PNet_.setInput(inputBlob, "data");
        const std::vector< cv::String > targets_node{"conv4-2", "prob1"};
        std::vector< cv::Mat > targets_blobs;
        PNet_.forward(targets_blobs, targets_node);

        cv::Mat reg = targets_blobs[0];
        cv::Mat prob = targets_blobs[1];
        GenerateBBox(&prob, &reg, scales[i], threshold);

        // Suppress duplicates within this pyramid level first.
        std::vector<FaceInfo> bboxes_nms = NMS(candidate_boxes_, 0.5, 'u');
        if (bboxes_nms.size() > 0) {
            total_boxes_.insert(total_boxes_.end(), bboxes_nms.begin(), bboxes_nms.end());
        }
    }

    if (!total_boxes_.empty()) {
        // Merge across levels, then refine and square the boxes.
        res_boxes = NMS(total_boxes_, 0.7f, 'u');
        BBoxRegression(res_boxes);
        BBoxPadSquare(res_boxes, width, height);
    }
    return res_boxes;
}

/**
 * Runs the detection cascade up to the requested stage.
 *
 * @param image      Image to search.
 * @param minSize    Forwarded to ProposalNet.
 * @param threshold  Array of per-stage score thresholds; element 0 is used by
 *                   stage 1, element 1 by stage 2 and element 2 by stage 3.
 * @param factor     Forwarded to ProposalNet.
 * @param stage      Last stage to execute: 1 returns the ProposalNet output,
 *                   2 the second-stage output, 3 or more the third-stage
 *                   output. Values below 1 run nothing and return an empty
 *                   vector.
 * @return The boxes produced by the last executed stage.
 */
vector<FaceInfo> MTCNN::Detect_mtcnn(const cv::Mat& image, const int minSize, const float* threshold, const float factor, const int stage) {
    // Pushes `candidates` through NextStage in chunks of at most step_size
    // boxes and concatenates whatever each chunk returns.
    auto runInBatches = [&](const vector<FaceInfo>& candidates, int side, int stageId, float minScore) -> vector<FaceInfo> {
        vector<FaceInfo> collected;
        int total = (int)candidates.size();
        int batches = (int)ceil(1.f * total / step_size);
        for (int b = 0; b < batches; ++b) {
            int first = b * step_size;
            int last = min(first + step_size, total);
            vector<FaceInfo> chunk(candidates.begin() + first, candidates.begin() + last);
            vector<FaceInfo> kept = NextStage(image, chunk, side, side, stageId, minScore);
            collected.insert(collected.end(), kept.begin(), kept.end());
        }
        return collected;
    };

    // Nothing requested: hand back an empty result.
    if (stage < 1) {
        return vector<FaceInfo>();
    }

    // ---- Stage 1 ----
    vector<FaceInfo> proposals = ProposalNet(image, minSize, threshold[0], factor);
    if (stage == 1) {
        return proposals;
    }

    // ---- Stage 2: 24x24 crops ----
    vector<FaceInfo> refined;
    if (!proposals.empty()) {
        // Keep at most pnet_max_detect_num proposals.
        if ((int)proposals.size() > pnet_max_detect_num) {
            proposals.resize(pnet_max_detect_num);
        }
        refined = runInBatches(proposals, 24, 2, threshold[1]);
        refined = NMS(refined, 0.4f, 'm');
        BBoxRegression(refined);
        BBoxPadSquare(refined, image.cols, image.rows);
    }
    if (stage == 2) {
        return refined;
    }

    // ---- Stage 3: 48x48 crops ----
    vector<FaceInfo> finals;
    if (!refined.empty()) {
        finals = runInBatches(refined, 48, 3, threshold[2]);
        // Unlike stage 2, regression is applied before NMS here.
        BBoxRegression(finals);
        finals = NMS(finals, 0.4f, 'm');
        BBoxPad(finals, image.cols, image.rows);
    }
    return finals;
}

int main(int argc, char **argv)
{

    MTCNN detector("model");
    string name_list[1] = {
		"test.jpg",
	};

//	MTCNN detector("./model");
	float factor = 0.709f;
	float threshold[3] = { 0.7f, 0.6f, 0.6f };
	int minSize = 12;
	for (int n = 0; n < 1;++n){
		cv::Mat image = cv::imread(name_list[n], 1);

        for(int i = 0 ; i < 10 ; i ++) {
            double t = (double) cv::getTickCount();
            vector<FaceInfo> faceInfo = detector.Detect_mtcnn(image, minSize, threshold, factor, 3);
            std::cout << name_list[n] << " time," << (double) (cv::getTickCount() - t) / cv::getTickFrequency() << "s"
                      << std::endl;

            for (int i = 0; i < faceInfo.size(); i++) {
                int x = (int) faceInfo[i].bbox.xmin;
                int y = (int) faceInfo[i].bbox.ymin;
                int w = (int) (faceInfo[i].bbox.xmax - faceInfo[i].bbox.xmin + 1);
                int h = (int) (faceInfo[i].bbox.ymax - faceInfo[i].bbox.ymin + 1);
                cv::rectangle(image, cv::Rect(x, y, w, h), cv::Scalar(255, 0, 0), 2);
            }
            cv::imwrite("test.png", image);
            cv::imshow("image", image);
            cv::waitKey(0);

        }


    }
 
	 
	return 1;
}

Download .txt

gitextract_fjo1u_o6/

├── README.md
├── model/
│   ├── det1.prototxt
│   ├── det1_.caffemodel
│   ├── det1_.prototxt
│   ├── det1_half.caffemodel
│   ├── det2.prototxt
│   ├── det2_half.caffemodel
│   ├── det3-half.caffemodel
│   ├── det3-half.prototxt
│   ├── det3.caffemodel
│   └── det3.prototxt
└── mtcnn_opencv.cpp

Download .txt

SYMBOL INDEX (5 symbols across 1 file)

FILE: mtcnn_opencv.cpp
  type FaceBox (line 19) | struct FaceBox {
  type FaceInfo (line 26) | struct FaceInfo {
  class MTCNN (line 35) | class MTCNN {
  function CompareBBox (line 72) | bool CompareBBox(const FaceInfo & a, const FaceInfo & b) {
  function main (line 476) | int main(int argc, char **argv)

Download .json

Condensed preview — 12 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (41K chars).

[
  {
    "path": "README.md",
    "chars": 949,
    "preview": "# Fast-MTCNN\n\nA casual work about retainining mtcnn Pnet and Onet. make it a little bit fast,which achiciving 100fps+ (1"
  },
  {
    "path": "model/det1.prototxt",
    "chars": 2353,
    "preview": "name: \"PNet\"\ninput: \"data\"\ninput_dim: 1\ninput_dim: 3\ninput_dim: 12\ninput_dim: 12\n\nlayer {\n  name: \"conv1\"\n  type: \"Convo"
  },
  {
    "path": "model/det1_.prototxt",
    "chars": 2985,
    "preview": "input: \"data\"\ninput_dim: 1\ninput_dim: 3\ninput_dim: 12\ninput_dim: 12\nlayer {\n  name: \"conv1\"\n  type: \"Convolution\"\n  bott"
  },
  {
    "path": "model/det2.prototxt",
    "chars": 3108,
    "preview": "name: \"RNet\"\ninput: \"data\"\ninput_dim: 1\ninput_dim: 3\ninput_dim: 24\ninput_dim: 24\n\nlayer {\n  name: \"conv1\"\n  type: \"Convo"
  },
  {
    "path": "model/det3-half.prototxt",
    "chars": 4113,
    "preview": "name: \"ONet\"\r\ninput: \"data\"\r\ninput_dim: 1\r\ninput_dim: 3\r\ninput_dim: 48\r\ninput_dim: 48\r\n\r\n###############################"
  },
  {
    "path": "model/det3.prototxt",
    "chars": 4248,
    "preview": "name: \"ONet\"\r\ninput: \"data\"\r\ninput_dim: 1\r\ninput_dim: 3\r\ninput_dim: 48\r\ninput_dim: 48\r\n\r\n###############################"
  },
  {
    "path": "mtcnn_opencv.cpp",
    "chars": 18967,
    "preview": "//Created by Jack Yu\r\n#include <fstream>\r\n#include <iostream>\r\n#include <opencv2/opencv.hpp>\r\n#include <opencv2/dnn.hpp>"
  }
]

// ... and 5 more files (download for full content)

About this extraction

This page contains the full source code of the szad670401/Fast-MTCNN GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 12 files (35.9 KB), approximately 11.8k tokens, and a symbol index with 5 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo