Repository: szad670401/Fast-MTCNN Branch: master Commit: b2b48e8efb82 Files: 12 Total size: 35.9 KB

Directory structure:
gitextract_fjo1u_o6/
├── README.md
├── model/
│   ├── det1.prototxt
│   ├── det1_.caffemodel
│   ├── det1_.prototxt
│   ├── det1_half.caffemodel
│   ├── det2.prototxt
│   ├── det2_half.caffemodel
│   ├── det3-half.caffemodel
│   ├── det3-half.prototxt
│   ├── det3.caffemodel
│   └── det3.prototxt
└── mtcnn_opencv.cpp

================================================
FILE CONTENTS
================================================

================================================
FILE: README.md
================================================

# Fast-MTCNN

A casual project about retraining the MTCNN PNet and ONet to make them a little faster, achieving 100 fps+ (1920*1080, minSize 60) on an Intel i7 6700K (single thread), although the accuracy is not as good.

## Dependencies

+ OpenCV 3.4.1 only

The demo is based on the [OpenCV](https://github.com/opencv/opencv) DNN module. My computer with an Intel i7 6700K (single thread) can achieve 100 fps+ (1920*1080, minSize 60) when compiled with OpenBLAS (OpenCV 3.4.1). If you want better performance, you can compile with the [Intel MKL-DNN Inference Engine package](https://github.com/opencv/opencv/wiki/Intel%27s-Deep-Learning-Inference-Engine-backend) to accelerate.

### Demo Image

![Screen Shot 2018-05-25 at 2.25.02 AM](images/test.png)

### TODO

+ Optimize PNet, RNet and ONet with modern net design (bottleneck, depthwise conv, inverted residual block...).
+ Benchmark on FDDB.
+ Computation sharing to accelerate speed as the number of detected faces increases.
## Anthor + Jack Yu ================================================ FILE: model/det1.prototxt ================================================ name: "PNet" input: "data" input_dim: 1 input_dim: 3 input_dim: 12 input_dim: 12 layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 10 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "PReLU1" type: "PReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 16 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "PReLU2" type: "PReLU" bottom: "conv2" top: "conv2" } layer { name: "conv3" type: "Convolution" bottom: "conv2" top: "conv3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 32 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "PReLU3" type: "PReLU" bottom: "conv3" top: "conv3" } layer { name: "conv4-1" type: "Convolution" bottom: "conv3" top: "conv4-1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 2 kernel_size: 1 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "conv4-2" type: "Convolution" bottom: "conv3" top: "conv4-2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 4 kernel_size: 1 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prob1" type: "Softmax" 
bottom: "conv4-1" top: "prob1" } ================================================ FILE: model/det1_.prototxt ================================================ input: "data" input_dim: 1 input_dim: 3 input_dim: 12 input_dim: 12 layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" convolution_param { num_output: 10 bias_term: true pad_h: 0 pad_w: 0 kernel_h: 3 kernel_w: 3 stride_h: 1 stride_w: 1 } } layer { name: "batch_normalization_5" type: "BatchNorm" bottom: "conv1" top: "batch_normalization_5" batch_norm_param { moving_average_fraction: 0.990000009537 eps: 0.0010000000475 } } layer { name: "batch_normalization_5_scale" type: "Scale" bottom: "batch_normalization_5" top: "batch_normalization_5" scale_param { bias_term: true } } layer { name: "prelu1" type: "PReLU" bottom: "batch_normalization_5" top: "prelu1" } layer { name: "max_pooling2d_3" type: "Pooling" bottom: "prelu1" top: "max_pooling2d_3" pooling_param { pool: MAX kernel_h: 2 kernel_w: 2 stride_h: 2 stride_w: 2 pad_h: 0 pad_w: 0 } } layer { name: "conv2_" type: "Convolution" bottom: "max_pooling2d_3" top: "conv2_" convolution_param { num_output: 14 bias_term: true pad_h: 0 pad_w: 0 kernel_h: 3 kernel_w: 3 stride_h: 1 stride_w: 1 } } layer { name: "batch_normalization_6" type: "BatchNorm" bottom: "conv2_" top: "batch_normalization_6" batch_norm_param { moving_average_fraction: 0.990000009537 eps: 0.0010000000475 } } layer { name: "batch_normalization_6_scale" type: "Scale" bottom: "batch_normalization_6" top: "batch_normalization_6" scale_param { bias_term: true } } layer { name: "prelu2" type: "PReLU" bottom: "batch_normalization_6" top: "prelu2" } layer { name: "conv3" type: "Convolution" bottom: "prelu2" top: "conv3" convolution_param { num_output: 16 bias_term: true pad_h: 0 pad_w: 0 kernel_h: 3 kernel_w: 3 stride_h: 1 stride_w: 1 } } layer { name: "batch_normalization_7" type: "BatchNorm" bottom: "conv3" top: "batch_normalization_7" batch_norm_param { moving_average_fraction: 
0.990000009537 eps: 0.0010000000475 } } layer { name: "batch_normalization_7_scale" type: "Scale" bottom: "batch_normalization_7" top: "batch_normalization_7" scale_param { bias_term: true } } layer { name: "prelu3" type: "PReLU" bottom: "batch_normalization_7" top: "prelu3" } layer { name: "classifier1" type: "Convolution" bottom: "prelu3" top: "classifier1" convolution_param { num_output: 2 bias_term: true pad_h: 0 pad_w: 0 kernel_h: 1 kernel_w: 1 stride_h: 1 stride_w: 1 } } layer { name: "prob1" type: "Softmax" bottom: "classifier1" top: "prob1" } layer { name: "conv4-2" type: "Convolution" bottom: "prelu3" top: "conv4-2" convolution_param { num_output: 4 bias_term: true pad_h: 0 pad_w: 0 kernel_h: 1 kernel_w: 1 stride_h: 1 stride_w: 1 } } ================================================ FILE: model/det2.prototxt ================================================ name: "RNet" input: "data" input_dim: 1 input_dim: 3 input_dim: 24 input_dim: 24 layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 28 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu1" type: "PReLU" bottom: "conv1" top: "conv1" propagate_down: true } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 48 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu2" type: "PReLU" bottom: "conv2" top: "conv2" propagate_down: true } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } #################################### 
################################## layer { name: "conv3" type: "Convolution" bottom: "pool2" top: "conv3" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 64 kernel_size: 2 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu3" type: "PReLU" bottom: "conv3" top: "conv3" propagate_down: true } ############################### ############################### layer { name: "conv4" type: "InnerProduct" bottom: "conv3" top: "conv4" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } inner_product_param { num_output: 128 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu4" type: "PReLU" bottom: "conv4" top: "conv4" } layer { name: "conv5-1" type: "InnerProduct" bottom: "conv4" top: "conv5-1" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } inner_product_param { num_output: 2 #kernel_size: 1 #stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "conv5-2" type: "InnerProduct" bottom: "conv4" top: "conv5-2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { num_output: 4 #kernel_size: 1 #stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prob1" type: "Softmax" bottom: "conv5-1" top: "prob1" } ================================================ FILE: model/det3-half.prototxt ================================================ name: "ONet" input: "data" input_dim: 1 input_dim: 3 input_dim: 48 input_dim: 48 ################################## layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 16 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu1" type: 
"PReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 32 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu2" type: "PReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv3" type: "Convolution" bottom: "pool2" top: "conv3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 32 kernel_size: 3 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu3" type: "PReLU" bottom: "conv3" top: "conv3" } layer { name: "pool3" type: "Pooling" bottom: "conv3" top: "pool3" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv4" type: "Convolution" bottom: "pool3" top: "conv4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 64 kernel_size: 2 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu4" type: "PReLU" bottom: "conv4" top: "conv4" } layer { name: "conv5" type: "InnerProduct" bottom: "conv4" top: "conv5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { #kernel_size: 3 num_output: 128 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu5" type: "PReLU" # type: "TanH" bottom: "conv5" top: "conv5" } layer { name: "conv6-1" type: "InnerProduct" bottom: "conv5" top: "conv6-1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { #kernel_size: 1 num_output: 2 weight_filler 
{ type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "conv6-2" type: "InnerProduct" bottom: "conv5" top: "conv6-2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { #kernel_size: 1 num_output: 4 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "conv6-3" type: "InnerProduct" bottom: "conv5" top: "conv6-3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { #kernel_size: 1 num_output: 10 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prob1" type: "Softmax" bottom: "conv6-1" top: "prob1" } ================================================ FILE: model/det3.prototxt ================================================ name: "ONet" input: "data" input_dim: 1 input_dim: 3 input_dim: 48 input_dim: 48 ################################## layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 32 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu1" type: "PReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 64 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu2" type: "PReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "conv3" type: "Convolution" bottom: "pool2" top: "conv3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 
decay_mult: 1 } convolution_param { num_output: 64 kernel_size: 3 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu3" type: "PReLU" bottom: "conv3" top: "conv3" } layer { name: "pool3" type: "Pooling" bottom: "conv3" top: "pool3" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv4" type: "Convolution" bottom: "pool3" top: "conv4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } convolution_param { num_output: 128 kernel_size: 2 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prelu4" type: "PReLU" bottom: "conv4" top: "conv4" } layer { name: "conv5" type: "InnerProduct" bottom: "conv4" top: "conv5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { #kernel_size: 3 num_output: 256 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "drop5" type: "Dropout" bottom: "conv5" top: "conv5" dropout_param { dropout_ratio: 0.25 } } layer { name: "prelu5" type: "PReLU" # type: "TanH" bottom: "conv5" top: "conv5" } layer { name: "conv6-1" type: "InnerProduct" bottom: "conv5" top: "conv6-1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { #kernel_size: 1 num_output: 2 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "conv6-2" type: "InnerProduct" bottom: "conv5" top: "conv6-2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { #kernel_size: 1 num_output: 4 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "conv6-3" type: "InnerProduct" bottom: "conv5" top: "conv6-3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 1 } inner_product_param { #kernel_size: 1 num_output: 10 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } } layer { name: "prob1" type: 
"Softmax" bottom: "conv6-1" top: "prob1" } ================================================ FILE: mtcnn_opencv.cpp ================================================ //Created by Jack Yu #include #include #include #include using namespace std; using namespace cv; const float pnet_stride = 2; const float pnet_cell_size = 12; const int pnet_max_detect_num = 5000; //mean & std const float mean_val = 127.5f; const float std_val = 0.0078125f; //minibatch size const int step_size = 128; typedef struct FaceBox { float xmin; float ymin; float xmax; float ymax; float score; } FaceBox; typedef struct FaceInfo { float bbox_reg[4]; float landmark_reg[10]; float landmark[10]; FaceBox bbox; } FaceInfo; class MTCNN { public: MTCNN(const string& proto_model_dir); vector Detect_mtcnn(const cv::Mat& img, const int min_size, const float* threshold, const float factor, const int stage); //protected: vector ProposalNet(const cv::Mat& img, int min_size, float threshold, float factor); vector NextStage(const cv::Mat& image, vector &pre_stage_res, int input_w, int input_h, int stage_num, const float threshold); void BBoxRegression(vector& bboxes); void BBoxPadSquare(vector& bboxes, int width, int height); void BBoxPad(vector& bboxes, int width, int height); void GenerateBBox(Mat* confidence, Mat* reg_box, float scale, float thresh); std::vector NMS(std::vector& bboxes, float thresh, char methodType); float IoU(float xmin, float ymin, float xmax, float ymax, float xmin_, float ymin_, float xmax_, float ymax_, bool is_iom = false); // std::shared_ptr PNet_; // std::shared_ptr ONet_; // std::shared_ptr RNet_; public: dnn::Net PNet_; dnn::Net RNet_; dnn::Net ONet_; std::vector candidate_boxes_; std::vector total_boxes_; }; MTCNN::MTCNN(const string& proto_model_dir) { // PNet_ = cv::dnn::readNetFromCaffe(proto_model_dir + "/det1.prototxt", proto_model_dir + "/det1_half.caffemodel"); PNet_ = cv::dnn::readNetFromCaffe(proto_model_dir + "/det1_.prototxt", proto_model_dir + "/det1_.caffemodel"); 
RNet_ = cv::dnn::readNetFromCaffe(proto_model_dir + "/det2.prototxt", proto_model_dir + "/det2_half.caffemodel"); ONet_ = cv::dnn::readNetFromCaffe(proto_model_dir + "/det3-half.prototxt", proto_model_dir + "/det3-half.caffemodel"); } bool CompareBBox(const FaceInfo & a, const FaceInfo & b) { return a.bbox.score > b.bbox.score; } float MTCNN::IoU(float xmin, float ymin, float xmax, float ymax, float xmin_, float ymin_, float xmax_, float ymax_, bool is_iom) { float iw = std::min(xmax, xmax_) - std::max(xmin, xmin_) + 1; float ih = std::min(ymax, ymax_) - std::max(ymin, ymin_) + 1; if (iw <= 0 || ih <= 0) return 0; float s = iw*ih; if (is_iom) { float ov = s / min((xmax - xmin + 1)*(ymax - ymin + 1), (xmax_ - xmin_ + 1)*(ymax_ - ymin_ + 1)); return ov; } else { float ov = s / ((xmax - xmin + 1)*(ymax - ymin + 1) + (xmax_ - xmin_ + 1)*(ymax_ - ymin_ + 1) - s); return ov; } } void MTCNN::BBoxRegression(vector& bboxes) { //#pragma omp parallel for num_threads(threads_num) for (int i = 0; i < bboxes.size(); ++i) { FaceBox &bbox = bboxes[i].bbox; float *bbox_reg = bboxes[i].bbox_reg; float w = bbox.xmax - bbox.xmin + 1; float h = bbox.ymax - bbox.ymin + 1; bbox.xmin += bbox_reg[0] * w; bbox.ymin += bbox_reg[1] * h; bbox.xmax += bbox_reg[2] * w; bbox.ymax += bbox_reg[3] * h; } } void MTCNN::BBoxPad(vector& bboxes, int width, int height) { //#pragma omp parallel for num_threads(threads_num) for (int i = 0; i < bboxes.size(); ++i) { FaceBox &bbox = bboxes[i].bbox; bbox.xmin = round(max(bbox.xmin, 0.f)); bbox.ymin = round(max(bbox.ymin, 0.f)); bbox.xmax = round(min(bbox.xmax, width - 1.f)); bbox.ymax = round(min(bbox.ymax, height - 1.f)); } } void MTCNN::BBoxPadSquare(vector& bboxes, int width, int height) { //#pragma omp parallel for num_threads(threads_num) for (int i = 0; i < bboxes.size(); ++i) { FaceBox &bbox = bboxes[i].bbox; float w = bbox.xmax - bbox.xmin + 1; float h = bbox.ymax - bbox.ymin + 1; float side = h>w ? 
h : w; bbox.xmin = round(max(bbox.xmin + (w - side)*0.5f, 0.f)); bbox.ymin = round(max(bbox.ymin + (h - side)*0.5f, 0.f)); bbox.xmax = round(min(bbox.xmin + side - 1, width - 1.f)); bbox.ymax = round(min(bbox.ymin + side - 1, height - 1.f)); } } void MTCNN::GenerateBBox(Mat* confidence, Mat* reg_box, float scale, float thresh) { int feature_map_w_ = confidence->size[3]; int feature_map_h_ = confidence->size[2]; int spatical_size = feature_map_w_*feature_map_h_; // const float* confidence_data = (float*)(confidence->data + spatical_size); std::cout<size; std::cout<<" "<data); confidence_data += spatical_size; cv::Mat image(feature_map_h_,feature_map_w_,confidence->type()); image.data =(unsigned char*)(confidence_data); // cv::imshow("image",image); // cv::waitKey(0); // std::cout<data); candidate_boxes_.clear(); for (int i = 0; i= thresh) { if (confidence_data[i] <= 1-thresh) { int y = i / feature_map_w_; int x = i - feature_map_w_ * y; FaceInfo faceInfo; FaceBox &faceBox = faceInfo.bbox; faceBox.xmin = (float)(x * pnet_stride) / scale; faceBox.ymin = (float)(y * pnet_stride) / scale; faceBox.xmax = (float)(x * pnet_stride + pnet_cell_size - 1.f) / scale; faceBox.ymax = (float)(y * pnet_stride + pnet_cell_size - 1.f) / scale; faceInfo.bbox_reg[0] = reg_data[i]; faceInfo.bbox_reg[1] = reg_data[i + spatical_size]; faceInfo.bbox_reg[2] = reg_data[i + 2 * spatical_size]; faceInfo.bbox_reg[3] = reg_data[i + 3 * spatical_size]; faceBox.score = confidence_data[i]; candidate_boxes_.push_back(faceInfo); } } } std::vector MTCNN::NMS(std::vector& bboxes, float thresh, char methodType) { std::vector bboxes_nms; if (bboxes.size() == 0) { return bboxes_nms; } std::sort(bboxes.begin(), bboxes.end(), CompareBBox); int32_t select_idx = 0; int32_t num_bbox = static_cast(bboxes.size()); std::vector mask_merged(num_bbox, 0); bool all_merged = false; while (!all_merged) { while (select_idx < num_bbox && mask_merged[select_idx] == 1) select_idx++; if (select_idx == num_bbox) { all_merged 
= true; continue; } bboxes_nms.push_back(bboxes[select_idx]); mask_merged[select_idx] = 1; FaceBox select_bbox = bboxes[select_idx].bbox; float area1 = static_cast((select_bbox.xmax - select_bbox.xmin + 1) * (select_bbox.ymax - select_bbox.ymin + 1)); float x1 = static_cast(select_bbox.xmin); float y1 = static_cast(select_bbox.ymin); float x2 = static_cast(select_bbox.xmax); float y2 = static_cast(select_bbox.ymax); select_idx++; //#pragma omp parallel for num_threads(threads_num) for (int32_t i = select_idx; i < num_bbox; i++) { if (mask_merged[i] == 1) continue; FaceBox & bbox_i = bboxes[i].bbox; float x = std::max(x1, static_cast(bbox_i.xmin)); float y = std::max(y1, static_cast(bbox_i.ymin)); float w = std::min(x2, static_cast(bbox_i.xmax)) - x + 1; float h = std::min(y2, static_cast(bbox_i.ymax)) - y + 1; if (w <= 0 || h <= 0) continue; float area2 = static_cast((bbox_i.xmax - bbox_i.xmin + 1) * (bbox_i.ymax - bbox_i.ymin + 1)); float area_intersect = w * h; switch (methodType) { case 'u': if (static_cast(area_intersect) / (area1 + area2 - area_intersect) > thresh) mask_merged[i] = 1; break; case 'm': if (static_cast(area_intersect) / std::min(area1, area2) > thresh) mask_merged[i] = 1; break; default: break; } } } return bboxes_nms; } vector MTCNN::NextStage(const cv::Mat& image, vector &pre_stage_res, int input_w, int input_h, int stage_num, const float threshold) { vector res; int batch_size = (int)pre_stage_res.size(); if (batch_size == 0) return res; Mat* input_layer = nullptr; Mat* confidence = nullptr; Mat* reg_box = nullptr; Mat* reg_landmark = nullptr; std::vector< Mat > targets_blobs; switch (stage_num) { case 2: { // input_layer = RNet_->input_blobs()[0]; // input_layer->Reshape(batch_size, 3, input_h, input_w); // RNet_->Reshape(); }break; case 3: { // input_layer = ONet_->input_blobs()[0]; // input_layer->Reshape(batch_size, 3, input_h, input_w); // ONet_->Reshape(); }break; default: return res; break; } // float * input_data = 
input_layer->mutable_cpu_data(); int spatial_size = input_h*input_w; //#pragma omp parallel for num_threads(threads_num) std::vector inputs; for (int n = 0; n < batch_size; ++n) { FaceBox &box = pre_stage_res[n].bbox; Mat roi = image(Rect(Point((int)box.xmin, (int)box.ymin), Point((int)box.xmax, (int)box.ymax))).clone(); resize(roi, roi, Size(input_w, input_h)); inputs.push_back(roi); //resize好的face roi 里面 } // // cv::Mat inputBlob = cv::dnn::blobFromImage(resized, std_val,cv::Size(),mean_val); // cv::imshow("image",inputs[0]); // cv::waitKey(0); Mat blob_input = dnn::blobFromImages(inputs, std_val,cv::Size(),cv::Scalar(mean_val,mean_val,mean_val),false); // PNet_.setInput(inputBlob, "data"); // const std::vector< String > targets_node{"conv4-2","prob1"}; // std::vector< Mat > targets_blobs; // PNet_.forward(targets_blobs,targets_node); switch (stage_num) { case 2: { RNet_.setInput(blob_input, "data"); const std::vector< String > targets_node{"conv5-2","prob1"}; RNet_.forward(targets_blobs,targets_node); confidence = &targets_blobs[1]; reg_box = &targets_blobs[0]; float* confidence_data = (float*)confidence->data; }break; case 3: { ONet_.setInput(blob_input, "data"); const std::vector< String > targets_node{"conv6-2","conv6-3","prob1"}; ONet_.forward(targets_blobs,targets_node); reg_box = &targets_blobs[0]; reg_landmark = &targets_blobs[1]; confidence = &targets_blobs[2]; }break; } const float* confidence_data = (float*)confidence->data; // std::cout<<"confidence_data[0] "<data; const float* landmark_data = nullptr; if (reg_landmark) { landmark_data = (float*)reg_landmark->data; } for (int k = 0; k < batch_size; ++k) { if (confidence_data[2 * k + 1] >= threshold) { FaceInfo info; info.bbox.score = confidence_data[2 * k + 1]; info.bbox.xmin = pre_stage_res[k].bbox.xmin; info.bbox.ymin = pre_stage_res[k].bbox.ymin; info.bbox.xmax = pre_stage_res[k].bbox.xmax; info.bbox.ymax = pre_stage_res[k].bbox.ymax; for (int i = 0; i < 4; ++i) { info.bbox_reg[i] = reg_data[4 * k 
+ i]; } if (reg_landmark) { float w = info.bbox.xmax - info.bbox.xmin + 1.f; float h = info.bbox.ymax - info.bbox.ymin + 1.f; for (int i = 0; i < 5; ++i){ info.landmark[2 * i] = landmark_data[10 * k + 2 * i] * w + info.bbox.xmin; info.landmark[2 * i + 1] = landmark_data[10 * k + 2 * i + 1] * h + info.bbox.ymin; } } res.push_back(info); } } return res; } vector MTCNN::ProposalNet(const cv::Mat& img, int minSize, float threshold, float factor) { cv::Mat resized; int width = img.cols; int height = img.rows; float scale = 12.f / minSize; float minWH = std::min(height, width) *scale; std::vector scales; while (minWH >= 12) { scales.push_back(scale); minWH *= factor; scale *= factor; } // Mat* input_layer = PNet_->input_blobs()[0]; total_boxes_.clear(); for (int i = 0; i < scales.size(); i++) { int ws = (int)std::ceil(width*scales[i]); int hs = (int)std::ceil(height*scales[i]); cv::resize(img, resized, cv::Size(ws, hs), 0, 0, cv::INTER_LINEAR); // // input_layer->Reshape(1, 3, hs, ws); // PNet_->Reshape(); // // float * input_data = input_layer->mutable_cpu_data(); // cv::Vec3b * img_data = (cv::Vec3b *)resized.data; // int spatial_size = ws* hs; // for (int k = 0; k < spatial_size; ++k) { // input_data[k] = float((img_data[k][0] - mean_val)* std_val); // input_data[k + spatial_size] = float((img_data[k][1] - mean_val) * std_val); // input_data[k + 2 * spatial_size] = float((img_data[k][2] - mean_val) * std_val); // } cv::Mat inputBlob = cv::dnn::blobFromImage(resized, 1/255.0,cv::Size(),cv::Scalar(0,0,0),false); float* c = (float*)inputBlob.data; PNet_.setInput(inputBlob, "data"); const std::vector< cv::String > targets_node{"conv4-2","prob1"}; std::vector< cv::Mat > targets_blobs; PNet_.forward(targets_blobs,targets_node); cv::Mat prob = targets_blobs[1] ; cv::Mat reg = targets_blobs[0]; GenerateBBox(&prob, ®, scales[i], threshold); // std::vector bboxes_nms = NMS(candidate_boxes_, 0.5, 'u'); if (bboxes_nms.size()>0) { total_boxes_.insert(total_boxes_.end(), 
bboxes_nms.begin(), bboxes_nms.end()); } } int num_box = (int)total_boxes_.size(); // std::cout< res_boxes; if (num_box != 0) { res_boxes = NMS(total_boxes_, 0.7f, 'u'); BBoxRegression(res_boxes); BBoxPadSquare(res_boxes, width, height); } return res_boxes; } vector MTCNN::Detect_mtcnn(const cv::Mat& image, const int minSize, const float* threshold, const float factor, const int stage) { vector pnet_res; vector rnet_res; vector onet_res; if (stage >= 1){ pnet_res = ProposalNet(image, minSize, threshold[0], factor); } if (stage >= 2 && pnet_res.size()>0){ if (pnet_max_detect_num < (int)pnet_res.size()){ pnet_res.resize(pnet_max_detect_num); } int num = (int)pnet_res.size(); int size = (int)ceil(1.f*num / step_size); for (int iter = 0; iter < size; ++iter){ int start = iter*step_size; int end = min(start + step_size, num); vector input(pnet_res.begin() + start, pnet_res.begin() + end); vector res = NextStage(image, input, 24, 24, 2, threshold[1]); rnet_res.insert(rnet_res.end(), res.begin(), res.end()); } rnet_res = NMS(rnet_res, 0.4f, 'm'); BBoxRegression(rnet_res); BBoxPadSquare(rnet_res, image.cols, image.rows); } if (stage >= 3 && rnet_res.size()>0){ int num = (int)rnet_res.size(); int size = (int)ceil(1.f*num / step_size); for (int iter = 0; iter < size; ++iter){ int start = iter*step_size; int end = min(start + step_size, num); vector input(rnet_res.begin() + start, rnet_res.begin() + end); vector res = NextStage(image, input, 48, 48, 3, threshold[2]); onet_res.insert(onet_res.end(), res.begin(), res.end()); } BBoxRegression(onet_res); onet_res = NMS(onet_res, 0.4f, 'm'); BBoxPad(onet_res, image.cols, image.rows); } if (stage == 1){ return pnet_res; } else if (stage == 2){ return rnet_res; } else if (stage == 3){ return onet_res; } else{ return onet_res; } } int main(int argc, char **argv) { MTCNN detector("model"); string name_list[1] = { "test.jpg", }; // MTCNN detector("./model"); float factor = 0.709f; float threshold[3] = { 0.7f, 0.6f, 0.6f }; int minSize 
= 12; for (int n = 0; n < 1;++n){ cv::Mat image = cv::imread(name_list[n], 1); for(int i = 0 ; i < 10 ; i ++) { double t = (double) cv::getTickCount(); vector faceInfo = detector.Detect_mtcnn(image, minSize, threshold, factor, 3); std::cout << name_list[n] << " time," << (double) (cv::getTickCount() - t) / cv::getTickFrequency() << "s" << std::endl; for (int i = 0; i < faceInfo.size(); i++) { int x = (int) faceInfo[i].bbox.xmin; int y = (int) faceInfo[i].bbox.ymin; int w = (int) (faceInfo[i].bbox.xmax - faceInfo[i].bbox.xmin + 1); int h = (int) (faceInfo[i].bbox.ymax - faceInfo[i].bbox.ymin + 1); cv::rectangle(image, cv::Rect(x, y, w, h), cv::Scalar(255, 0, 0), 2); } cv::imwrite("test.png", image); cv::imshow("image", image); cv::waitKey(0); } } return 1; }