Repository: habrman/FaceRecognition
Branch: master
Commit: 4c9516b53816
Files: 8
Total size: 33.1 KB

Directory structure:
gitextract_o4hu3_is/
├── .gitignore
├── LICENSE
├── README.md
├── det1.npy
├── det2.npy
├── det3.npy
├── detect_and_align.py
└── main.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
__pycache__/
.vscode/
ids/
model/

================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2017 habrman

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

================================================
FILE: README.md
================================================
# FaceRecognition
Webcam face recognition using TensorFlow and OpenCV. The application tries to find faces in the webcam image and match them against images in an id folder using deep neural networks.

## Dependencies
* OpenCV
* TensorFlow
* Scikit-learn
* easygui

## Inspiration
Models, training code and inspiration can be found in the [facenet](https://github.com/davidsandberg/facenet) repository. [Multi-task Cascaded Convolutional Networks](https://kpzhang93.github.io/MTCNN_face_detection_alignment/index.html) are used for face and landmark detection, while an [Inception ResNet](https://arxiv.org/abs/1602.07261) is used for ID classification. A direct link to the pretrained Inception ResNet model can be found [here](https://drive.google.com/file/d/0B5MzpY9kBtDVZ2RpVDYwWmxoSUk).

## How to
Get the [model from facenet](https://drive.google.com/file/d/0B5MzpY9kBtDVZ2RpVDYwWmxoSUk) and set up your id folder. The id folder should contain subfolders, each containing at least one image of one person. Each subfolder should be named after the person it contains, since this name is used as output when a match is found. E.g. an id folder named `ids` containing subfolders `Adam` and `Eve`, each containing images of the respective person:

```bash
├── ids
│   ├── Adam
│   │   ├── Adam0.png
│   │   ├── Adam1.png
│   ├── Eve
│   │   ├── Eve0.png
```

Download and unpack the [model](https://drive.google.com/file/d/0B5MzpY9kBtDVZ2RpVDYwWmxoSUk) to a folder and run `python3 main.py ./folder/model.pb ./ids/` to start the program. Make sure to replace `./folder/model.pb` with the path to the downloaded model.
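Under the hood, each detected face is embedded by the Inception ResNet and compared against the id embeddings by Euclidean distance; the nearest id counts as a match only if its distance is below the match threshold (`-t/--threshold`, default 1.0). A minimal sketch of that matching step, with made-up embeddings standing in for the real model outputs:

```python
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

# Hypothetical embeddings for illustration; the real ones come from the facenet model.
id_embeddings = np.random.rand(3, 128)    # one row per known id image
face_embeddings = np.random.rand(2, 128)  # one row per face found in the frame

for row in pairwise_distances(face_embeddings, id_embeddings):
    nearest = np.argmin(row)
    if row[nearest] < 1.0:  # default distance threshold
        print("Match with id image %d at distance %.3f" % (nearest, row[nearest]))
    else:
        print("Unknown face")
```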
Visualization hotkeys:
* l - toggle facial landmarks
* b - toggle bounding box
* i - toggle id
* f - toggle frames per second
* s - save image face detections to id folder

![alt text](https://github.com/habrman/FaceRecognition/blob/master/example.png)

================================================
FILE: detect_and_align.py
================================================
from six import string_types, iteritems
import tensorflow as tf
import numpy as np
import cv2
import os


def detect_faces(img, mtcnn):
    margin = 44
    image_size = 160
    img_size = np.asarray(img.shape)[0:2]
    bounding_boxes, landmarks = detect_face(img, mtcnn["pnet"], mtcnn["rnet"], mtcnn["onet"])
    nrof_bb = bounding_boxes.shape[0]

    padded_bounding_boxes = []
    face_patches = []
    if nrof_bb > 0:
        landmarks = np.stack(landmarks)
        landmarks = np.transpose(landmarks, (1, 0))
        for i in range(nrof_bb):
            det = np.squeeze(bounding_boxes[i, 0:4])
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(det[0] - margin / 2, 0)
            bb[1] = np.maximum(det[1] - margin / 2, 0)
            bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
            bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
            cropped = img[bb[1] : bb[3], bb[0] : bb[2], :]
            aligned = cv2.resize(cropped, (image_size, image_size))
            prewhitened = prewhiten(aligned)
            padded_bounding_boxes.append(bb)
            face_patches.append(prewhitened)

    return face_patches, padded_bounding_boxes, landmarks


def prewhiten(x):
    # Standardize the patch (zero mean, clamped unit std), as the facenet model expects.
    mean = np.mean(x)
    std = np.std(x)
    std_adj = np.maximum(std, 1.0 / np.sqrt(x.size))
    y = np.multiply(np.subtract(x, mean), 1 / std_adj)
    return y


def imresample(img, sz):
    im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA)
    return im_data


def generateBoundingBox(imap, reg, scale, t):
    # use heatmap to generate bounding boxes
    stride = 2
    cellsize = 12

    imap = np.transpose(imap)
    dx1 = np.transpose(reg[:, :, 0])
    dy1 = np.transpose(reg[:, :, 1])
    dx2 = np.transpose(reg[:, :, 2])
    dy2 = np.transpose(reg[:, :, 3])
    y, x = np.where(imap >= t)
    if y.shape[0] == 1:
        dx1 = np.flipud(dx1)
        dy1 = np.flipud(dy1)
        dx2 = np.flipud(dx2)
        dy2 = np.flipud(dy2)
    score = imap[(y, x)]
    reg = np.transpose(np.vstack([dx1[(y, x)], dy1[(y, x)], dx2[(y, x)], dy2[(y, x)]]))
    if reg.size == 0:
        reg = np.empty((0, 3))
    bb = np.transpose(np.vstack([y, x]))
    q1 = np.fix((stride * bb + 1) / scale)
    q2 = np.fix((stride * bb + cellsize - 1 + 1) / scale)
    boundingbox = np.hstack([q1, q2, np.expand_dims(score, 1), reg])
    return boundingbox, reg


def nms(boxes, threshold, method):
    if boxes.size == 0:
        return np.empty((0, 3))
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    s = boxes[:, 4]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    I = np.argsort(s)
    pick = np.zeros_like(s, dtype=np.int16)
    counter = 0
    while I.size > 0:
        i = I[-1]
        pick[counter] = i
        counter += 1
        idx = I[0:-1]
        xx1 = np.maximum(x1[i], x1[idx])
        yy1 = np.maximum(y1[i], y1[idx])
        xx2 = np.minimum(x2[i], x2[idx])
        yy2 = np.minimum(y2[i], y2[idx])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        if method == "Min":  # was `is "Min"`, which tests identity, not equality
            o = inter / np.minimum(area[i], area[idx])
        else:
            o = inter / (area[i] + area[idx] - inter)
        I = I[np.where(o <= threshold)]
    pick = pick[0:counter]
    return pick


def rerec(bboxA):
    # convert bboxA to square
    h = bboxA[:, 3] - bboxA[:, 1]
    w = bboxA[:, 2] - bboxA[:, 0]
    l = np.maximum(w, h)
    bboxA[:, 0] = bboxA[:, 0] + w * 0.5 - l * 0.5
    bboxA[:, 1] = bboxA[:, 1] + h * 0.5 - l * 0.5
    bboxA[:, 2:4] = bboxA[:, 0:2] + np.transpose(np.tile(l, (2, 1)))
    return bboxA
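# A worked example (added comment, not in the original) of the bookkeeping in
# pad() below: for a 100x100 image and a box (90, 10, 120, 40), the box sticks
# out 20 px past the right edge, so ex is clamped to 100 and edx becomes
# -120 + 100 + 31 = 11; only the first 11 columns of the 31-px-wide target
# patch are then filled from the image. The 1-based coordinates follow the
# MATLAB-style indexing used throughout this port (note the dy[k] - 1 slicing
# in detect_face).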
def pad(total_boxes, w, h):
    # compute the padding coordinates (pad the bounding boxes to square)
    tmpw = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32)
    tmph = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32)
    numbox = total_boxes.shape[0]

    dx = np.ones((numbox), dtype=np.int32)
    dy = np.ones((numbox), dtype=np.int32)
    edx = tmpw.copy().astype(np.int32)
    edy = tmph.copy().astype(np.int32)

    x = total_boxes[:, 0].copy().astype(np.int32)
    y = total_boxes[:, 1].copy().astype(np.int32)
    ex = total_boxes[:, 2].copy().astype(np.int32)
    ey = total_boxes[:, 3].copy().astype(np.int32)

    tmp = np.where(ex > w)
    edx.flat[tmp] = np.expand_dims(-ex[tmp] + w + tmpw[tmp], 1)
    ex[tmp] = w

    tmp = np.where(ey > h)
    edy.flat[tmp] = np.expand_dims(-ey[tmp] + h + tmph[tmp], 1)
    ey[tmp] = h

    tmp = np.where(x < 1)
    dx.flat[tmp] = np.expand_dims(2 - x[tmp], 1)
    x[tmp] = 1

    tmp = np.where(y < 1)
    dy.flat[tmp] = np.expand_dims(2 - y[tmp], 1)
    y[tmp] = 1

    return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph


def bbreg(boundingbox, reg):
    # calibrate bounding boxes
    if reg.shape[1] == 1:
        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))

    w = boundingbox[:, 2] - boundingbox[:, 0] + 1
    h = boundingbox[:, 3] - boundingbox[:, 1] + 1
    b1 = boundingbox[:, 0] + reg[:, 0] * w
    b2 = boundingbox[:, 1] + reg[:, 1] * h
    b3 = boundingbox[:, 2] + reg[:, 2] * w
    b4 = boundingbox[:, 3] + reg[:, 3] * h
    boundingbox[:, 0:4] = np.transpose(np.vstack([b1, b2, b3, b4]))
    return boundingbox


def layer(op):
    def layer_decorated(self, *args, **kwargs):
        # Automatically set a name if not provided.
        name = kwargs.setdefault("name", self.get_unique_name(op.__name__))
        # Figure out the layer inputs.
        if len(self.terminals) == 0:
            raise RuntimeError("No input variables found for layer %s." % name)
        elif len(self.terminals) == 1:
            layer_input = self.terminals[0]
        else:
            layer_input = list(self.terminals)
        # Perform the operation and get the output.
        layer_output = op(self, layer_input, *args, **kwargs)
        # Add to layer LUT.
        self.layers[name] = layer_output
        # This output is now the input for the next layer.
        self.feed(layer_output)
        # Return self for chained calls.
        return self

    return layer_decorated


class Network(object):
    def __init__(self, inputs, trainable=True):
        # The input nodes for this network
        self.inputs = inputs
        # The current list of terminal nodes
        self.terminals = []
        # Mapping from layer names to layers
        self.layers = dict(inputs)
        # If true, the resulting variables are set as trainable
        self.trainable = trainable

        self.setup()

    def setup(self):
        """Construct the network."""
        raise NotImplementedError("Must be implemented by the subclass.")

    def load(self, data_path, session, ignore_missing=False):
        """Load network weights.
        data_path: The path to the numpy-serialized network weights
        session: The current TensorFlow session
        ignore_missing: If true, serialized weights for missing layers are ignored.
        """
        data_dict = np.load(data_path, encoding="latin1", allow_pickle=True).item()

        for op_name in data_dict:
            with tf.variable_scope(op_name, reuse=True):
                for param_name, data in iteritems(data_dict[op_name]):
                    try:
                        var = tf.get_variable(param_name)
                        session.run(var.assign(data))
                    except ValueError:
                        if not ignore_missing:
                            raise
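    # The @layer decorator wraps each op below so it returns `self`, which is
    # what lets the subclasses chain calls (illustrative snippet, mirroring
    # PNet.setup further down):
    #     self.feed("data").conv(3, 3, 10, 1, 1, name="conv1").prelu(name="PReLU1")
    # Each call consumes the current terminal nodes and registers its output in
    # self.layers under `name`.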
""" assert len(args) != 0 self.terminals = [] for fed_layer in args: if isinstance(fed_layer, string_types): try: fed_layer = self.layers[fed_layer] except KeyError: raise KeyError("Unknown layer name fed: %s" % fed_layer) self.terminals.append(fed_layer) return self def get_output(self): """Returns the current network output.""" return self.terminals[-1] def get_unique_name(self, prefix): """Returns an index-suffixed unique name for the given prefix. This is used for auto-generating layer names based on the type-prefix. """ ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 return "%s_%d" % (prefix, ident) def make_var(self, name, shape): """Creates a new TensorFlow variable.""" return tf.get_variable(name, shape, trainable=self.trainable) def validate_padding(self, padding): """Verifies that the padding is one of the supported ones.""" assert padding in ("SAME", "VALID") @layer def conv(self, inp, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding="SAME", group=1, biased=True): # Verify that the padding is acceptable self.validate_padding(padding) # Get the number of channels in the input c_i = int(inp.get_shape()[-1]) # Verify that the grouping parameter is valid assert c_i % group == 0 assert c_o % group == 0 # Convolution for a given input and kernel def convolve(i, k): return tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) with tf.variable_scope(name) as scope: kernel = self.make_var("weights", shape=[k_h, k_w, c_i // group, c_o]) # This is the common-case. Convolve the input without any further complications. output = convolve(inp, kernel) # Add the biases if biased: biases = self.make_var("biases", [c_o]) output = tf.nn.bias_add(output, biases) if relu: # ReLU non-linearity output = tf.nn.relu(output, name=scope.name) return output @layer def prelu(self, inp, name): with tf.variable_scope(name): i = int(inp.get_shape()[-1]) alpha = self.make_var("alpha", shape=(i,)) output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp)) return output @layer def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding="SAME"): self.validate_padding(padding) return tf.nn.max_pool(inp, ksize=[1, k_h, k_w, 1], strides=[1, s_h, s_w, 1], padding=padding, name=name) @layer def fc(self, inp, num_out, name, relu=True): with tf.variable_scope(name): input_shape = inp.get_shape() if input_shape.ndims == 4: # The input is spatial. Vectorize it first. 
    @layer
    def fc(self, inp, num_out, name, relu=True):
        with tf.variable_scope(name):
            input_shape = inp.get_shape()
            if input_shape.ndims == 4:
                # The input is spatial. Vectorize it first.
                dim = 1
                for d in input_shape[1:].as_list():
                    dim *= int(d)
                feed_in = tf.reshape(inp, [-1, dim])
            else:
                feed_in, dim = (inp, input_shape[-1].value)
            weights = self.make_var("weights", shape=[dim, num_out])
            biases = self.make_var("biases", [num_out])
            op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
            fc = op(feed_in, weights, biases, name=name)
            return fc

    @layer
    def softmax(self, target, axis, name=None):
        max_axis = tf.reduce_max(target, axis, keep_dims=True)
        target_exp = tf.exp(target - max_axis)
        normalize = tf.reduce_sum(target_exp, axis, keep_dims=True)
        softmax = tf.div(target_exp, normalize, name)
        return softmax


class PNet(Network):
    def setup(self):
        (
            self.feed("data")
            .conv(3, 3, 10, 1, 1, padding="VALID", relu=False, name="conv1")
            .prelu(name="PReLU1")
            .max_pool(2, 2, 2, 2, name="pool1")
            .conv(3, 3, 16, 1, 1, padding="VALID", relu=False, name="conv2")
            .prelu(name="PReLU2")
            .conv(3, 3, 32, 1, 1, padding="VALID", relu=False, name="conv3")
            .prelu(name="PReLU3")
            .conv(1, 1, 2, 1, 1, relu=False, name="conv4-1")
            .softmax(3, name="prob1")
        )

        (self.feed("PReLU3").conv(1, 1, 4, 1, 1, relu=False, name="conv4-2"))


class RNet(Network):
    def setup(self):
        (
            self.feed("data")
            .conv(3, 3, 28, 1, 1, padding="VALID", relu=False, name="conv1")
            .prelu(name="prelu1")
            .max_pool(3, 3, 2, 2, name="pool1")
            .conv(3, 3, 48, 1, 1, padding="VALID", relu=False, name="conv2")
            .prelu(name="prelu2")
            .max_pool(3, 3, 2, 2, padding="VALID", name="pool2")
            .conv(2, 2, 64, 1, 1, padding="VALID", relu=False, name="conv3")
            .prelu(name="prelu3")
            .fc(128, relu=False, name="conv4")
            .prelu(name="prelu4")
            .fc(2, relu=False, name="conv5-1")
            .softmax(1, name="prob1")
        )

        (self.feed("prelu4").fc(4, relu=False, name="conv5-2"))


class ONet(Network):
    def setup(self):
        (
            self.feed("data")
            .conv(3, 3, 32, 1, 1, padding="VALID", relu=False, name="conv1")
            .prelu(name="prelu1")
            .max_pool(3, 3, 2, 2, name="pool1")
            .conv(3, 3, 64, 1, 1, padding="VALID", relu=False, name="conv2")
            .prelu(name="prelu2")
            .max_pool(3, 3, 2, 2, padding="VALID", name="pool2")
            .conv(3, 3, 64, 1, 1, padding="VALID", relu=False, name="conv3")
            .prelu(name="prelu3")
            .max_pool(2, 2, 2, 2, name="pool3")
            .conv(2, 2, 128, 1, 1, padding="VALID", relu=False, name="conv4")
            .prelu(name="prelu4")
            .fc(256, relu=False, name="conv5")
            .prelu(name="prelu5")
            .fc(2, relu=False, name="conv6-1")
            .softmax(1, name="prob1")
        )

        (self.feed("prelu5").fc(4, relu=False, name="conv6-2"))
        (self.feed("prelu5").fc(10, relu=False, name="conv6-3"))


def create_mtcnn(sess, model_path):
    if not model_path:
        model_path, _ = os.path.split(os.path.realpath(__file__))

    with tf.variable_scope("pnet"):
        data = tf.placeholder(tf.float32, (None, None, None, 3), "input")
        pnet = PNet({"data": data})
        pnet.load(os.path.join(model_path, "det1.npy"), sess)
    with tf.variable_scope("rnet"):
        data = tf.placeholder(tf.float32, (None, 24, 24, 3), "input")
        rnet = RNet({"data": data})
        rnet.load(os.path.join(model_path, "det2.npy"), sess)
    with tf.variable_scope("onet"):
        data = tf.placeholder(tf.float32, (None, 48, 48, 3), "input")
        onet = ONet({"data": data})
        onet.load(os.path.join(model_path, "det3.npy"), sess)

    def pnet_fun(img):
        return sess.run(("pnet/conv4-2/BiasAdd:0", "pnet/prob1:0"), feed_dict={"pnet/input:0": img})

    def rnet_fun(img):
        return sess.run(("rnet/conv5-2/conv5-2:0", "rnet/prob1:0"), feed_dict={"rnet/input:0": img})

    def onet_fun(img):
        return sess.run(
            ("onet/conv6-2/conv6-2:0", "onet/conv6-3/conv6-3:0", "onet/prob1:0"), feed_dict={"onet/input:0": img}
        )

    return {"pnet": pnet_fun, "rnet": rnet_fun, "onet": onet_fun}
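# A hedged usage sketch of the two entry points above and below (added for
# clarity, not part of the original file):
#     sess = tf.Session()
#     mtcnn = create_mtcnn(sess, None)  # loads det1/det2/det3.npy next to this file
#     boxes, points = detect_face(img, mtcnn["pnet"], mtcnn["rnet"], mtcnn["onet"])
# `boxes` is (n, 5): x1, y1, x2, y2, score. `points` is (10, n): the five
# landmark x coordinates stacked on top of the five y coordinates.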
def detect_face(img, pnet, rnet, onet):
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds for the three stages
    factor = 0.709  # scale factor
    factor_count = 0
    total_boxes = np.empty((0, 9))
    points = []
    h = img.shape[0]
    w = img.shape[1]
    minl = np.amin([h, w])
    m = 12.0 / minsize
    minl = minl * m

    # create scale pyramid
    scales = []
    while minl >= 12:
        scales += [m * np.power(factor, factor_count)]
        minl = minl * factor
        factor_count += 1

    # first stage
    for j in range(len(scales)):
        scale = scales[j]
        hs = int(np.ceil(h * scale))
        ws = int(np.ceil(w * scale))
        im_data = imresample(img, (hs, ws))
        im_data = (im_data - 127.5) * 0.0078125
        img_x = np.expand_dims(im_data, 0)
        img_y = np.transpose(img_x, (0, 2, 1, 3))
        out = pnet(img_y)
        out0 = np.transpose(out[0], (0, 2, 1, 3))
        out1 = np.transpose(out[1], (0, 2, 1, 3))

        boxes, _ = generateBoundingBox(out1[0, :, :, 1].copy(), out0[0, :, :, :].copy(), scale, threshold[0])

        # inter-scale nms
        pick = nms(boxes.copy(), 0.5, "Union")
        if boxes.size > 0 and pick.size > 0:
            boxes = boxes[pick, :]
            total_boxes = np.append(total_boxes, boxes, axis=0)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        pick = nms(total_boxes.copy(), 0.7, "Union")
        total_boxes = total_boxes[pick, :]
        regw = total_boxes[:, 2] - total_boxes[:, 0]
        regh = total_boxes[:, 3] - total_boxes[:, 1]
        qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
        qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
        qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
        qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]))
        total_boxes = rerec(total_boxes.copy())
        total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # second stage
        tempimg = np.zeros((24, 24, 3, numbox))
        for k in range(0, numbox):
            tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
            tmp[dy[k] - 1 : edy[k], dx[k] - 1 : edx[k], :] = img[y[k] - 1 : ey[k], x[k] - 1 : ex[k], :]
            if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                tempimg[:, :, :, k] = imresample(tmp, (24, 24))
            else:
                # Original returned np.empty(), which raises a TypeError (shape is
                # required) and drops the second return value; return empty results.
                return np.empty((0, 5)), np.empty((10, 0))
        tempimg = (tempimg - 127.5) * 0.0078125
        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
        out = rnet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        score = out1[1, :]
        ipass = np.where(score > threshold[1])
        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
        mv = out0[:, ipass[0]]
        if total_boxes.shape[0] > 0:
            pick = nms(total_boxes, 0.7, "Union")
            total_boxes = total_boxes[pick, :]
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:, pick]))
            total_boxes = rerec(total_boxes.copy())

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # third stage
        total_boxes = np.fix(total_boxes).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
        tempimg = np.zeros((48, 48, 3, numbox))
        for k in range(0, numbox):
            tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
            tmp[dy[k] - 1 : edy[k], dx[k] - 1 : edx[k], :] = img[y[k] - 1 : ey[k], x[k] - 1 : ex[k], :]
            if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                tempimg[:, :, :, k] = imresample(tmp, (48, 48))
            else:
                # See the matching comment in the second stage above.
                return np.empty((0, 5)), np.empty((10, 0))
        tempimg = (tempimg - 127.5) * 0.0078125
        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
        out = onet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        out2 = np.transpose(out[2])
        score = out2[1, :]
        points = out1
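        # Keep only the boxes whose onet score clears the final threshold; the
        # normalized landmark outputs are mapped back to image coordinates below.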
        ipass = np.where(score > threshold[2])
        points = points[:, ipass[0]]
        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
        mv = out0[:, ipass[0]]

        w = total_boxes[:, 2] - total_boxes[:, 0] + 1
        h = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[0:5, :] = np.tile(w, (5, 1)) * points[0:5, :] + np.tile(total_boxes[:, 0], (5, 1)) - 1
        points[5:10, :] = np.tile(h, (5, 1)) * points[5:10, :] + np.tile(total_boxes[:, 1], (5, 1)) - 1
        if total_boxes.shape[0] > 0:
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
            pick = nms(total_boxes.copy(), 0.7, "Min")
            total_boxes = total_boxes[pick, :]
            points = points[:, pick]

    return total_boxes, points

================================================
FILE: main.py
================================================
from sklearn.metrics.pairwise import pairwise_distances
from tensorflow.python.platform import gfile
import tensorflow as tf
import numpy as np
import detect_and_align
import argparse
import easygui
import time
import cv2
import os


class IdData:
    """Keeps track of known identities and calculates id matches"""

    def __init__(
        self, id_folder, mtcnn, sess, embeddings, images_placeholder, phase_train_placeholder, distance_threshold
    ):
        print("Loading known identities: ", end="")
        self.distance_threshold = distance_threshold  # renamed from "treshold" for consistency
        self.id_folder = id_folder
        self.mtcnn = mtcnn
        self.id_names = []
        self.embeddings = None

        image_paths = []
        os.makedirs(id_folder, exist_ok=True)
        ids = os.listdir(os.path.expanduser(id_folder))
        if not ids:
            return

        for id_name in ids:
            id_dir = os.path.join(id_folder, id_name)
            image_paths = image_paths + [os.path.join(id_dir, img) for img in os.listdir(id_dir)]

        print("Found %d images in id folder" % len(image_paths))
        aligned_images, id_image_paths = self.detect_id_faces(image_paths)
        feed_dict = {images_placeholder: aligned_images, phase_train_placeholder: False}
        self.embeddings = sess.run(embeddings, feed_dict=feed_dict)

        if len(id_image_paths) < 5:
            self.print_distance_table(id_image_paths)

    def add_id(self, embedding, new_id, face_patch):
        if self.embeddings is None:
            self.embeddings = np.atleast_2d(embedding)
        else:
            self.embeddings = np.vstack([self.embeddings, embedding])
        self.id_names.append(new_id)
        id_folder = os.path.join(self.id_folder, new_id)
        os.makedirs(id_folder, exist_ok=True)
        filenames = [s.split(".")[0] for s in os.listdir(id_folder)]
        numbered_filenames = [int(f) for f in filenames if f.isdigit()]
        img_number = max(numbered_filenames) + 1 if numbered_filenames else 0
        cv2.imwrite(os.path.join(id_folder, f"{img_number}.jpg"), face_patch)
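    # add_id() above backs the "s" hotkey in main(): the new embedding is kept
    # in memory and the face patch is written as e.g. ids/<new_id>/0.jpg so the
    # identity persists across restarts. (Added comment; path is illustrative.)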
" + "If that's the case then it's a false positive detection and" + " you can solve it by increasing the thresolds of the cascade network" ) aligned_images = aligned_images + face_patches id_image_paths += [image_path] * len(face_patches) path = os.path.dirname(image_path) self.id_names += [os.path.basename(path)] * len(face_patches) return np.stack(aligned_images), id_image_paths def print_distance_table(self, id_image_paths): """Prints distances between id embeddings""" distance_matrix = pairwise_distances(self.embeddings, self.embeddings) image_names = [path.split("/")[-1] for path in id_image_paths] print("Distance matrix:\n{:20}".format(""), end="") [print("{:20}".format(name), end="") for name in image_names] for path, distance_row in zip(image_names, distance_matrix): print("\n{:20}".format(path), end="") for distance in distance_row: print("{:20}".format("%0.3f" % distance), end="") print() def find_matching_ids(self, embs): if self.id_names: matching_ids = [] matching_distances = [] distance_matrix = pairwise_distances(embs, self.embeddings) for distance_row in distance_matrix: min_index = np.argmin(distance_row) if distance_row[min_index] < self.distance_treshold: matching_ids.append(self.id_names[min_index]) matching_distances.append(distance_row[min_index]) else: matching_ids.append(None) matching_distances.append(None) else: matching_ids = [None] * len(embs) matching_distances = [np.inf] * len(embs) return matching_ids, matching_distances def load_model(model): model_exp = os.path.expanduser(model) if os.path.isfile(model_exp): print("Loading model filename: %s" % model_exp) with gfile.FastGFile(model_exp, "rb") as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) tf.import_graph_def(graph_def, name="") else: raise ValueError("Specify model file, not directory!") def main(args): with tf.Graph().as_default(): with tf.Session() as sess: # Setup models mtcnn = detect_and_align.create_mtcnn(sess, None) load_model(args.model) images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") # Load anchor IDs id_data = IdData( args.id_folder[0], mtcnn, sess, embeddings, images_placeholder, phase_train_placeholder, args.threshold ) cap = cv2.VideoCapture(0) frame_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) show_landmarks = False show_bb = False show_id = True show_fps = False frame_detections = None while True: start = time.time() _, frame = cap.read() # Locate faces and landmarks in frame face_patches, padded_bounding_boxes, landmarks = detect_and_align.detect_faces(frame, mtcnn) if len(face_patches) > 0: face_patches = np.stack(face_patches) feed_dict = {images_placeholder: face_patches, phase_train_placeholder: False} embs = sess.run(embeddings, feed_dict=feed_dict) matching_ids, matching_distances = id_data.find_matching_ids(embs) frame_detections = {"embs": embs, "bbs": padded_bounding_boxes, "frame": frame.copy()} print("Matches in frame:") for bb, landmark, matching_id, dist in zip( padded_bounding_boxes, landmarks, matching_ids, matching_distances ): if matching_id is None: matching_id = "Unknown" print("Unknown! Couldn't fint match.") else: print("Hi %s! 
                        if show_id:
                            font = cv2.FONT_HERSHEY_SIMPLEX
                            cv2.putText(frame, matching_id, (bb[0], bb[3]), font, 1, (255, 255, 255), 1, cv2.LINE_AA)
                        if show_bb:
                            cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (255, 0, 0), 2)
                        if show_landmarks:
                            for j in range(5):
                                size = 1
                                top_left = (int(landmark[j]) - size, int(landmark[j + 5]) - size)
                                bottom_right = (int(landmark[j]) + size, int(landmark[j + 5]) + size)
                                cv2.rectangle(frame, top_left, bottom_right, (255, 0, 255), 2)
                else:
                    print("Couldn't find a face")

                end = time.time()
                seconds = end - start
                fps = round(1 / seconds, 2)

                if show_fps:
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    cv2.putText(frame, str(fps), (0, int(frame_height) - 5), font, 1, (255, 255, 255), 1, cv2.LINE_AA)

                cv2.imshow("frame", frame)

                key = cv2.waitKey(1)
                if key == ord("q"):
                    break
                elif key == ord("l"):
                    show_landmarks = not show_landmarks
                elif key == ord("b"):
                    show_bb = not show_bb
                elif key == ord("i"):
                    show_id = not show_id
                elif key == ord("f"):
                    show_fps = not show_fps
                elif key == ord("s") and frame_detections is not None:
                    for emb, bb in zip(frame_detections["embs"], frame_detections["bbs"]):
                        patch = frame_detections["frame"][bb[1] : bb[3], bb[0] : bb[2], :]
                        cv2.imshow("frame", patch)
                        cv2.waitKey(1)
                        new_id = easygui.enterbox("Who's in the image? Leave empty for non-valid")
                        if new_id:  # enterbox returns None on cancel, so check truthiness, not len()
                            id_data.add_id(emb, new_id, patch)

            cap.release()
            cv2.destroyAllWindows()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("model", type=str, help="Path to model protobuf (.pb) file")
    parser.add_argument("id_folder", type=str, nargs="+", help="Folder containing ID folders")
    parser.add_argument("-t", "--threshold", type=float, help="Distance threshold defining an id match", default=1.0)

    main(parser.parse_args())