Repository: papermsucode/advhat Branch: master Commit: b34289475e24 Files: 23 Total size: 244.8 KB Directory structure: gitextract_3cazs35n/ ├── Attack/ │ ├── README.md │ ├── attack.py │ ├── cos_mx.py │ ├── cos_tf.py │ ├── face_preparation.py │ ├── stn.py │ └── utils.py ├── Demo/ │ ├── 1000_from_CASIA/ │ │ ├── centroids.npy │ │ └── centroids_names.txt │ ├── README.md │ ├── align/ │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── det1.npy │ │ ├── det2.npy │ │ ├── det3.npy │ │ └── detect_face.py │ ├── alignment.py │ ├── demo.py │ └── dumping.py ├── LICENSE ├── README.md └── Utils/ ├── MXtoTF.ipynb └── README.md ================================================ FILE CONTENTS ================================================ ================================================ FILE: Attack/README.md ================================================ ## Preparing an attack 1. First, make a full-face photo of the attacked person, a full-face photo in a hat, and a full-face photo in a hat with an example sticker on the hat. To be sure that you use a sticker with the correct size, follow these instructions: 1. Download an example.png. 2. Open the downloaded image with the standard Windows print utility. 3. Choose the layout with 4 photos per page (9 x 13 cm). 4. Uncheck the box "Fit picture to frame". 5. Print the page with the example sticker. 6. Cut out the sticker and put it on the hat. 2. Use the following command to prepare photos: `python3 face_preparation.py PATH_TO_THE_IMAGE` 3. You need to find parameters for the sticker position initialization. Use the following command to find these parameters: `python3 face_preparation.py PATH_TO_THE_IMAGE_WITH_HAT_ONLY --mask` It will show the sticker placement with default parameters. Change the parameters until the image looks like a prepared image with the sticker. You can see the parameters using the `--help` flag. 4. Download TensorFlow ArcFace model [here](https://drive.google.com/file/d/1fb70KgMRSmaEUF5cJ67BCD_DmTPCR5uJ/view?usp=sharing). 5.
Launch an attack preparation: `python3 attack.py PATH_TO_THE_PREPARED_IMAGED_WITH_HAT PATH_TO_THE_TF_MODEL --anchor_face PATH_TO_THE_PREPARED_IMAGE_WITHOUT_HAT (sticker position parameters in the same format from the third step)` 6. Print the obtained sticker, put it on the hat as before, and make a new photo with the sticker. 7. Use "face_preparation.py" again to prepare a new photo and "cos_tf.py" to calculate a new similarity. `python3 cos_tf.py PATH_TO_THE_PREPARED_IMAGE_WITHOUT_HAT PATH_TO_THE_PREPARED_IMAGE_WITH_HAT_ONLY` - baseline similarity `python3 cos_tf.py PATH_TO_THE_PREPARED_IMAGE_WITHOUT_HAT PATH_TO_THE_PREPARED_IMAGE_WITH_THE_NEW_STICKER` - final similarity ### Notes Note that our printer has good color rendering, that is why NPS-loss does not make influence in our experiments. You may need to add NPS-loss for your printer.
================================================
FILE: Attack/attack.py
================================================
import argparse
import sys
import os
import tensorflow as tf
import numpy as np
import skimage.io as io
from skimage.transform import rescale
from tqdm import tqdm
from stn import spatial_transformer_network as stn
from utils import TVloss, projector
from sklearn.linear_model import LinearRegression as LR
from time import time
import datetime
import matplotlib.pyplot as plt

# Prepare image to network input format
def prep(im):
    """Convert an HWC (or NHWC) float image in [0,1] into the NCHW
    [-1,1] layout expected by the TF ArcFace graph."""
    if len(im.shape)==3:
        return np.transpose(im,[2,0,1]).reshape((1,3,112,112))*2-1
    elif len(im.shape)==4:
        return np.transpose(im,[0,3,1,2]).reshape((im.shape[0],3,112,112))*2-1

def main(args):
    """Build the sticker projection/compositing graph and run the
    two-stage iterative attack; writes the sticker image and a cosine
    curve plot, both prefixed with the current timestamp."""
    print(args)
    now = str(datetime.datetime.now())  # timestamp prefix for all output files
    sess = tf.Session()

    # Off-plane sticker projection: bend (parabola rate `param`) and
    # tilt (angle `ph`, degrees) the flat 400x900 sticker onto a 900x900 canvas.
    logo = tf.placeholder(tf.float32,shape=[None,400,900,3],name='logo_input')
    param = tf.placeholder(tf.float32,shape=[None,1],name='param_input')
    ph = tf.placeholder(tf.float32,shape=[None,1],name='ph_input')
    result = projector(param,ph,logo)

    # Union of the sticker and face image
    mask_input = tf.placeholder(tf.float32,shape=[None,900,900,3],name='mask_input')
    face_input = tf.placeholder(tf.float32,shape=[None,600,600,3],name='face_input')
    theta = tf.placeholder(tf.float32,shape=[None,6],name='theta_input')
    prepared = stn(result,theta)  # affine placement of the projected sticker

    # Transformation to ArcFace template: crop the 600x600 face region out of
    # the 900x900 canvas, blend by mask, then warp to the 112x112 input size.
    theta2 = tf.placeholder(tf.float32,shape=[None,6],name='theta2_input')
    united = prepared[:,300:,150:750]*mask_input[:,300:,150:750]+\
             face_input*(1-mask_input[:,300:,150:750])
    final_crop = tf.clip_by_value(stn(united,theta2,(112,112)),0.,1.)

    # TV loss and gradients; grads1 back-propagates externally supplied
    # gradients (w.r.t. the 112x112 crop) down to the flat sticker via grad_ys.
    w_tv = tf.placeholder(tf.float32,name='w_tv_input')
    tv_loss = TVloss(logo,w_tv)
    grads_tv = tf.gradients(tv_loss,logo)
    grads_input = tf.placeholder(tf.float32,shape=[None,112,112,3],name='grads_input')
    grads1 = tf.gradients(final_crop,logo,grad_ys=grads_input)

    # Various images generator: yields (feed_dict, rendered 112x112 faces)
    # for a fixed placement or for randomly jittered placements.
    class Imgen(object):
        def __init__(self):
            # Fixed placement from the CLI arguments; rendering an all-ones
            # logo once yields the compositing mask reused afterwards.
            self.fdict = {ph:[[args.ph]],\
                          logo:np.ones((1,400,900,3)),\
                          param:[[args.param]],\
                          theta:1./args.scale*np.array([[1.,0.,-args.x/450.,0.,1.,-args.y/450.]]),\
                          theta2:[[1.,0.,0.,0.,1.,0.]],\
                          w_tv:args.w_tv}
            mask = sess.run(prepared,feed_dict=self.fdict)
            self.fdict[mask_input] = mask
        def gen_fixed(self,im,advhat):
            """Render sticker `advhat` onto face `im` at the fixed placement."""
            self.fdict[face_input] = np.expand_dims(im,0)
            self.fdict[logo] = np.expand_dims(advhat,0)
            return self.fdict, sess.run(final_crop,feed_dict=self.fdict)
        def gen_random(self,im,advhat,batch=args.batch_size):
            """Render a batch with small random jitter of rotation, scale,
            translation, off-plane angle and parabola rate (EOT-style)."""
            # NOTE(review): `batch=args.batch_size` is bound once when the
            # class body executes; fine here since args never changes later.
            alpha1 = np.random.uniform(-1.,1.,size=(batch,1))/180.*np.pi
            scale1 = np.random.uniform(args.scale-0.02,args.scale+0.02,size=(batch,1))
            y1 = np.random.uniform(args.y-600./112.,args.y+600./112.,size=(batch,1))
            x1 = np.random.uniform(args.x-600./112.,args.x+600./112.,size=(batch,1))
            alpha2 = np.random.uniform(-1.,1.,size=(batch,1))/180.*np.pi
            scale2 = np.random.uniform(1./1.04,1.04,size=(batch,1))
            y2 = np.random.uniform(-1.,1.,size=(batch,1))/66.
            angle = np.random.uniform(args.ph-2.,args.ph+2.,size=(batch,1))
            parab = np.random.uniform(args.param-0.0002,args.param+0.0002,size=(batch,1))
            fdict = {ph:angle,param:parab,w_tv:args.w_tv,\
                     theta:1./scale1*np.hstack([np.cos(alpha1),np.sin(alpha1),-x1/450.,\
                     -np.sin(alpha1),np.cos(alpha1),-y1/450.]),\
                     theta2:scale2*np.hstack([np.cos(alpha2),np.sin(alpha2),np.zeros((batch,1)),\
                     -np.sin(alpha2),np.cos(alpha2),y2]),\
                     logo:np.ones((batch,400,900,3)),\
                     face_input:np.tile(np.expand_dims(im,0),[batch,1,1,1])}
            # Render the all-ones logo first to obtain per-sample masks.
            mask = sess.run(prepared,feed_dict=fdict)
            fdict[mask_input] = mask
            fdict[logo] = np.tile(np.expand_dims(advhat,0),[batch,1,1,1])
            return fdict, sess.run(final_crop,feed_dict=fdict)
    gener = Imgen()

    # Initialization of the sticker: flat gray by default, or optimized to
    # reproduce a given face, or loaded from an image file.
    init_logo = np.ones((400,900,3))*127./255.
    if args.init_face!=None:
        init_face = io.imread(args.init_face)/255.
        init_loss = tv_loss+tf.reduce_sum(tf.abs(init_face-united[0]))
        init_grads = tf.gradients(init_loss,logo)
        init_logo = np.ones((400,900,3))*127./255.
        fdict, _ = gener.gen_fixed(init_face,init_logo)
        moments = np.zeros((400,900,3))
        print('Initialization from face, step 1/2')
        # Signed-gradient descent with momentum; coarse step 1/51.
        for i in tqdm(range(500)):
            fdict[logo] = np.expand_dims(init_logo,0)
            grads = moments*0.9+sess.run(init_grads,feed_dict=fdict)[0][0]
            moments = moments*0.9 + grads*0.1
            init_logo = np.clip(init_logo-1./51.*np.sign(grads),0.,1.)
        print('Initialization from face, step 2/2')
        # Same loop with the fine step 1/255 (one intensity level).
        for i in tqdm(range(500)):
            fdict[logo] = np.expand_dims(init_logo,0)
            grads = moments*0.9+sess.run(init_grads,feed_dict=fdict)[0][0]
            moments = moments*0.9 + grads*0.1
            init_logo = np.clip(init_logo-1./255.*np.sign(grads),0.,1.)
        io.imsave(now+'_init_logo.png',init_logo)
    elif args.init_logo!=None:
        init_logo[:] = io.imread(args.init_logo)/255.
    # Embedding model: load the frozen TF ArcFace graph and grab its
    # input/output tensors by name.
    with tf.gfile.GFile(args.model, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, input_map=None, return_elements=None, name="")
    image_input = tf.get_default_graph().get_tensor_by_name('image_input:0')
    keep_prob = tf.get_default_graph().get_tensor_by_name('keep_prob:0')
    is_train = tf.get_default_graph().get_tensor_by_name('training_mode:0')
    embedding = tf.get_default_graph().get_tensor_by_name('embedding:0')
    orig_emb = tf.placeholder(tf.float32,shape=[None,512],name='orig_emb_input')
    # Dot product of embeddings; this is the quantity being minimized.
    # NOTE(review): treated as cosine similarity, i.e. assumes the graph's
    # 'embedding' output is already L2-normalized — confirm against the model.
    cos_loss = tf.reduce_sum(tf.multiply(embedding,orig_emb),axis=1)
    grads2 = tf.gradients(cos_loss,image_input)
    fdict2 = {keep_prob:1.0,is_train:False}

    # Anchor embedding calculation: from a face image, from a saved .npy
    # (last row), or from the attacked image itself.
    if args.anchor_face!=None:
        anch_im = rescale(io.imread(args.anchor_face)/255.,112./600.,order=5)
        fdict2[image_input] = prep(anch_im)
        fdict2[orig_emb] = sess.run(embedding,feed_dict=fdict2)
    elif args.anchor_emb!=None:
        fdict2[orig_emb] = np.load(args.anchor_emb)[-1:]
    else:
        anch_im = rescale(io.imread(args.image)/255.,112./600.,order=5)
        fdict2[image_input] = prep(anch_im)
        fdict2[orig_emb] = sess.run(embedding,feed_dict=fdict2)

    # Attack constants
    im0 = io.imread(args.image)/255.
    regr = LR(n_jobs=4)  # linear fit over recent losses detects a plateau
    regr_len = 100  # NOTE(review): unused; the window length is hard-coded below
    regr_coef = -1.
    moments = np.zeros((400,900,3))
    moment_val = 0.9     # momentum factor (stage 1)
    step_val = 1./51.    # signed-gradient step size (stage 1)
    stage = 1
    step = 0
    lr_thresh = 100      # minimum steps before plateau detection kicks in
    ls = []              # cosine-loss history
    t = time()
    while True:
        # Projecting sticker to the face and feeding it to the embedding model
        fdict,ims = gener.gen_random(im0,init_logo)
        fdict2[image_input] = prep(ims)
        grad_tmp = sess.run(grads2,feed_dict=fdict2)
        # Loss is logged on the fixed (non-jittered) rendering.
        fdict_val, im_val = gener.gen_fixed(im0,init_logo)
        fdict2[image_input] = prep(im_val)
        ls.append(sess.run(cos_loss,feed_dict=fdict2)[0])

        # Gradients to the original sticker image: push the embedding
        # gradients (NCHW -> NHWC) back through the projection graph,
        # average over the jitter batch, then add the TV-loss gradient.
        fdict[grads_input] = np.transpose(grad_tmp[0],[0,2,3,1])
        grads_on_logo = np.mean(sess.run(grads1,feed_dict=fdict)[0],0)
        grads_on_logo += sess.run(grads_tv,feed_dict=fdict)[0][0]
        moments = moments*moment_val + grads_on_logo*(1.-moment_val)
        init_logo -= step_val*np.sign(moments)
        init_logo = np.clip(init_logo,0.,1.)

        # Logging
        step += 1
        if step%20==0:
            print('Stage:',stage,'Step:',step,'Av. time:',round((time()-t)/step,2),'Loss:',round(ls[-1],2),'Coef:',regr_coef)

        # Switching to the second stage: fit a line to the last 100 losses;
        # a non-negative slope means the loss has plateaued.
        if step>lr_thresh:
            regr.fit(np.expand_dims(np.arange(100),1),np.hstack(ls[-100:]))
            regr_coef = regr.coef_[0]
            if regr_coef>=0:
                if stage==1:
                    # Plateau in stage 1: restart with heavier momentum and
                    # a one-intensity-level step, then run until a new plateau.
                    stage = 2
                    moment_val = 0.995
                    step_val = 1./255.
                    step = 0
                    regr_coef = -1.
                    lr_thresh = 200
                    t = time()
                else:
                    break  # plateau in stage 2: done

    # Save the loss curve and the final adversarial sticker.
    plt.plot(range(len(ls)),ls)
    plt.savefig(now+'_cosine.png')
    io.imsave(now+'_advhat.png',init_logo)

def parse_arguments(argv):
    """Parse CLI arguments; sticker placement defaults match face_preparation.py."""
    parser = argparse.ArgumentParser()
    parser.add_argument('image', type=str, help='Path to the image for attack.')
    parser.add_argument('model', type=str, help='Path to the model for attack.')
    parser.add_argument('--init_face', type=str, default=None, help='Path to the face for sticker inititalization.')
    parser.add_argument('--init_logo', type=str, default=None, help='Path to the image for inititalization.')
    parser.add_argument('--anchor_face', type=str, default=None, help='Path to the anchor face.')
    parser.add_argument('--anchor_emb', type=str, default=None, help='Path to the anchor emb (the last will be used)')
    parser.add_argument('--w_tv', type=float, default=1e-4, help='Weight of the TV loss')
    parser.add_argument('--ph', type=float, default=17., help='Angle of the off-plane rotation')
    parser.add_argument('--param', type=float, default=0.0013, help='Parabola rate for the off-plane parabolic transformation')
    parser.add_argument('--scale', type=float, default=0.465, help='Scaling parameter for the sticker')
    parser.add_argument('--x', type=float, default=0., help='Translation of the sticker along x-axis')
    parser.add_argument('--y', type=float, default=-15., help='Translation of the sticker along y-axis')
    parser.add_argument('--batch_size', type=int, default=20, help='Batch size for attack')
    return parser.parse_args(argv)

if __name__ == '__main__':
    main(parse_arguments(sys.argv[1:]))

================================================
FILE: Attack/cos_mx.py
================================================
import argparse
import sys
import mxnet as mx
import mxnet.ndarray as nd
import numpy as np
import skimage.io as io
from skimage.transform import rescale
from numpy import linalg as LA

# Prepare image to network input format: HWC/NHWC -> NCHW (no rescaling here).
def prep(im):
    if len(im.shape)==3: return
np.transpose(im,[2,0,1]).reshape((1,3,112,112))
    elif len(im.shape)==4:
        return np.transpose(im,[0,3,1,2]).reshape((im.shape[0],3,112,112))

def main(args):
    """Compute cosine similarity of two prepared faces with the MXNet ArcFace model."""
    print(args)

    # Embedding model: take the 'fc1' embedding output of the checkpoint.
    sym, arg_params, aux_params = mx.model.load_checkpoint(args.model, 0)
    sym = sym.get_internals()['fc1_output']
    model = mx.mod.Module(symbol=sym, context=mx.gpu(0), label_names = None)
    model.bind(data_shapes=[('data', (1, 3, 112, 112))])
    model.set_params(arg_params, aux_params)

    # Embedding calculation. Note: this script feeds uint8 [0,255] NCHW
    # images, unlike cos_tf.py which feeds floats in [-1,1].
    im1 = (prep(rescale(io.imread(args.face1)/255.,112./600.,order=5))*255.).astype(np.uint8)
    im2 = (prep(rescale(io.imread(args.face2)/255.,112./600.,order=5))*255.).astype(np.uint8)
    batch = mx.io.DataBatch(data=[nd.array(im1)])
    model.forward(batch, is_train=False)
    emb1 = model.get_outputs()[0].asnumpy()[0]
    batch = mx.io.DataBatch(data=[nd.array(im2)])
    model.forward(batch, is_train=False)
    emb2 = model.get_outputs()[0].asnumpy()[0]

    # Normalization: fc1 output is not unit-length, so normalize explicitly.
    emb1 /= LA.norm(emb1)
    emb2 /= LA.norm(emb2)
    cos_sim = np.sum(emb1 * emb2)

    # Result
    print('Cos_sim(face1, face2) =', cos_sim)

def parse_arguments(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('face1', type=str, help='Path to the preprocessed face1.')
    parser.add_argument('face2', type=str, help='Path to the preprocessed face2.')
    parser.add_argument('model', type=str, help='Path to the model.')
    return parser.parse_args(argv)

if __name__ == '__main__':
    main(parse_arguments(sys.argv[1:]))

================================================
FILE: Attack/cos_tf.py
================================================
import argparse
import sys
import tensorflow as tf
import numpy as np
import skimage.io as io
from skimage.transform import rescale

# Prepare image to network input format: HWC/NHWC [0,1] -> NCHW [-1,1].
def prep(im):
    if len(im.shape)==3:
        return np.transpose(im,[2,0,1]).reshape((1,3,112,112))*2-1
    elif len(im.shape)==4:
        return np.transpose(im,[0,3,1,2]).reshape((im.shape[0],3,112,112))*2-1

def main(args):
    """Compute cosine similarity of two prepared faces with the frozen TF ArcFace model."""
    print(args)
    sess = tf.Session()

    # Embedding model
    with tf.gfile.GFile(args.model, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, input_map=None, return_elements=None, name="")
    image_input = tf.get_default_graph().get_tensor_by_name('image_input:0')
    keep_prob = tf.get_default_graph().get_tensor_by_name('keep_prob:0')
    is_train = tf.get_default_graph().get_tensor_by_name('training_mode:0')
    embedding = tf.get_default_graph().get_tensor_by_name('embedding:0')
    tfdict = {keep_prob:1.0, is_train:False}

    # Embedding calculation: prepared faces are 600x600, downscale to 112x112.
    im1 = prep(rescale(io.imread(args.face1)/255.,112./600.,order=5))
    im2 = prep(rescale(io.imread(args.face2)/255.,112./600.,order=5))
    tfdict[image_input] = im1
    emb1 = sess.run(embedding,feed_dict=tfdict)
    tfdict[image_input] = im2
    emb2 = sess.run(embedding,feed_dict=tfdict)

    # Result. NOTE(review): no explicit normalization here — relies on the
    # graph's 'embedding' output being unit-length; confirm against the model.
    cos_sim = np.sum(emb1 * emb2)
    print('Cos_sim(face1, face2) =', cos_sim)

def parse_arguments(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('face1', type=str, help='Path to the preprocessed face1.')
    parser.add_argument('face2', type=str, help='Path to the preprocessed face2.')
    parser.add_argument('model', type=str, help='Path to the model.')
    return parser.parse_args(argv)

if __name__ == '__main__':
    main(parse_arguments(sys.argv[1:]))

================================================
FILE: Attack/face_preparation.py
================================================
import argparse
import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '../Demo/'))
import tensorflow as tf
import numpy as np
import cv2
import skimage.io as io
from skimage import transform as trans
from align import detect_face
from stn import spatial_transformer_network as stn
from utils import projector

# Align face as ArcFace template
def preprocess(img, landmark):
    """Warp `img` so that its 5 facial landmarks match the ArcFace
    template (scaled from 112x112 up to 600x600)."""
    image_size = [600,600]
    # Reference landmark positions of the 112x112 ArcFace template,
    # scaled to the 600x600 working resolution.
    src = 600./112.*np.array([
      [38.2946, 51.6963],
      [73.5318, 51.5014],
      [56.0252, 71.7366],
      [41.5493, 92.3655],
      [70.7299, 92.2041] ], dtype=np.float32)
    dst = landmark.astype(np.float32)
    tform =
trans.SimilarityTransform() tform.estimate(dst, src) M = tform.params[0:2,:] warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0) return warped def main(args): sess = tf.Session() pnet, rnet, onet = detect_face.create_mtcnn(sess, None) threshold = [ 0.6, 0.7, 0.7 ] factor = 0.709 img = io.imread(args.image) _minsize = min(min(img.shape[0]//5, img.shape[1]//5),80) bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor) assert bounding_boxes.size>0 points = points[:, 0] landmark = points.reshape((2,5)).T warped = preprocess(img, landmark) io.imsave(args.image[:-4]+'_aligned.png',warped) if args.mask: logo_mask = np.ones((1,400,900,3),dtype=np.float32) logo = tf.placeholder(tf.float32,shape=[1,400,900,3]) param = tf.placeholder(tf.float32,shape=[1,1]) ph = tf.placeholder(tf.float32,shape=[1,1]) result = projector(param,ph,logo) face_input = tf.placeholder(tf.float32,shape=[1,600,600,3]) theta = tf.placeholder(tf.float32,shape=[1,6]) prepared = stn(result,theta) united = prepared[:,300:,150:750]+face_input*(1-prepared[:,300:,150:750]) img_with_mask = sess.run(united,feed_dict={ph:[[args.ph]],logo:logo_mask,param:[[args.param]],\ face_input:np.expand_dims(warped/255.,0),\ theta:1./args.scale*np.array([[1.,0.,-args.x/450.,0.,1.,-args.y/450.]])})[0] io.imsave(args.image[:-4]+'_mask.png',img_with_mask) def parse_arguments(argv): parser = argparse.ArgumentParser() parser.add_argument('image', type=str, help='Path to the image.') parser.add_argument('--mask', action='store_true', help='Use when search the sticker parameters') parser.add_argument('--ph', type=float, default=17., help='Angle of the off-plane rotation') parser.add_argument('--param', type=float, default=0.0013, help='Parabola rate for the off-plane parabolic transformation') parser.add_argument('--scale', type=float, default=0.465, help='Scaling parameter for the sticker') parser.add_argument('--x', type=float, default=0., help='Translation 
of the sticker along x-axis') parser.add_argument('--y', type=float, default=-15., help='Translation of the sticker along y-axis') return parser.parse_args(argv) if __name__ == '__main__': main(parse_arguments(sys.argv[1:])) ================================================ FILE: Attack/stn.py ================================================ import tensorflow as tf def spatial_transformer_network(input_fmap, theta, out_dims=None, **kwargs): """ Spatial Transformer Network layer implementation as described in [1]. The layer is composed of 3 elements: - localization_net: takes the original image as input and outputs the parameters of the affine transformation that should be applied to the input image. - affine_grid_generator: generates a grid of (x,y) coordinates that correspond to a set of points where the input should be sampled to produce the transformed output. - bilinear_sampler: takes as input the original image and the grid and produces the output image using bilinear interpolation. Input ----- - input_fmap: output of the previous layer. Can be input if spatial transformer layer is at the beginning of architecture. Should be a tensor of shape (B, H, W, C). - theta: affine transform tensor of shape (B, 6). Permits cropping, translation and isotropic scaling. Initialize to identity matrix. It is the output of the localization network. Returns ------- - out_fmap: transformed input feature map. Tensor of size (B, H, W, C). Notes ----- [1]: 'Spatial Transformer Networks', Jaderberg et. 
al, (https://arxiv.org/abs/1506.02025)
    """
    # grab input dimensions
    B = tf.shape(input_fmap)[0]
    H = tf.shape(input_fmap)[1]
    W = tf.shape(input_fmap)[2]

    # reshape theta to (B, 2, 3)
    theta = tf.reshape(theta, [B, 2, 3])

    # generate grids of same size or upsample/downsample if specified
    if out_dims:
        out_H = out_dims[0]
        out_W = out_dims[1]
        batch_grids = affine_grid_generator(out_H, out_W, theta)
    else:
        batch_grids = affine_grid_generator(H, W, theta)

    x_s = batch_grids[:, 0, :, :]
    y_s = batch_grids[:, 1, :, :]

    # sample input with grid to get output
    out_fmap = bilinear_sampler(input_fmap, x_s, y_s)

    return out_fmap

def get_pixel_value(img, x, y):
    """
    Utility function to get pixel value for coordinate
    vectors x and y from a 4D tensor image.
    Input
    -----
    - img: tensor of shape (B, H, W, C)
    - x: integer tensor of shape (B, H, W)
    - y: integer tensor of shape (B, H, W)
    Returns
    -------
    - output: tensor of shape (B, H, W, C)
    """
    shape = tf.shape(x)
    batch_size = shape[0]
    height = shape[1]
    width = shape[2]

    # Build (b, y, x) index triples so gather_nd picks one pixel per
    # output location from the matching batch element.
    batch_idx = tf.range(0, batch_size)
    batch_idx = tf.reshape(batch_idx, (batch_size, 1, 1))
    b = tf.tile(batch_idx, (1, height, width))

    indices = tf.stack([b, y, x], 3)

    return tf.gather_nd(img, indices)

def affine_grid_generator(height, width, theta):
    """
    This function returns a sampling grid, which when
    used with the bilinear sampler on the input feature
    map, will create an output feature map that is an
    affine transformation [1] of the input feature map.
    Input
    -----
    - height: desired height of grid/output. Used
      to downsample or upsample.
    - width: desired width of grid/output. Used
      to downsample or upsample.
    - theta: affine transform matrices of shape (num_batch, 2, 3).
      For each image in the batch, we have 6 theta parameters of
      the form (2x3) that define the affine transformation T.
    Returns
    -------
    - normalized grid (-1, 1) of shape (num_batch, 2, H, W).
      The 2nd dimension has 2 components: (x, y) which are the
      sampling points of the original image for each point in the
      target image.
    Note
    ----
    [1]: the affine transformation allows cropping, translation,
         and isotropic scaling.
    """
    num_batch = tf.shape(theta)[0]

    # create normalized 2D grid
    x = tf.linspace(-1.0, 1.0, width)
    y = tf.linspace(-1.0, 1.0, height)
    x_t, y_t = tf.meshgrid(x, y)

    # flatten
    x_t_flat = tf.reshape(x_t, [-1])
    y_t_flat = tf.reshape(y_t, [-1])

    # reshape to [x_t, y_t , 1] - (homogeneous form)
    ones = tf.ones_like(x_t_flat)
    sampling_grid = tf.stack([x_t_flat, y_t_flat, ones])

    # repeat grid num_batch times
    sampling_grid = tf.expand_dims(sampling_grid, axis=0)
    sampling_grid = tf.tile(sampling_grid, tf.stack([num_batch, 1, 1]))

    # cast to float32 (required for matmul)
    theta = tf.cast(theta, 'float32')
    sampling_grid = tf.cast(sampling_grid, 'float32')

    # transform the sampling grid - batch multiply
    batch_grids = tf.matmul(theta, sampling_grid)
    # batch grid has shape (num_batch, 2, H*W)

    # reshape to (num_batch, H, W, 2)
    batch_grids = tf.reshape(batch_grids, [num_batch, 2, height, width])

    return batch_grids

def bilinear_sampler(img, x, y):
    """
    Performs bilinear sampling of the input images according to the
    normalized coordinates provided by the sampling grid. Note that
    the sampling is done identically for each channel of the input.
    To test if the function works properly, output image should be
    identical to input image when theta is initialized to identity
    transform.
    Input
    -----
    - img: batch of images in (B, H, W, C) layout.
    - grid: x, y which is the output of affine_grid_generator.
    Returns
    -------
    - out: interpolated images according to grids. Same size as grid.
    """
    H = tf.shape(img)[1]
    W = tf.shape(img)[2]
    max_y = tf.cast(H - 1, 'int32')
    max_x = tf.cast(W - 1, 'int32')
    zero = tf.zeros([], dtype='int32')

    # rescale x and y to [0, W-1/H-1]
    # NOTE(review): scaling by (max-1) instead of max shrinks the sampling
    # range by one pixel relative to the canonical STN formulation — confirm
    # whether this offset is intentional before changing it.
    x = tf.cast(x, 'float32')
    y = tf.cast(y, 'float32')
    x = 0.5 * ((x + 1.0) * tf.cast(max_x-1, 'float32'))
    y = 0.5 * ((y + 1.0) * tf.cast(max_y-1, 'float32'))

    # grab 4 nearest corner points for each (x_i, y_i)
    x0 = tf.cast(tf.floor(x), 'int32')
    x1 = x0 + 1
    y0 = tf.cast(tf.floor(y), 'int32')
    y1 = y0 + 1

    # clip to range [0, H-1/W-1] to not violate img boundaries
    x0 = tf.clip_by_value(x0, zero, max_x)
    x1 = tf.clip_by_value(x1, zero, max_x)
    y0 = tf.clip_by_value(y0, zero, max_y)
    y1 = tf.clip_by_value(y1, zero, max_y)

    # get pixel value at corner coords
    Ia = get_pixel_value(img, x0, y0)
    Ib = get_pixel_value(img, x0, y1)
    Ic = get_pixel_value(img, x1, y0)
    Id = get_pixel_value(img, x1, y1)

    # recast as float for delta calculation
    x0 = tf.cast(x0, 'float32')
    x1 = tf.cast(x1, 'float32')
    y0 = tf.cast(y0, 'float32')
    y1 = tf.cast(y1, 'float32')

    # calculate deltas
    wa = (x1-x) * (y1-y)
    wb = (x1-x) * (y-y0)
    wc = (x-x0) * (y1-y)
    wd = (x-x0) * (y-y0)

    # add dimension for addition
    wa = tf.expand_dims(wa, axis=3)
    wb = tf.expand_dims(wb, axis=3)
    wc = tf.expand_dims(wc, axis=3)
    wd = tf.expand_dims(wd, axis=3)

    # compute output
    out = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id])

    return out

================================================
FILE: Attack/utils.py
================================================
import numpy as np
import tensorflow as tf

# Closed-form antiderivative of sqrt(x^2 + a):
# d/dx [0.5*(x*sqrt(x^2+a) + a*ln|x + sqrt(x^2+a)|)] = sqrt(x^2+a).
def tf_integral(x,a):
    return 0.5*(x*tf.sqrt(x**2+a)+a*tf.log(tf.abs(x+tf.sqrt(x**2+a))))

# Arc length along the parabola z = par*x^2 from the sticker centre
# (x=450) to coordinate x, shifted back into [0,900] pixel coordinates:
# integral of sqrt(1+(2*par*t)^2) dt = 2*par*integral of sqrt(t^2 + 1/(4*par^2)) dt.
def tf_pre_parabol(x,par):
    x = x-450.
    prev = 2.*par*(tf_integral(tf.abs(x),0.25/(par**2))-tf_integral(0,0.25/(par**2)))
    return prev+450.
def projector(param,ph,logo):
    '''Apply off-plane transformations to the sticker images
    param: parabola rate of the off-plane parabolic tranformation, rank 2 tensor with shape [N, 1]
    ph:angle of the off-plane rotation, rank 2 tensor with shape [N, 1]
    logo: rank 4 tensor with format NHWC and shape [N, 400, 900, 3]
    return: rank 4 tensor with format NHWC and shape [N, 900, 900, 3]
    '''
    # Horizontal bending. Differences of cumulative sums between anchor
    # positions average each source span into one output column, i.e. a
    # differentiable box-filter resampling along x; the two halves of the
    # sticker are handled symmetrically around the centre column 450.
    right_cumsum = tf.transpose(tf.pad(tf.cumsum(logo[:,:,450:],axis=2),tf.constant([[0,0],[0,0],[1,0],[0,0]])),[0,2,1,3])
    left_cumsum = tf.transpose(tf.pad(tf.cumsum(logo[:,:,:450][:,:,::-1],axis=2),tf.constant([[0,0],[0,0],[1,0],[0,0]])),[0,2,1,3])
    # Anchor x-positions on the flat sticker for each bent output column,
    # obtained from the parabola arc length (tf_pre_parabol).
    anchors = tf.expand_dims(tf.cast(tf.round(tf.clip_by_value(\
        tf_pre_parabol(tf.expand_dims(tf.constant(np.arange(450,901,dtype=np.float32)),0),\
        param)-450.,0,450.)),tf.int32),2)
    anch_inds = tf.tile(tf.expand_dims(tf.expand_dims(tf.range(tf.shape(param)[0]),1),2),[1,451,1])
    new_anchors = tf.concat([anch_inds,anchors],2)
    anchors_div = tf.expand_dims(tf.cast(tf.clip_by_value(anchors[:,1:]-anchors[:,:-1],1,900),tf.float32),3)
    right_anchors_cumsum = tf.gather_nd(right_cumsum,new_anchors)
    right_anchors_diffs = right_anchors_cumsum[:,1:]-right_anchors_cumsum[:,:-1]
    right = right_anchors_diffs/anchors_div
    left_anchors_cumsum = tf.gather_nd(left_cumsum,new_anchors)
    left_anchors_diffs = left_anchors_cumsum[:,1:]-left_anchors_cumsum[:,:-1]
    left = left_anchors_diffs/anchors_div
    tmp_result = tf.transpose(tf.concat([left[:,::-1],right],axis=1),[0,2,1,3])
    # Vertical off-plane rotation: same cumsum-difference resampling along y.
    cumsum = tf.pad(tf.cumsum(tmp_result,axis=1),tf.constant([[0,0],[1,0],[0,0],[0,0]]))
    angle = tf.expand_dims(np.pi/180.*ph,2)  # degrees -> radians
    # Depth of each bent column (z = param * x^2 around the centre).
    z = param*tf.constant((np.arange(900,dtype=np.float32)-449.5)**2)
    z_tile = tf.tile(tf.expand_dims(z,1),tf.constant([1,901,1]))
    y_coord = tf.constant(np.arange(-250,651,dtype=np.float32))
    y_tile = tf.tile(tf.expand_dims(tf.expand_dims(y_coord,1),0),[tf.shape(param)[0],1,900])
    # Source y on the flat sticker for each output row after rotation by `angle`.
    y_prev = (y_tile+z_tile*tf.sin(-angle))/tf.cos(angle)
    y_round = tf.cast(tf.round(tf.clip_by_value(y_prev,0,400.)),tf.int32)
    y_div = tf.clip_by_value(y_round[:,1:]-y_round[:,:-1],1,900)
    x_coord = tf.constant(np.arange(900,dtype=np.int32))
    x_tile = tf.tile(tf.expand_dims(tf.expand_dims(x_coord,0),0),[tf.shape(param)[0],901,1])
    b_coord = tf.tile(tf.expand_dims(tf.expand_dims(tf.range(tf.shape(param)[0]),1),2),[1,901,900])
    indices = tf.stack([b_coord,y_round,x_tile],axis=3)
    chosen_cumsum = tf.gather_nd(cumsum,indices)
    chosen_cumsum_diffs = chosen_cumsum[:,1:]-chosen_cumsum[:,:-1]
    final_results = tf.clip_by_value(chosen_cumsum_diffs/tf.expand_dims(tf.cast(y_div,tf.float32),3),0.,1.)
    return final_results

def TVloss(logo,w_tv):
    '''Calculate TV loss of the sticker image with predefined weight.
    logo: rank 4 tensor with format NHWC
    w_tv: weight of the TV loss
    return: scalar value of the TV loss
    '''
    # Squared differences of vertical/horizontal neighbours, zero-padded
    # back to the input size so they can be summed elementwise.
    vert_diff = logo[:,1:]-logo[:,:-1]
    hor_diff = logo[:,:,1:]-logo[:,:,:-1]
    vert_diff_sq = tf.square(vert_diff)
    hor_diff_sq = tf.square(hor_diff)
    vert_pad = tf.pad(vert_diff_sq,tf.constant([[0,0],[1,0],[0,0],[0,0]]))
    hor_pad = tf.pad(hor_diff_sq,tf.constant([[0,0],[0,0],[1,0],[0,0]]))
    tv_sum = vert_pad+hor_pad
    tv = tf.sqrt(tv_sum+1e-5)  # epsilon keeps the gradient finite at zero
    tv_final_sum = tf.reduce_sum(tv)
    tv_loss = w_tv*tv_final_sum
    return tv_loss

================================================
FILE: Demo/1000_from_CASIA/centroids_names.txt
================================================
0000045 0000099 0000100 0000102 0000103 0000105 0000107 0000108 0000114 0000117 0000119 0000121 0000133 0000137 0000141 0000143 0000144 0000145 0000147 0000156 0000157 0000159 0000166 0000168 0000169 0000170 0000174 0000177 0000183 0000185 0000186 0000188 0000189 0000192 0000195 0000198 0000202 0000204 0000205 0000207 0000208 0000210 0000211 0000212 0000214 0000220 0000225 0000233 0000238 0000240 0000247 0000249 0000254 0000256 0000260 0000262 0000263 0000268 0000270 0000271 0000272 0000275 0000280 0000281 0000282 0000284 0000286 0000287 0000293 0000295 0000296 0000297 0000299 0000301 0000302
0000304 0000307 0000310 0000317 0000318 0000319 0000321 0000324 0000327 0000331 0000332 0000333 0000334 0000335 0000342 0000343 0000344 0000346 0000349 0000350 0000351 0000352 0000353 0000356 0000357 0000360 0000362 0000363 0000364 0000365 0000368 0000373 0000374 0000381 0000383 0000385 0000386 0000387 0000388 0000389 0000391 0000394 0000396 0000397 0000399 0000402 0000405 0000408 0000410 0000411 0000413 0000415 0000420 0000422 0000426 0000427 0000430 0000431 0000433 0000434 0000436 0000437 0000438 0000439 0000442 0000444 0000446 0000447 0000448 0000451 0000452 0000455 0000457 0000459 0000460 0000461 0000462 0000463 0000464 0000465 0000467 0000471 0000473 0000477 0000480 0000481 0000482 0000483 0000484 0000486 0000487 0000492 0000494 0000495 0000498 0000499 0000500 0000505 0000506 0000510 0000512 0000513 0000514 0000515 0000520 0000521 0000524 0000525 0000526 0000529 0000530 0000531 0000532 0000533 0000534 0000535 0000538 0000539 0000541 0000545 0000546 0000547 0000550 0000551 0000552 0000554 0000555 0000562 0000563 0000568 0000570 0000571 0000574 0000575 0000579 0000580 0000582 0000583 0000585 0000588 0000589 0000592 0000593 0000595 0000596 0000597 0000598 0000599 0000600 0000601 0000605 0000606 0000607 0000609 0000610 0000611 0000612 0000613 0000614 0000615 0000616 0000617 0000619 0000620 0000622 0000623 0000624 0000625 0000628 0000630 0000633 0000637 0000642 0000646 0000648 0000651 0000652 0000653 0000655 0000656 0000657 0000662 0000663 0000664 0000665 0000667 0000670 0000672 0000675 0000678 0000679 0000680 0000688 0000689 0000690 0000691 0000693 0000694 0000695 0000696 0000700 0000703 0000705 0000707 0000708 0000709 0000711 0000717 0000725 0000729 0000745 0000749 0000751 0000756 0000759 0000760 0000775 0000776 0000777 0000782 0000792 0000793 0000796 0000800 0000801 0000803 0000809 0000815 0000816 0000830 0000836 0000837 0000838 0000867 0000868 0000871 0000874 0000876 0000880 0000881 0000884 0000889 0000892 0000893 0000902 0000903 0000915 0000916 0000928 0000929 
0000933 0000934 0000943 0000944 0000948 0000950 0000952 0000954 0000956 0000959 0000960 0000961 0000962 0000965 0000968 0000977 0000980 0000981 0000982 0000985 0000986 0000991 0000996 0000997 0000998 0001002 0001004 0001005 0001006 0001015 0001018 0001019 0001022 0001026 0001029 0001035 0001037 0001038 0001039 0001040 0001043 0001044 0001046 0001048 0001049 0001053 0001054 0001057 0001061 0001062 0001063 0001064 0001065 0001068 0001069 0001071 0001075 0001081 0001082 0001083 0001084 0001086 0001089 0001092 0001096 0001097 0001099 0001101 0001103 0001104 0001107 0001108 0001110 0001111 0001114 0001116 0001117 0001118 0001126 0001127 0001129 0001131 0001138 0001139 0001143 0001146 0001147 0001151 0001152 0001153 0001154 0001155 0001162 0001165 0001166 0001168 0001170 0001172 0001176 0001180 0001183 0001187 0001194 0001199 0001200 0001201 0001208 0001209 0001210 0001217 0001218 0001223 0001231 0001233 0001235 0001240 0001242 0001250 0001251 0001261 0001263 0001265 0001266 0001267 0001272 0001274 0001277 0001281 0001282 0001286 0001287 0001290 0001292 0001293 0001295 0001298 0001302 0001305 0001307 0001309 0001312 0001314 0001315 0001317 0001319 0001323 0001324 0001325 0001326 0001332 0001334 0001337 0001339 0001344 0001346 0001347 0001348 0001353 0001356 0001364 0001365 0001367 0001368 0001370 0001373 0001377 0001378 0001384 0001387 0001388 0001389 0001390 0001392 0001393 0001398 0001399 0001400 0001403 0001404 0001406 0001407 0001409 0001410 0001412 0001416 0001418 0001424 0001431 0001435 0001436 0001438 0001440 0001441 0001442 0001444 0001457 0001459 0001461 0001462 0001467 0001468 0001469 0001472 0001480 0001484 0001488 0001491 0001492 0001493 0001494 0001496 0001497 0001498 0001499 0001504 0001507 0001508 0001515 0001517 0001518 0001519 0001520 0001521 0001524 0001525 0001527 0001529 0001530 0001533 0001538 0001540 0001541 0001542 0001548 0001550 0001554 0001556 0001557 0001558 0001562 0001564 0001565 0001567 0001568 0001569 0001572 0001573 0001574 0001575 0001578 
0001582 0001589 0001590 0001593 0001594 0001595 0001597 0001598 0001599 0001601 0001602 0001604 0001605 0001607 0001608 0001610 0001614 0001617 0001621 0001622 0001624 0001629 0001630 0001631 0001632 0001633 0001634 0001638 0001640 0001641 0001642 0001650 0001652 0001653 0001661 0001662 0001664 0001667 0001668 0001669 0001670 0001675 0001684 0001688 0001694 0001696 0001697 0001698 0001701 0001704 0001709 0001710 0001711 0001713 0001714 0001716 0001720 0001722 0001723 0001728 0001729 0001731 0001732 0001733 0001735 0001736 0001738 0001741 0001742 0001743 0001746 0001748 0001752 0001756 0001758 0001759 0001760 0001762 0001763 0001764 0001767 0001770 0001773 0001774 0001776 0001778 0001780 0001783 0001785 0001787 0001790 0001793 0001794 0001795 0001796 0001797 0001802 0001803 0001804 0001806 0001808 0001810 0001817 0001821 0001823 0001824 0001828 0001832 0001833 0001834 0001835 0001836 0001837 0001838 0001839 0001840 0001841 0001844 0001845 0001848 0001851 0001852 0001857 0001858 0001863 0001868 0001869 0001873 0001877 0001879 0001880 0001882 0001912 0001938 0001942 0001943 0001951 0001952 0001953 0001971 0001973 0001974 0001978 0001993 0002003 0002004 0002006 0002015 0002023 0002026 0002027 0002029 0002033 0002041 0002043 0002055 0002059 0002064 0002065 0002067 0002073 0002076 0002077 0002088 0002090 0002091 0002100 0002102 0002105 0002117 0002119 0002120 0002122 0002124 0002127 0002140 0002142 0002150 0002191 0002217 0002239 0002243 0002253 0002262 0002325 0002332 0002365 0002396 0002436 0002546 0002653 0002657 0002700 0002743 0002773 0002800 0002801 0002871 0002901 0002907 0002913 0002916 0002928 0002936 0002944 0002956 0003067 0003069 0003071 0003072 0003078 0003082 0003115 0003210 0003244 0003265 0003289 0003353 0003354 0003457 0003494 0003506 0003563 0003577 0003620 0003633 0003697 0003777 0003779 0003807 0003888 0003928 0003931 0003941 0003981 0004051 0004056 0004081 0004095 0004109 0004133 0004137 0004147 0004248 0004266 0004284 0004286 0004294 0004303 0004306 
0004328 0004349 0004371 0004376 0004426 0004456 0004518 0004539 0004540 0004623 0004645 0004657 0004691 0004692 0004700 0004705 0004710 0004712 0004715 0004716 0004719 0004721 0004724 0004725 0004727 0004728 0004729 0004731 0004734 0004735 0004736 0004739 0004740 0004741 0004743 0004744 0004745 0004747 0004748 0004749 0004751 0004752 0004753 0004757 0004760 0004761 0004763 0004770 0004771 0004774 0004775 0004779 0004782 0004785 0004786 0004787 0004790 0004792 0004793 0004797 0004801 0004802 0004804 0004805 0004806 0004808 0004809 0004810 0004812 0004814 0004818 0004819 0004820 0004822 0004825 0004826 0004827 0004830 0004834 0004838 0004839 0004840 0004841 0004844 0004846 0004849 0004850 0004852 0004853 0004854 0004857 0004859 0004861 0004865 0004866 0004867 0004868 0004871 0004875 0004879 0004880 0004883 0004884 0004886 0004887 0004889 0004892 0004893 0004894 0004895 0004897 0004898 0004899 0004900 0004904 0004906 0004909 0004911 0004912 0004914 0004917 0004918 0004921 0004922 0004923 0004925 0004928 0004929 0004930 0004933 0004936 0004937 0004939 0004940 0004941 0004943 0004947 0004954 0004955 0004956 0004957 0004959 0004960 0004965 0004966 0004967 0004968 0004969 0004971 0004975 0004977 0004978 0004979 0004980 0004981 0004982 0004984 0004985 0004986 0004987 0004988 0004990 0004991 0004993 0004994 0004996 0004999 0005000 0005002 0005006 0005007 0005009 0005010 0005011 0005012 ================================================ FILE: Demo/README.md ================================================ ## Demo launch ArcFace@ms1m-refine-v2 transformed to TensorFlow is available [here](https://drive.google.com/file/d/1fb70KgMRSmaEUF5cJ67BCD_DmTPCR5uJ/view?usp=sharing). The command for demo launch: `python3 demo.py PATH_TO_THE_DOWNLOADED_MODEL PATH_TO_THE_DIRECTORY_WITH_CLASS_CENTROIDS` Centroids for the first 1000 classes of CASIA are in the "1000_from_CASIA" directory. 
The command for images alignment: `python3 alignment.py PATH_TO_DIRECTORY_WITH_IMAGES PATH_FOR_THE_ALIGNED_IMAGES`
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from six import string_types, iteritems
import numpy as np
import tensorflow as tf
#from math import floor
import cv2
import os


def layer(op):
    '''Decorator for composable network layers.

    Wraps a layer-building method of `Network` so that it (a) pulls its input
    from the network's current terminal nodes, (b) registers its output under
    a unique name in `self.layers`, and (c) returns `self` for chained calls.
    '''

    def layer_decorated(self, *args, **kwargs):
        # Automatically set a name if not provided.
        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
        # Figure out the layer inputs.
        if len(self.terminals) == 0:
            raise RuntimeError('No input variables found for layer %s.' % name)
        elif len(self.terminals) == 1:
            layer_input = self.terminals[0]
        else:
            layer_input = list(self.terminals)
        # Perform the operation and get the output.
        layer_output = op(self, layer_input, *args, **kwargs)
        # Add to layer LUT.
        self.layers[name] = layer_output
        # This output is now the input for the next layer.
        self.feed(layer_output)
        # Return self for chained calls.
        return self

    return layer_decorated


class Network(object):
    '''Small feed-forward graph builder used by the MTCNN P/R/O sub-networks.'''

    def __init__(self, inputs, trainable=True):
        # The input nodes for this network
        self.inputs = inputs
        # The current list of terminal nodes
        self.terminals = []
        # Mapping from layer names to layers
        self.layers = dict(inputs)
        # If true, the resulting variables are set as trainable
        self.trainable = trainable

        self.setup()

    def setup(self):
        '''Construct the network. '''
        raise NotImplementedError('Must be implemented by the subclass.')

    def load(self, data_path, session, ignore_missing=False):
        '''Load network weights.
        data_path: The path to the numpy-serialized network weights
        session: The current TensorFlow session
        ignore_missing: If true, serialized weights for missing layers are ignored.
        '''
        # The .npy checkpoint stores {op_name: {param_name: ndarray}}.
        data_dict = np.load(data_path, encoding='latin1', allow_pickle=True).item() #pylint: disable=no-member

        for op_name in data_dict:
            with tf.variable_scope(op_name, reuse=True):
                for param_name, data in iteritems(data_dict[op_name]):
                    try:
                        var = tf.get_variable(param_name)
                        session.run(var.assign(data))
                    except ValueError:
                        # Variable is absent from the graph; re-raise unless
                        # the caller explicitly asked to skip missing layers.
                        if not ignore_missing:
                            raise

    def feed(self, *args):
        '''Set the input(s) for the next operation by replacing the terminal nodes.
        The arguments can be either layer names or the actual layers.
        '''
        assert len(args) != 0
        self.terminals = []
        for fed_layer in args:
            if isinstance(fed_layer, string_types):
                try:
                    fed_layer = self.layers[fed_layer]
                except KeyError:
                    raise KeyError('Unknown layer name fed: %s' % fed_layer)
            self.terminals.append(fed_layer)
        return self

    def get_output(self):
        '''Returns the current network output.'''
        return self.terminals[-1]

    def get_unique_name(self, prefix):
        '''Returns an index-suffixed unique name for the given prefix.
        This is used for auto-generating layer names based on the type-prefix.
        '''
        ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
        return '%s_%d' % (prefix, ident)
    def make_var(self, name, shape):
        '''Creates a new TensorFlow variable.'''
        return tf.get_variable(name, shape, trainable=self.trainable)

    def validate_padding(self, padding):
        '''Verifies that the padding is one of the supported ones.'''
        assert padding in ('SAME', 'VALID')

    @layer
    def conv(self, inp, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding='SAME', group=1, biased=True):
        '''2-D convolution layer: kernel (k_h, k_w), c_o output channels, strides (s_h, s_w).'''
        # Verify that the padding is acceptable
        self.validate_padding(padding)
        # Get the number of channels in the input
        c_i = int(inp.get_shape()[-1])
        # Verify that the grouping parameter is valid
        assert c_i % group == 0
        assert c_o % group == 0
        # Convolution for a given input and kernel
        convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
        with tf.variable_scope(name) as scope:
            kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
            # This is the common-case. Convolve the input without any further complications.
            output = convolve(inp, kernel)
            # Add the biases
            if biased:
                biases = self.make_var('biases', [c_o])
                output = tf.nn.bias_add(output, biases)
            if relu:
                # ReLU non-linearity
                output = tf.nn.relu(output, name=scope.name)
            return output

    @layer
    def prelu(self, inp, name):
        '''Parametric ReLU with one learned slope per channel.'''
        with tf.variable_scope(name):
            i = int(inp.get_shape()[-1])
            alpha = self.make_var('alpha', shape=(i,))
            # relu(x) handles the positive part; -relu(-x) isolates the
            # negative part, which is scaled by the learned alpha.
            output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp))
        return output

    @layer
    def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'):
        '''Max-pooling layer with window (k_h, k_w) and strides (s_h, s_w).'''
        self.validate_padding(padding)
        return tf.nn.max_pool(inp,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def fc(self, inp, num_out, name, relu=True):
        '''Fully-connected layer with num_out outputs; flattens spatial input first.'''
        with tf.variable_scope(name):
            input_shape = inp.get_shape()
            if input_shape.ndims == 4:
                # The input is spatial. Vectorize it first.
                dim = 1
                for d in input_shape[1:].as_list():
                    dim *= int(d)
                feed_in = tf.reshape(inp, [-1, dim])
            else:
                feed_in, dim = (inp, input_shape[-1].value)
            weights = self.make_var('weights', shape=[dim, num_out])
            biases = self.make_var('biases', [num_out])
            op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
            fc = op(feed_in, weights, biases, name=name)
            return fc

    """
    Multi dimensional softmax,
    refer to https://github.com/tensorflow/tensorflow/issues/210
    compute softmax along the dimension of target
    the native softmax only supports batch_size x dimension
    """
    @layer
    def softmax(self, target, axis, name=None):
        # Subtract the per-axis max for numerical stability before exponentiating.
        max_axis = tf.reduce_max(target, axis, keep_dims=True)
        target_exp = tf.exp(target-max_axis)
        normalize = tf.reduce_sum(target_exp, axis, keep_dims=True)
        softmax = tf.div(target_exp, normalize, name)
        return softmax


class PNet(Network):
    '''MTCNN proposal network (stage 1): fully-convolutional face/box-regression heads.'''

    def setup(self):
        (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
             .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1')
             .prelu(name='PReLU1')
             .max_pool(2, 2, 2, 2, name='pool1')
             .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2')
             .prelu(name='PReLU2')
             .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3')
             .prelu(name='PReLU3')
             .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1')
             .softmax(3,name='prob1'))

        # Box-regression branch shares the trunk up to PReLU3.
        (self.feed('PReLU3') #pylint: disable=no-value-for-parameter
             .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2'))


class RNet(Network):
    '''MTCNN refinement network (stage 2): operates on 24x24 candidate crops.'''

    def setup(self):
        (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
             .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1')
             .prelu(name='prelu1')
             .max_pool(3, 3, 2, 2, name='pool1')
             .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2')
             .prelu(name='prelu2')
             .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
             .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3')
             .prelu(name='prelu3')
             .fc(128, relu=False, name='conv4')
             .prelu(name='prelu4')
             .fc(2, relu=False, name='conv5-1')
             .softmax(1,name='prob1'))

        # Box-regression branch shares the trunk up to prelu4.
        (self.feed('prelu4') #pylint: disable=no-value-for-parameter
             .fc(4, relu=False, name='conv5-2'))
class ONet(Network):
    '''MTCNN output network (stage 3): 48x48 crops, adds 5-point landmark head.'''

    def setup(self):
        (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
             .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1')
             .prelu(name='prelu1')
             .max_pool(3, 3, 2, 2, name='pool1')
             .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2')
             .prelu(name='prelu2')
             .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
             .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3')
             .prelu(name='prelu3')
             .max_pool(2, 2, 2, 2, name='pool3')
             .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4')
             .prelu(name='prelu4')
             .fc(256, relu=False, name='conv5')
             .prelu(name='prelu5')
             .fc(2, relu=False, name='conv6-1')
             .softmax(1, name='prob1'))

        # Box-regression branch.
        (self.feed('prelu5') #pylint: disable=no-value-for-parameter
             .fc(4, relu=False, name='conv6-2'))

        # Facial-landmark branch (5 points -> 10 coordinates).
        (self.feed('prelu5') #pylint: disable=no-value-for-parameter
             .fc(10, relu=False, name='conv6-3'))


def create_mtcnn(sess, model_path):
    '''Build the three MTCNN stages in `sess` and load their .npy weights.

    model_path: directory containing det1.npy/det2.npy/det3.npy; falls back to
    the directory of this source file when falsy.
    Returns three callables (pnet, rnet, onet) that run the respective stage.
    '''
    if not model_path:
        model_path,_ = os.path.split(os.path.realpath(__file__))

    with tf.variable_scope('pnet'):
        # PNet is fully convolutional, so the spatial size is left unspecified.
        data = tf.placeholder(tf.float32, (None,None,None,3), 'input')
        pnet = PNet({'data':data})
        pnet.load(os.path.join(model_path, 'det1.npy'), sess)
    with tf.variable_scope('rnet'):
        data = tf.placeholder(tf.float32, (None,24,24,3), 'input')
        rnet = RNet({'data':data})
        rnet.load(os.path.join(model_path, 'det2.npy'), sess)
    with tf.variable_scope('onet'):
        data = tf.placeholder(tf.float32, (None,48,48,3), 'input')
        onet = ONet({'data':data})
        onet.load(os.path.join(model_path, 'det3.npy'), sess)

    # Each callable feeds one stage and fetches its (regression, probability
    # [, landmark]) output tensors by graph name.
    pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img})
    rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img})
    onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img})
    return pnet_fun, rnet_fun, onet_fun
def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
    '''Run the full three-stage MTCNN cascade on a single image.

    img: input image (HxWx3)
    minsize: minimum face size in pixels
    pnet, rnet, onet: stage callables from create_mtcnn
    threshold: [th1, th2, th3], per-stage score thresholds
    factor: scale-pyramid step factor
    Returns (total_boxes, points): Nx5 boxes [x1,y1,x2,y2,score] and 10xN landmarks.
    '''
    factor_count=0
    total_boxes=np.empty((0,9))
    points=[]
    h=img.shape[0]
    w=img.shape[1]
    minl=np.amin([h, w])
    m=12.0/minsize
    minl=minl*m
    # create scale pyramid
    scales=[]
    while minl>=12:
        scales += [m*np.power(factor, factor_count)]
        minl = minl*factor
        factor_count += 1

    # first stage: run PNet at every pyramid scale and pool the proposals
    for j in range(len(scales)):
        scale=scales[j]
        hs=int(np.ceil(h*scale))
        ws=int(np.ceil(w*scale))
        im_data = imresample(img, (hs, ws))
        # Normalize to roughly [-1, 1] (127.5 offset, 1/128 scale).
        im_data = (im_data-127.5)*0.0078125
        img_x = np.expand_dims(im_data, 0)
        # Swap H/W axes: the weights follow the original Caffe ordering.
        img_y = np.transpose(img_x, (0,2,1,3))
        out = pnet(img_y)
        out0 = np.transpose(out[0], (0,2,1,3))
        out1 = np.transpose(out[1], (0,2,1,3))

        boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0])

        # inter-scale nms
        pick = nms(boxes.copy(), 0.5, 'Union')
        if boxes.size>0 and pick.size>0:
            boxes = boxes[pick,:]
            total_boxes = np.append(total_boxes, boxes, axis=0)

    numbox = total_boxes.shape[0]
    if numbox>0:
        pick = nms(total_boxes.copy(), 0.7, 'Union')
        total_boxes = total_boxes[pick,:]
        # Apply PNet's regression offsets (columns 5..8) to refine the boxes.
        regw = total_boxes[:,2]-total_boxes[:,0]
        regh = total_boxes[:,3]-total_boxes[:,1]
        qq1 = total_boxes[:,0]+total_boxes[:,5]*regw
        qq2 = total_boxes[:,1]+total_boxes[:,6]*regh
        qq3 = total_boxes[:,2]+total_boxes[:,7]*regw
        qq4 = total_boxes[:,3]+total_boxes[:,8]*regh
        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]]))
        total_boxes = rerec(total_boxes.copy())
        total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32)
        dy,edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)

    numbox = total_boxes.shape[0]
    if numbox>0:
        # second stage: score each padded 24x24 crop with RNet
        tempimg = np.zeros((24,24,3,numbox))
        for k in range(0,numbox):
            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
            if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
                tempimg[:,:,:,k] = imresample(tmp, (24, 24))
            else:
                return np.empty()
        tempimg = (tempimg-127.5)*0.0078125
        tempimg1 = np.transpose(tempimg, (3,1,0,2))
        out = rnet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        score = out1[1,:]
        ipass = np.where(score>threshold[1])
        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
        mv = out0[:,ipass[0]]
        if total_boxes.shape[0]>0:
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick,:]
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
            total_boxes = rerec(total_boxes.copy())

    numbox = total_boxes.shape[0]
    if numbox>0:
        # third stage: ONet rescoring plus 5-point landmark regression
        total_boxes = np.fix(total_boxes).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
        tempimg = np.zeros((48,48,3,numbox))
        for k in range(0,numbox):
            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
            if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
                tempimg[:,:,:,k] = imresample(tmp, (48, 48))
            else:
                return np.empty()
        tempimg = (tempimg-127.5)*0.0078125
        tempimg1 = np.transpose(tempimg, (3,1,0,2))
        out = onet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        out2 = np.transpose(out[2])
        score = out2[1,:]
        points = out1
        ipass = np.where(score>threshold[2])
        points = points[:,ipass[0]]
        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
        mv = out0[:,ipass[0]]

        # Convert relative landmark coordinates to absolute image coordinates.
        w = total_boxes[:,2]-total_boxes[:,0]+1
        h = total_boxes[:,3]-total_boxes[:,1]+1
        points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
        points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
        if total_boxes.shape[0]>0:
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
            pick = nms(total_boxes.copy(), 0.7, 'Min')
            total_boxes = total_boxes[pick,:]
            points = points[:,pick]

    return total_boxes, points
def detect_face_force(img, bbox, pnet, rnet, onet):
    '''Run only MTCNN stages 2 and 3 on a caller-supplied box.

    bbox is taken as the single candidate (all thresholds zeroed), so RNet/ONet
    refine it and regress landmarks without running the PNet proposal stage.
    NOTE(review): block-level nesting reconstructed from a line-wrapped dump;
    it mirrors detect_face's structure — confirm against the original file.
    '''
    total_boxes = np.zeros( (1,5), dtype=np.float32)
    total_boxes[0,0:4] = bbox
    # Thresholds are zero: every candidate survives each stage's score test.
    threshold = [0.0,0.0,0.0]
    h=img.shape[0]
    w=img.shape[1]
    numbox = total_boxes.shape[0]
    if numbox>0:
        dy,edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
        # second stage
        tempimg = np.zeros((24,24,3,numbox))
        for k in range(0,numbox):
            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
            if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
                tempimg[:,:,:,k] = imresample(tmp, (24, 24))
            else:
                return np.empty()
        tempimg = (tempimg-127.5)*0.0078125
        tempimg1 = np.transpose(tempimg, (3,1,0,2))
        out = rnet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        score = out1[1,:]
        ipass = np.where(score>threshold[1])
        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
        mv = out0[:,ipass[0]]
        if total_boxes.shape[0]>0:
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick,:]
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
            total_boxes = rerec(total_boxes.copy())
        numbox = total_boxes.shape[0]
        if numbox>0:
            # third stage
            total_boxes = np.fix(total_boxes).astype(np.int32)
            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
            tempimg = np.zeros((48,48,3,numbox))
            for k in range(0,numbox):
                tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
                tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
                if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
                    tempimg[:,:,:,k] = imresample(tmp, (48, 48))
                else:
                    return np.empty()
            tempimg = (tempimg-127.5)*0.0078125
            tempimg1 = np.transpose(tempimg, (3,1,0,2))
            out = onet(tempimg1)
            out0 = np.transpose(out[0])
            out1 = np.transpose(out[1])
            out2 = np.transpose(out[2])
            score = out2[1,:]
            points = out1
            ipass = np.where(score>threshold[2])
            points = points[:,ipass[0]]
            total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
            mv = out0[:,ipass[0]]

            # Convert relative landmark coordinates to absolute image coordinates.
            w = total_boxes[:,2]-total_boxes[:,0]+1
            h = total_boxes[:,3]-total_boxes[:,1]+1
            points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
            points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
            if total_boxes.shape[0]>0:
                total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
                pick = nms(total_boxes.copy(), 0.7, 'Min')
                total_boxes = total_boxes[pick,:]
                points = points[:,pick]
    return total_boxes, points
def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
    '''Run the MTCNN cascade on a batch of images, sharing network calls.

    detection_window_size_ratio: minimum face size as a fraction of min(w, h)
    threshold: [th1, th2, th3], per-stage score thresholds in [0, 1]
    Returns a list aligned with `images`; each entry is (boxes, points) or None
    when no face survived the cascade for that image.
    '''
    all_scales = [None] * len(images)
    images_with_boxes = [None] * len(images)

    for i in range(len(images)):
        images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}

    # create scale pyramid (one per image; minsize is derived per image)
    for index, img in enumerate(images):
        all_scales[index] = []
        h = img.shape[0]
        w = img.shape[1]
        minsize = int(detection_window_size_ratio * np.minimum(w, h))
        factor_count = 0
        minl = np.amin([h, w])
        if minsize <= 12:
            minsize = 12

        m = 12.0 / minsize
        minl = minl * m
        while minl >= 12:
            all_scales[index].append(m * np.power(factor, factor_count))
            minl = minl * factor
            factor_count += 1

    # # # # # # # # # # # # #
    # first stage - fast proposal network (pnet) to obtain face candidates
    # # # # # # # # # # # # #

    # Group pyramid images by resolution so each PNet call handles a batch.
    images_obj_per_resolution = {}

    # TODO: use some type of rounding to number module 8 to increase probability that pyramid images will have the same resolution across input images

    for index, scales in enumerate(all_scales):
        h = images[index].shape[0]
        w = images[index].shape[1]

        for scale in scales:
            hs = int(np.ceil(h * scale))
            ws = int(np.ceil(w * scale))

            if (ws, hs) not in images_obj_per_resolution:
                images_obj_per_resolution[(ws, hs)] = []

            im_data = imresample(images[index], (hs, ws))
            im_data = (im_data - 127.5) * 0.0078125
            img_y = np.transpose(im_data, (1, 0, 2))  # caffe uses different dimensions ordering
            images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})

    for resolution in images_obj_per_resolution:
        images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
        outs = pnet(images_per_resolution)

        for index in range(len(outs[0])):
            scale = images_obj_per_resolution[resolution][index]['scale']
            image_index = images_obj_per_resolution[resolution][index]['index']
            out0 = np.transpose(outs[0][index], (1, 0, 2))
            out1 = np.transpose(outs[1][index], (1, 0, 2))

            boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])

            # inter-scale nms
            pick = nms(boxes.copy(), 0.5, 'Union')
            if boxes.size > 0 and pick.size > 0:
                boxes = boxes[pick, :]
                images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
                                                                          boxes,
                                                                          axis=0)

    for index, image_obj in enumerate(images_with_boxes):
        numbox = image_obj['total_boxes'].shape[0]
        if numbox > 0:
            h = images[index].shape[0]
            w = images[index].shape[1]

            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            # Apply PNet's regression offsets (columns 5..8) to refine the boxes.
            regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
            regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
            qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
            qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
            qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
            qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
            image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))

            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
            image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)

            numbox = image_obj['total_boxes'].shape[0]
            tempimg = np.zeros((24, 24, 3, numbox))

            if numbox > 0:
                for k in range(0, numbox):
                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]

                    if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                        tempimg[:, :, :, k] = imresample(tmp, (24, 24))
                    else:
                        return np.empty()

                tempimg = (tempimg - 127.5) * 0.0078125
                image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))

    # # # # # # # # # # # # #
    # second stage - refinement of face candidates with rnet
    # # # # # # # # # # # # #

    # Concatenate every image's crops into one batch for a single RNet call.
    bulk_rnet_input = np.empty((0, 24, 24, 3))
    for index, image_obj in enumerate(images_with_boxes):
        if 'rnet_input' in image_obj:
            bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)

    out = rnet(bulk_rnet_input)
    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    score = out1[1, :]

    # Slice the bulk output back out per image using running offset `i`.
    i = 0
    for index, image_obj in enumerate(images_with_boxes):
        if 'rnet_input' not in image_obj:
            continue

        rnet_input_count = image_obj['rnet_input'].shape[0]
        score_per_image = score[i:i + rnet_input_count]
        out0_per_image = out0[:, i:i + rnet_input_count]

        ipass = np.where(score_per_image > threshold[1])
        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                              np.expand_dims(score_per_image[ipass].copy(), 1)])

        mv = out0_per_image[:, ipass[0]]

        if image_obj['total_boxes'].shape[0] > 0:
            h = images[index].shape[0]
            w = images[index].shape[1]
            pick = nms(image_obj['total_boxes'], 0.7, 'Union')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())

            numbox = image_obj['total_boxes'].shape[0]

            if numbox > 0:
                tempimg = np.zeros((48, 48, 3, numbox))
                image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
                dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)

                for k in range(0, numbox):
                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]

                    if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                        tempimg[:, :, :, k] = imresample(tmp, (48, 48))
                    else:
                        return np.empty()

                tempimg = (tempimg - 127.5) * 0.0078125
                image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))

        i += rnet_input_count

    # # # # # # # # # # # # #
    # third stage - further refinement and facial landmarks positions with onet
    # # # # # # # # # # # # #

    bulk_onet_input = np.empty((0, 48, 48, 3))
    for index, image_obj in enumerate(images_with_boxes):
        if 'onet_input' in image_obj:
            bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)

    out = onet(bulk_onet_input)

    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    out2 = np.transpose(out[2])
    score = out2[1, :]
    points = out1

    i = 0
    ret = []
    for index, image_obj in enumerate(images_with_boxes):
        if 'onet_input' not in image_obj:
            ret.append(None)
            continue

        onet_input_count = image_obj['onet_input'].shape[0]

        out0_per_image = out0[:, i:i + onet_input_count]
        score_per_image = score[i:i + onet_input_count]
        points_per_image = points[:, i:i + onet_input_count]

        ipass = np.where(score_per_image > threshold[2])
        points_per_image = points_per_image[:, ipass[0]]

        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                              np.expand_dims(score_per_image[ipass].copy(), 1)])
        mv = out0_per_image[:, ipass[0]]

        # Convert relative landmark coordinates to absolute image coordinates.
        w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
        h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
        points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
            image_obj['total_boxes'][:, 0], (5, 1)) - 1
        points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
            image_obj['total_boxes'][:, 1], (5, 1)) - 1

        if image_obj['total_boxes'].shape[0] > 0:
            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            points_per_image = points_per_image[:, pick]

            ret.append((image_obj['total_boxes'], points_per_image))
        else:
            ret.append(None)

        i += onet_input_count

    return ret
# function [boundingbox] = bbreg(boundingbox,reg)
def bbreg(boundingbox, reg):
    """Apply per-box regression offsets to `boundingbox` in place and return it.

    `reg` holds fractional offsets (relative to box width/height) for the four
    corner coordinates; a squeezed 4-D `reg` is first reshaped to 2-D.
    """
    if reg.shape[1] == 1:
        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))

    width = boundingbox[:, 2] - boundingbox[:, 0] + 1
    height = boundingbox[:, 3] - boundingbox[:, 1] + 1
    shifted = np.vstack([
        boundingbox[:, 0] + reg[:, 0] * width,
        boundingbox[:, 1] + reg[:, 1] * height,
        boundingbox[:, 2] + reg[:, 2] * width,
        boundingbox[:, 3] + reg[:, 3] * height,
    ])
    boundingbox[:, 0:4] = shifted.T
    return boundingbox


def generateBoundingBox(imap, reg, scale, t):
    """Turn a PNet heatmap into candidate boxes at the original image scale.

    imap: 2-D face-probability map; reg: HxWx4 regression map; t: score cutoff.
    Returns (boundingbox, reg): Nx9 rows [q1(2), q2(2), score, reg(4)] and the
    Nx4 regression rows.
    """
    STRIDE = 2
    CELLSIZE = 12

    imap = np.transpose(imap)
    offsets = [np.transpose(reg[:, :, c]) for c in range(4)]
    y, x = np.where(imap >= t)
    # Single-row heatmaps come out flipped; undo that before gathering.
    if y.shape[0] == 1:
        offsets = [np.flipud(o) for o in offsets]
    score = imap[(y, x)]
    reg = np.transpose(np.vstack([o[(y, x)] for o in offsets]))
    if reg.size == 0:
        reg = np.empty((0, 3))

    bb = np.transpose(np.vstack([y, x]))
    # Map heatmap cells back to (1-based) pixel coordinates at this scale.
    q1 = np.fix((STRIDE * bb + 1) / scale)
    q2 = np.fix((STRIDE * bb + CELLSIZE - 1 + 1) / scale)
    boundingbox = np.hstack([q1, q2, np.expand_dims(score, 1), reg])
    return boundingbox, reg


# function pick = nms(boxes,threshold,type)
def nms(boxes, threshold, method):
    """Greedy non-maximum suppression.

    boxes: Nx5 array [x1, y1, x2, y2, score]; method: 'Min' divides the
    intersection by the smaller area, anything else uses IoU ('Union').
    Returns the indices of the kept boxes, highest score first.
    """
    if boxes.size == 0:
        return np.empty((0, 3))

    x1, y1, x2, y2, s = (boxes[:, c] for c in range(5))
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = np.argsort(s)  # ascending; the best box is always at the end
    pick = np.zeros_like(s, dtype=np.int16)
    counter = 0
    while order.size > 0:
        best = order[-1]
        pick[counter] = best
        counter += 1
        rest = order[0:-1]
        xx1 = np.maximum(x1[best], x1[rest])
        yy1 = np.maximum(y1[best], y1[rest])
        xx2 = np.minimum(x2[best], x2[rest])
        yy2 = np.minimum(y2[best], y2[rest])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        if method == 'Min':
            overlap = inter / np.minimum(area[best], area[rest])
        else:
            overlap = inter / (area[best] + area[rest] - inter)
        order = rest[np.where(overlap <= threshold)]
    return pick[0:counter]
# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
def pad(total_boxes, w, h):
    """Compute crop coordinates for each box, clipped to the w x h image.

    Coordinates are MATLAB-style 1-based inclusive, matching the original
    MTCNN code. Returns (dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph), where
    (dy..edx) index into the zero-padded destination patch and (y..ex) index
    into the source image; tmpw/tmph are the unclipped box sizes.
    """
    tmpw = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32)
    tmph = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32)
    numbox = total_boxes.shape[0]

    dx = np.ones((numbox), dtype=np.int32)
    dy = np.ones((numbox), dtype=np.int32)
    edx = tmpw.copy().astype(np.int32)
    edy = tmph.copy().astype(np.int32)

    x = total_boxes[:, 0].copy().astype(np.int32)
    y = total_boxes[:, 1].copy().astype(np.int32)
    ex = total_boxes[:, 2].copy().astype(np.int32)
    ey = total_boxes[:, 3].copy().astype(np.int32)

    # Clip right/bottom edges that overshoot the image.
    over = np.where(ex > w)
    edx[over] = w - ex[over] + tmpw[over]
    ex[over] = w
    over = np.where(ey > h)
    edy[over] = h - ey[over] + tmph[over]
    ey[over] = h
    # Clip left/top edges that fall before the first (1-based) pixel.
    under = np.where(x < 1)
    dx[under] = 2 - x[under]
    x[under] = 1
    under = np.where(y < 1)
    dy[under] = 2 - y[under]
    y[under] = 1

    return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph


# function [bboxA] = rerec(bboxA)
def rerec(bboxA):
    """Grow each box into a square about its centre, in place; return bboxA."""
    h = bboxA[:, 3] - bboxA[:, 1]
    w = bboxA[:, 2] - bboxA[:, 0]
    side = np.maximum(w, h)
    bboxA[:, 0] = bboxA[:, 0] + w * 0.5 - side * 0.5
    bboxA[:, 1] = bboxA[:, 1] + h * 0.5 - side * 0.5
    bboxA[:, 2:4] = bboxA[:, 0:2] + np.transpose(np.tile(side, (2, 1)))
    return bboxA


def imresample(img, sz):
    """Resize `img` to sz=(height, width); cv2.resize takes (width, height)."""
    return cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA)  # @UndefinedVariable
interpolation=cv2.INTER_AREA)  #@UndefinedVariable
    return im_data

    # This method is kept for debugging purpose
#    h=img.shape[0]
#    w=img.shape[1]
#    hs, ws = sz
#    dx = float(w) / ws
#    dy = float(h) / hs
#    im_data = np.zeros((hs,ws,3))
#    for a1 in range(0,hs):
#        for a2 in range(0,ws):
#            for a3 in range(0,3):
#                im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
#    return im_data

================================================ FILE: Demo/alignment.py ================================================

import argparse
import numpy as np
import cv2
from skimage import transform as trans
import tensorflow as tf
import os
import skimage.io as io
import sys
from tqdm import tqdm
import align.detect_face as detect_face


# Transform grey image to RGB image
def to_rgb(img):
    """Stack a 2-D grayscale image into a 3-channel uint8 RGB image."""
    w, h = img.shape
    ret = np.empty((w, h, 3), dtype=np.uint8)
    ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
    return ret


# Align face as ArcFace template
def preprocess(img, landmark):
    """Warp `img` so its 5 facial landmarks match the 112x112 ArcFace template.

    landmark: (5, 2) array of [x, y] points (eyes, nose, mouth corners).
    Returns the aligned 112x112 crop.
    """
    image_size = [112, 112]
    # Canonical ArcFace landmark positions for a 112x112 crop.
    src = np.array([[38.2946, 51.6963],
                    [73.5318, 51.5014],
                    [56.0252, 71.7366],
                    [41.5493, 92.3655],
                    [70.7299, 92.2041]], dtype=np.float32)
    dst = landmark.astype(np.float32)
    tform = trans.SimilarityTransform()
    tform.estimate(dst, src)
    M = tform.params[0:2, :]
    warped = cv2.warpAffine(img, M, (image_size[1], image_size[0]), borderValue=0.0)
    return warped


def main(args):
    """Detect and align every face image found under args.input_dir, mirroring
    the directory structure into args.output_dir."""
    # MTCNN
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709  # image pyramid scaling factor
    # Output dirs creation
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    images = []
    for path in sorted(os.listdir(args.input_dir)):
        if not os.path.exists(os.path.join(args.output_dir, path)):
            os.mkdir(os.path.join(args.output_dir, path))
        for name in sorted(os.listdir(os.path.join(args.input_dir, path))):
            images.append(os.path.join(path, name))
    # Alignment procedure
    for path in tqdm(images):
        img = io.imread(os.path.join(args.input_dir, path))
        if
img.ndim == 2:
            # Grayscale input: promote to 3-channel RGB.
            img = to_rgb(img)
        img = img[:, :, 0:3]  # drop any alpha channel
        # Minimum detectable face: a fifth of the smaller image side, capped at 80 px.
        _minsize = min(min(img.shape[0] // 5, img.shape[1] // 5), 80)
        bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor)
        if bounding_boxes.size > 0:
            bindex = -1
            nrof_faces = bounding_boxes.shape[0]
            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(img.shape)[0:2]
                bindex = 0
                if nrof_faces > 1:
                    # Several faces: prefer the largest one that is also close
                    # to the image center.
                    bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                    img_center = img_size / 2
                    offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                         (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                    bindex = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
            # MTCNN returns landmarks as [x1..x5, y1..y5]; reshape to (5, 2).
            points = points[:, bindex]
            landmark = points.reshape((2, 5)).T
            warped = preprocess(img, landmark)
            io.imsave(os.path.join(args.output_dir, path), warped)
        else:
            print(path + ' was skipped')


def parse_arguments(argv):
    """Parse positional input/output directory arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument('input_dir', type=str, help='Directory with unaligned images.')
    parser.add_argument('output_dir', type=str, help='Directory for aligned face thumbnails.')
    return parser.parse_args(argv)


if __name__ == '__main__':
    main(parse_arguments(sys.argv[1:]))

================================================ FILE: Demo/demo.py ================================================

import sys
import argparse
import numpy as np
import cv2
import tensorflow as tf
from align import detect_face
from skimage import transform as trans
from skimage.io import imsave
import os
import datetime


# Align face as ArcFace template
def preprocess(img, landmark):
    """Warp `img` so its 5 facial landmarks match the 112x112 ArcFace template.

    landmark: (5, 2) array of [x, y] points (eyes, nose, mouth corners).
    Returns the aligned 112x112 crop.
    """
    image_size = [112, 112]
    # Canonical ArcFace landmark positions for a 112x112 crop.
    src = np.array([[38.2946, 51.6963],
                    [73.5318, 51.5014],
                    [56.0252, 71.7366],
                    [41.5493, 92.3655],
                    [70.7299, 92.2041]], dtype=np.float32)
    dst = landmark.astype(np.float32)
    tform = trans.SimilarityTransform()
    tform.estimate(dst, src)
    M = tform.params[0:2, :]
    warped = cv2.warpAffine(img, M, (image_size[1], image_size[0]), borderValue=0.0)
    return warped


def main(args):
    """Webcam demo: detect faces, embed them with ArcFace and show the
    nearest identity centroid for each face."""
    # Models
download frozen_graph = args.model with tf.gfile.GFile(frozen_graph, "rb") as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) with tf.Graph().as_default() as graph: tf.import_graph_def(graph_def, input_map=None, return_elements=None, name="") image_input = graph.get_tensor_by_name('image_input:0') keep_prob = graph.get_tensor_by_name('keep_prob:0') is_train = graph.get_tensor_by_name('training_mode:0') embedding = graph.get_tensor_by_name('embedding:0') minsize = 100 threshold = [ 0.6, 0.7, 0.7 ] factor = 0.709 sess = tf.Session(graph=graph) pnet, rnet, onet = detect_face.create_mtcnn(sess, None) # Centroids download anchor = np.load(os.path.join(args.centroids,'centroids.npy')) names = open(os.path.join(args.centroids,'centroids_names.txt')).read().split('\n')[:-1] IDcolor = [255., 255., 255.] IDcolor2 = [255., 0., 0.] video_capture = cv2.VideoCapture(0) video_capture.set(3, 1280) video_capture.set(4, 1024) while(True): # Start of video sequence processing ret, frame = video_capture.read() frame = cv2.flip(frame[:,:,::-1], 1) if not ret: print('Cannot access the webcam') break key = cv2.waitKey(1) if key == ord('q'): break if key == ord('s'): imsave('Demo-'+str(datetime.datetime.now())+'.jpg',frame) # Search and preparation of all faces on the frame bounding_boxes, points = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor) batch = np.zeros((bounding_boxes.shape[0],3,112,112),dtype=np.float32) for i in range(bounding_boxes.shape[0]): landmark = points[:,i].reshape((2,5)).T warped = preprocess(frame, landmark = landmark) warped = np.transpose(warped,[2,0,1]).reshape((1,3,112,112)) batch[i] = (warped-127.5)*0.0078125 # Recognition of all faces if batch.shape[0]!=0: embs = sess.run(embedding,feed_dict={image_input:batch,keep_prob:1.0,is_train:False}) for i in range(bounding_boxes.shape[0]): probabilities = np.dot(anchor,embs[i]) val = np.max(probabilities) pos = np.argmax(probabilities) pt1 = (int(bounding_boxes[i][0]), 
int(bounding_boxes[i][1]))
                pt2 = (int(bounding_boxes[i][2]), int(bounding_boxes[i][3]))
                # Draw the face box plus the best-matching class name and its similarity.
                cv2.rectangle(frame, pt1, pt2, IDcolor)
                cv2.putText(frame, 'Top-1 class: ' + names[pos],
                            (int(bounding_boxes[i][0]), int(bounding_boxes[i][1]) - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 1., IDcolor, 3)
                cv2.putText(frame, 'Sim. to top-1 class: ' + str(round(val, 4)),
                            (int(bounding_boxes[i][0]), int(bounding_boxes[i][3]) + 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1., IDcolor, 3)
        # Convert back to BGR for OpenCV display.
        cv2.imshow('Camera ("q" to quit, "s" to save frame)', frame[:, :, ::-1])
    video_capture.release()
    cv2.destroyAllWindows()


def parse_arguments(argv):
    """Parse model path and centroid-directory arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument('model', type=str, help='Path to the model.')
    # NOTE(review): "centoids" typo below is in the user-visible help string;
    # left untouched here because doc-only edits must not alter runtime text.
    parser.add_argument('centroids', type=str, help='Dir with centoids of classes for classifier.')
    return parser.parse_args(argv)


if __name__ == '__main__':
    main(parse_arguments(sys.argv[1:]))

================================================ FILE: Demo/dumping.py ================================================

import skimage.io as io
import os
import numpy as np
from tqdm import tqdm
import sys
import argparse


def main(args):
    """Embed every aligned image under args.input_dir and dump per-class
    centroids (and their names) into args.output_dir."""
    # Output dirs creation
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    images = []
    labels = []
    label = 0  # integer class id, one per subdirectory
    for path in sorted(os.listdir(args.input_dir)):
        for name in sorted(os.listdir(os.path.join(args.input_dir, path))):
            if args.mx:
                # MXNet's ImageIter expects ([label], path) records.
                images.append([[label], os.path.join(args.input_dir, path, name)])
            else:
                images.append(os.path.join(args.input_dir, path, name))
            labels.append(label)
        label += 1
    if args.mx:
        # MXnet model
        import mxnet as mx
        sym, arg_params, aux_params = mx.model.load_checkpoint(args.model, 0)
        sym = sym.get_internals()['fc1_output']
        model = mx.mod.Module(symbol=sym, context=mx.gpu(0), label_names=None)
        model.bind(data_shapes=[('data', (1, 3, 112, 112))])
        model.set_params(arg_params, aux_params)
        iterator = mx.image.ImageIter(batch_size=args.batch, data_shape=(3, 112, 112), imglist=images, path_root='')
    else:
        # TensorFlow model
        import tensorflow as tf
frozen_graph = args.model with tf.gfile.GFile(frozen_graph, "rb") as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) with tf.Graph().as_default() as graph: tf.import_graph_def(graph_def, input_map=None, return_elements=None, name="") image_input = graph.get_tensor_by_name('image_input:0') keep_prob = graph.get_tensor_by_name('keep_prob:0') is_train = graph.get_tensor_by_name('training_mode:0') embedding = graph.get_tensor_by_name('embedding:0') sess = tf.Session(graph=graph) inp_place = tf.placeholder(np.array(['1','2'],dtype='str').dtype) pipeline = tf.data.Dataset.from_tensor_slices(inp_place) def parse(filename): image_string = tf.read_file(filename) image = tf.image.decode_jpeg(image_string,dct_method="INTEGER_ACCURATE") image = tf.cast(image,tf.float32) image = (image - 127.5)*0.0078125 image = tf.transpose(image,perm=[2,0,1]) return image pipeline = pipeline.map(parse,num_parallel_calls=4) pipeline = pipeline.batch(args.batch) pipiline = pipeline.prefetch(8) iterator = pipeline.make_initializable_iterator() next_element = iterator.get_next() sess.run(iterator.initializer,feed_dict={inp_place:images}) # Embeddings evaluation embs = np.zeros((len(images),512),dtype=np.float32) for i in tqdm(range(int(np.ceil(len(images)/args.batch)))): if args.mx: db = mx.io.DataBatch(data=iterator.next().data) model.forward(db, is_train=False) emb = model.get_outputs()[0].asnumpy() length = min(args.batch,len(images)-i*args.batch) embs[i*args.batch:i*args.batch+length] = emb[:length]/np.expand_dims(np.sqrt(np.sum(emb[:length]**2,1)),1) else: db = sess.run(next_element) embs[i*args.batch:min((i+1)*args.batch,len(images))] = sess.run(embedding,feed_dict=\ {image_input:db,keep_prob:1.0,is_train:False}) # Centroids preparation anchor = np.zeros((label,512),dtype=np.float32) labels = np.array(labels) for i in range(label): tmp = np.sum(embs[labels==i],axis=0) anchor[i] = tmp/np.sqrt(np.sum(tmp**2)) np.save(os.path.join(args.output_dir,'centroids'),anchor) names = 
open(os.path.join(args.output_dir, 'centroids_names.txt'), 'w')
    # One class name per line, in the same order as the centroid rows.
    for i in sorted(os.listdir(args.input_dir)):
        names.write(i + '\n')
    names.close()


def parse_arguments(argv):
    """Parse input/output directories, model path and runtime options."""
    parser = argparse.ArgumentParser()
    parser.add_argument('input_dir', type=str, help='Directory with aligned images.')
    parser.add_argument('output_dir', type=str, help='Directory to save embeddings.')
    parser.add_argument('model', type=str, help='Path to the model.')
    parser.add_argument('--mx', action='store_true', help='Flag to use the original mxnet model.')
    parser.add_argument('--batch', type=int, help='Batch size.', default=30)
    return parser.parse_args(argv)


if __name__ == '__main__':
    main(parse_arguments(sys.argv[1:]))

================================================ FILE: LICENSE ================================================

MIT License

Copyright (c) 2019 Stepan Komkov, Aleksandr Petiushko

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
================================================ FILE: README.md ================================================

# AdvHat: Real-world adversarial attack on ArcFace Face ID system

By Stepan Komkov and Aleksandr Petiushko

This is the code repository for the AdvHat research article. The article is available [here](https://arxiv.org/abs/1908.08705). The video demo is available [here](https://youtu.be/a4iNg0wWBsQ). The code used for the article is available right here.

## Abstract

We propose a novel, easily reproducible technique to attack the best public Face ID system ArcFace in different shooting conditions. To create an attack, we print the rectangular paper sticker on a common color printer and put it on the hat. The adversarial sticker is prepared with a novel algorithm for off-plane transformations of the image which imitates sticker location on the hat. Such an approach confuses the state-of-the-art public Face ID model LResNet100E-IR, ArcFace@ms1m-refine-v2 and is transferable to other Face ID models.

## The repository

The repository is organized as follows:

* In the Attack directory, you can find code and instructions on how to reproduce an attack for your images.
* In the Demo directory, you can find a demo script which can help you to verify the robustness of the prepared attack to real-world shooting conditions.

## Built With

* [InsightFace's ArcFace](https://github.com/deepinsight/insightface) - The SOTA public FaceID model
* [Kevin Zakka's STN](https://github.com/kevinzakka/spatial-transformer-network) - Spatial Transformer implementation

## Citation

```
@article{komkov2019advhat,
title={AdvHat: Real-world adversarial attack on ArcFace Face ID system},
author={Komkov, Stepan and Petiushko, Aleksandr},
journal={arXiv preprint arXiv:1908.08705},
year={2019}
}
```

## License

This project is licensed under the MIT License - see the [LICENSE.md](https://github.com/papermsucode/advhat/blob/master/LICENSE) file for details.
================================================ FILE: Utils/MXtoTF.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import mxnet as mx\n", "from mxnet import ndarray as nd\n", "\n", "from easydict import EasyDict as edict\n", "import numpy as np\n", "import os\n", "from tqdm import tqdm\n", "import skimage.io as io\n", "\n", "import tensorflow as tf\n", "import tensorflow.contrib.slim as slim\n", "gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)\n", "sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### My versions of libraries" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'1.4.1'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mx.__version__" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'1.13.1'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tf.__version__" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Transformation of the MXNet model weights to NumPy" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "ctx = mx.gpu(0)\n", "net = edict()\n", "net.ctx = ctx\n", "net.sym, net.arg_params, net.aux_params = mx.model.load_checkpoint('../../Matcher/models/model-r34-amf/model', 0)\n", "all_layers = net.sym.get_internals()\n", "net.sym = all_layers['fc1_output']\n", "net.model = mx.mod.Module(symbol=net.sym, context=net.ctx, label_names = None)\n", "net.model.bind(data_shapes=[('data', (2, 3, 112, 112))])\n", "net.model.set_params(net.arg_params, net.aux_params)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "stage1_length = 
len(set([x.name.split('_')[1] for x in net.sym.get_internals() if 'stage1' in x.name]))\n", "stage2_length = len(set([x.name.split('_')[1] for x in net.sym.get_internals() if 'stage2' in x.name]))\n", "stage3_length = len(set([x.name.split('_')[1] for x in net.sym.get_internals() if 'stage3' in x.name]))\n", "stage4_length = len(set([x.name.split('_')[1] for x in net.sym.get_internals() if 'stage4' in x.name]))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "plot\n", "\n", "\n", "\n", "data\n", "\n", "data\n", "\n", "\n", "\n", "id\n", "\n", "id\n", "\n", "\n", "\n", "id->data\n", "\n", "\n", "\n", "\n", "\n", "_minusscalar0\n", "\n", "_minusscalar0\n", "\n", "\n", "\n", "_minusscalar0->id\n", "\n", "\n", "\n", "\n", "\n", "_mulscalar0\n", "\n", "_mulscalar0\n", "\n", "\n", "\n", "_mulscalar0->_minusscalar0\n", "\n", "\n", "\n", "\n", "\n", "conv0\n", "\n", "Convolution\n", "3x3/1x1, 64\n", "\n", "\n", "\n", "conv0->_mulscalar0\n", "\n", "\n", "\n", "\n", "\n", "bn0\n", "\n", "bn0\n", "\n", "\n", "\n", "bn0->conv0\n", "\n", "\n", "\n", "\n", "\n", "relu0\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "relu0->bn0\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit1_bn1\n", "\n", "stage1_unit1_bn1\n", "\n", "\n", "\n", "stage1_unit1_bn1->relu0\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit1_conv1\n", "\n", "Convolution\n", "3x3/1x1, 64\n", "\n", "\n", "\n", "stage1_unit1_conv1->stage1_unit1_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit1_bn2\n", "\n", "stage1_unit1_bn2\n", "\n", "\n", "\n", "stage1_unit1_bn2->stage1_unit1_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit1_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage1_unit1_relu1->stage1_unit1_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit1_conv2\n", "\n", "Convolution\n", "3x3/2x2, 64\n", "\n", "\n", "\n", "stage1_unit1_conv2->stage1_unit1_relu1\n", "\n", "\n", "\n", "\n", "\n", 
"stage1_unit1_bn3\n", "\n", "stage1_unit1_bn3\n", "\n", "\n", "\n", "stage1_unit1_bn3->stage1_unit1_conv2\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit1_conv1sc\n", "\n", "Convolution\n", "1x1/2x2, 64\n", "\n", "\n", "\n", "stage1_unit1_conv1sc->relu0\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit1_sc\n", "\n", "stage1_unit1_sc\n", "\n", "\n", "\n", "stage1_unit1_sc->stage1_unit1_conv1sc\n", "\n", "\n", "\n", "\n", "\n", "_plus0\n", "\n", "_plus0\n", "\n", "\n", "\n", "_plus0->stage1_unit1_bn3\n", "\n", "\n", "\n", "\n", "\n", "_plus0->stage1_unit1_sc\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit2_bn1\n", "\n", "stage1_unit2_bn1\n", "\n", "\n", "\n", "stage1_unit2_bn1->_plus0\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit2_conv1\n", "\n", "Convolution\n", "3x3/1x1, 64\n", "\n", "\n", "\n", "stage1_unit2_conv1->stage1_unit2_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit2_bn2\n", "\n", "stage1_unit2_bn2\n", "\n", "\n", "\n", "stage1_unit2_bn2->stage1_unit2_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit2_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage1_unit2_relu1->stage1_unit2_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit2_conv2\n", "\n", "Convolution\n", "3x3/1x1, 64\n", "\n", "\n", "\n", "stage1_unit2_conv2->stage1_unit2_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit2_bn3\n", "\n", "stage1_unit2_bn3\n", "\n", "\n", "\n", "stage1_unit2_bn3->stage1_unit2_conv2\n", "\n", "\n", "\n", "\n", "\n", "_plus1\n", "\n", "_plus1\n", "\n", "\n", "\n", "_plus1->_plus0\n", "\n", "\n", "\n", "\n", "\n", "_plus1->stage1_unit2_bn3\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit3_bn1\n", "\n", "stage1_unit3_bn1\n", "\n", "\n", "\n", "stage1_unit3_bn1->_plus1\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit3_conv1\n", "\n", "Convolution\n", "3x3/1x1, 64\n", "\n", "\n", "\n", "stage1_unit3_conv1->stage1_unit3_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit3_bn2\n", "\n", "stage1_unit3_bn2\n", "\n", "\n", "\n", 
"stage1_unit3_bn2->stage1_unit3_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit3_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage1_unit3_relu1->stage1_unit3_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit3_conv2\n", "\n", "Convolution\n", "3x3/1x1, 64\n", "\n", "\n", "\n", "stage1_unit3_conv2->stage1_unit3_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage1_unit3_bn3\n", "\n", "stage1_unit3_bn3\n", "\n", "\n", "\n", "stage1_unit3_bn3->stage1_unit3_conv2\n", "\n", "\n", "\n", "\n", "\n", "_plus2\n", "\n", "_plus2\n", "\n", "\n", "\n", "_plus2->_plus1\n", "\n", "\n", "\n", "\n", "\n", "_plus2->stage1_unit3_bn3\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit1_bn1\n", "\n", "stage2_unit1_bn1\n", "\n", "\n", "\n", "stage2_unit1_bn1->_plus2\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit1_conv1\n", "\n", "Convolution\n", "3x3/1x1, 128\n", "\n", "\n", "\n", "stage2_unit1_conv1->stage2_unit1_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit1_bn2\n", "\n", "stage2_unit1_bn2\n", "\n", "\n", "\n", "stage2_unit1_bn2->stage2_unit1_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit1_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage2_unit1_relu1->stage2_unit1_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit1_conv2\n", "\n", "Convolution\n", "3x3/2x2, 128\n", "\n", "\n", "\n", "stage2_unit1_conv2->stage2_unit1_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit1_bn3\n", "\n", "stage2_unit1_bn3\n", "\n", "\n", "\n", "stage2_unit1_bn3->stage2_unit1_conv2\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit1_conv1sc\n", "\n", "Convolution\n", "1x1/2x2, 128\n", "\n", "\n", "\n", "stage2_unit1_conv1sc->_plus2\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit1_sc\n", "\n", "stage2_unit1_sc\n", "\n", "\n", "\n", "stage2_unit1_sc->stage2_unit1_conv1sc\n", "\n", "\n", "\n", "\n", "\n", "_plus3\n", "\n", "_plus3\n", "\n", "\n", "\n", "_plus3->stage2_unit1_bn3\n", "\n", "\n", "\n", "\n", "\n", "_plus3->stage2_unit1_sc\n", "\n", "\n", "\n", "\n", "\n", 
"stage2_unit2_bn1\n", "\n", "stage2_unit2_bn1\n", "\n", "\n", "\n", "stage2_unit2_bn1->_plus3\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit2_conv1\n", "\n", "Convolution\n", "3x3/1x1, 128\n", "\n", "\n", "\n", "stage2_unit2_conv1->stage2_unit2_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit2_bn2\n", "\n", "stage2_unit2_bn2\n", "\n", "\n", "\n", "stage2_unit2_bn2->stage2_unit2_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit2_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage2_unit2_relu1->stage2_unit2_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit2_conv2\n", "\n", "Convolution\n", "3x3/1x1, 128\n", "\n", "\n", "\n", "stage2_unit2_conv2->stage2_unit2_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit2_bn3\n", "\n", "stage2_unit2_bn3\n", "\n", "\n", "\n", "stage2_unit2_bn3->stage2_unit2_conv2\n", "\n", "\n", "\n", "\n", "\n", "_plus4\n", "\n", "_plus4\n", "\n", "\n", "\n", "_plus4->_plus3\n", "\n", "\n", "\n", "\n", "\n", "_plus4->stage2_unit2_bn3\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit3_bn1\n", "\n", "stage2_unit3_bn1\n", "\n", "\n", "\n", "stage2_unit3_bn1->_plus4\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit3_conv1\n", "\n", "Convolution\n", "3x3/1x1, 128\n", "\n", "\n", "\n", "stage2_unit3_conv1->stage2_unit3_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit3_bn2\n", "\n", "stage2_unit3_bn2\n", "\n", "\n", "\n", "stage2_unit3_bn2->stage2_unit3_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit3_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage2_unit3_relu1->stage2_unit3_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit3_conv2\n", "\n", "Convolution\n", "3x3/1x1, 128\n", "\n", "\n", "\n", "stage2_unit3_conv2->stage2_unit3_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit3_bn3\n", "\n", "stage2_unit3_bn3\n", "\n", "\n", "\n", "stage2_unit3_bn3->stage2_unit3_conv2\n", "\n", "\n", "\n", "\n", "\n", "_plus5\n", "\n", "_plus5\n", "\n", "\n", "\n", "_plus5->_plus4\n", "\n", "\n", "\n", "\n", "\n", 
"_plus5->stage2_unit3_bn3\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit4_bn1\n", "\n", "stage2_unit4_bn1\n", "\n", "\n", "\n", "stage2_unit4_bn1->_plus5\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit4_conv1\n", "\n", "Convolution\n", "3x3/1x1, 128\n", "\n", "\n", "\n", "stage2_unit4_conv1->stage2_unit4_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit4_bn2\n", "\n", "stage2_unit4_bn2\n", "\n", "\n", "\n", "stage2_unit4_bn2->stage2_unit4_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit4_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage2_unit4_relu1->stage2_unit4_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit4_conv2\n", "\n", "Convolution\n", "3x3/1x1, 128\n", "\n", "\n", "\n", "stage2_unit4_conv2->stage2_unit4_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage2_unit4_bn3\n", "\n", "stage2_unit4_bn3\n", "\n", "\n", "\n", "stage2_unit4_bn3->stage2_unit4_conv2\n", "\n", "\n", "\n", "\n", "\n", "_plus6\n", "\n", "_plus6\n", "\n", "\n", "\n", "_plus6->_plus5\n", "\n", "\n", "\n", "\n", "\n", "_plus6->stage2_unit4_bn3\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit1_bn1\n", "\n", "stage3_unit1_bn1\n", "\n", "\n", "\n", "stage3_unit1_bn1->_plus6\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit1_conv1\n", "\n", "Convolution\n", "3x3/1x1, 256\n", "\n", "\n", "\n", "stage3_unit1_conv1->stage3_unit1_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit1_bn2\n", "\n", "stage3_unit1_bn2\n", "\n", "\n", "\n", "stage3_unit1_bn2->stage3_unit1_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit1_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage3_unit1_relu1->stage3_unit1_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit1_conv2\n", "\n", "Convolution\n", "3x3/2x2, 256\n", "\n", "\n", "\n", "stage3_unit1_conv2->stage3_unit1_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit1_bn3\n", "\n", "stage3_unit1_bn3\n", "\n", "\n", "\n", "stage3_unit1_bn3->stage3_unit1_conv2\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit1_conv1sc\n", "\n", "Convolution\n", 
"1x1/2x2, 256\n", "\n", "\n", "\n", "stage3_unit1_conv1sc->_plus6\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit1_sc\n", "\n", "stage3_unit1_sc\n", "\n", "\n", "\n", "stage3_unit1_sc->stage3_unit1_conv1sc\n", "\n", "\n", "\n", "\n", "\n", "_plus7\n", "\n", "_plus7\n", "\n", "\n", "\n", "_plus7->stage3_unit1_bn3\n", "\n", "\n", "\n", "\n", "\n", "_plus7->stage3_unit1_sc\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit2_bn1\n", "\n", "stage3_unit2_bn1\n", "\n", "\n", "\n", "stage3_unit2_bn1->_plus7\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit2_conv1\n", "\n", "Convolution\n", "3x3/1x1, 256\n", "\n", "\n", "\n", "stage3_unit2_conv1->stage3_unit2_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit2_bn2\n", "\n", "stage3_unit2_bn2\n", "\n", "\n", "\n", "stage3_unit2_bn2->stage3_unit2_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit2_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage3_unit2_relu1->stage3_unit2_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit2_conv2\n", "\n", "Convolution\n", "3x3/1x1, 256\n", "\n", "\n", "\n", "stage3_unit2_conv2->stage3_unit2_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit2_bn3\n", "\n", "stage3_unit2_bn3\n", "\n", "\n", "\n", "stage3_unit2_bn3->stage3_unit2_conv2\n", "\n", "\n", "\n", "\n", "\n", "_plus8\n", "\n", "_plus8\n", "\n", "\n", "\n", "_plus8->_plus7\n", "\n", "\n", "\n", "\n", "\n", "_plus8->stage3_unit2_bn3\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit3_bn1\n", "\n", "stage3_unit3_bn1\n", "\n", "\n", "\n", "stage3_unit3_bn1->_plus8\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit3_conv1\n", "\n", "Convolution\n", "3x3/1x1, 256\n", "\n", "\n", "\n", "stage3_unit3_conv1->stage3_unit3_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit3_bn2\n", "\n", "stage3_unit3_bn2\n", "\n", "\n", "\n", "stage3_unit3_bn2->stage3_unit3_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit3_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage3_unit3_relu1->stage3_unit3_bn2\n", "\n", "\n", "\n", "\n", "\n", 
"stage3_unit3_conv2\n", "\n", "Convolution\n", "3x3/1x1, 256\n", "\n", "\n", "\n", "stage3_unit3_conv2->stage3_unit3_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit3_bn3\n", "\n", "stage3_unit3_bn3\n", "\n", "\n", "\n", "stage3_unit3_bn3->stage3_unit3_conv2\n", "\n", "\n", "\n", "\n", "\n", "_plus9\n", "\n", "_plus9\n", "\n", "\n", "\n", "_plus9->_plus8\n", "\n", "\n", "\n", "\n", "\n", "_plus9->stage3_unit3_bn3\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit4_bn1\n", "\n", "stage3_unit4_bn1\n", "\n", "\n", "\n", "stage3_unit4_bn1->_plus9\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit4_conv1\n", "\n", "Convolution\n", "3x3/1x1, 256\n", "\n", "\n", "\n", "stage3_unit4_conv1->stage3_unit4_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit4_bn2\n", "\n", "stage3_unit4_bn2\n", "\n", "\n", "\n", "stage3_unit4_bn2->stage3_unit4_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit4_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage3_unit4_relu1->stage3_unit4_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit4_conv2\n", "\n", "Convolution\n", "3x3/1x1, 256\n", "\n", "\n", "\n", "stage3_unit4_conv2->stage3_unit4_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit4_bn3\n", "\n", "stage3_unit4_bn3\n", "\n", "\n", "\n", "stage3_unit4_bn3->stage3_unit4_conv2\n", "\n", "\n", "\n", "\n", "\n", "_plus10\n", "\n", "_plus10\n", "\n", "\n", "\n", "_plus10->_plus9\n", "\n", "\n", "\n", "\n", "\n", "_plus10->stage3_unit4_bn3\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit5_bn1\n", "\n", "stage3_unit5_bn1\n", "\n", "\n", "\n", "stage3_unit5_bn1->_plus10\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit5_conv1\n", "\n", "Convolution\n", "3x3/1x1, 256\n", "\n", "\n", "\n", "stage3_unit5_conv1->stage3_unit5_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit5_bn2\n", "\n", "stage3_unit5_bn2\n", "\n", "\n", "\n", "stage3_unit5_bn2->stage3_unit5_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit5_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", 
"stage3_unit5_relu1->stage3_unit5_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit5_conv2\n", "\n", "Convolution\n", "3x3/1x1, 256\n", "\n", "\n", "\n", "stage3_unit5_conv2->stage3_unit5_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit5_bn3\n", "\n", "stage3_unit5_bn3\n", "\n", "\n", "\n", "stage3_unit5_bn3->stage3_unit5_conv2\n", "\n", "\n", "\n", "\n", "\n", "_plus11\n", "\n", "_plus11\n", "\n", "\n", "\n", "_plus11->_plus10\n", "\n", "\n", "\n", "\n", "\n", "_plus11->stage3_unit5_bn3\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit6_bn1\n", "\n", "stage3_unit6_bn1\n", "\n", "\n", "\n", "stage3_unit6_bn1->_plus11\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit6_conv1\n", "\n", "Convolution\n", "3x3/1x1, 256\n", "\n", "\n", "\n", "stage3_unit6_conv1->stage3_unit6_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit6_bn2\n", "\n", "stage3_unit6_bn2\n", "\n", "\n", "\n", "stage3_unit6_bn2->stage3_unit6_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit6_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage3_unit6_relu1->stage3_unit6_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit6_conv2\n", "\n", "Convolution\n", "3x3/1x1, 256\n", "\n", "\n", "\n", "stage3_unit6_conv2->stage3_unit6_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage3_unit6_bn3\n", "\n", "stage3_unit6_bn3\n", "\n", "\n", "\n", "stage3_unit6_bn3->stage3_unit6_conv2\n", "\n", "\n", "\n", "\n", "\n", "_plus12\n", "\n", "_plus12\n", "\n", "\n", "\n", "_plus12->_plus11\n", "\n", "\n", "\n", "\n", "\n", "_plus12->stage3_unit6_bn3\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit1_bn1\n", "\n", "stage4_unit1_bn1\n", "\n", "\n", "\n", "stage4_unit1_bn1->_plus12\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit1_conv1\n", "\n", "Convolution\n", "3x3/1x1, 512\n", "\n", "\n", "\n", "stage4_unit1_conv1->stage4_unit1_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit1_bn2\n", "\n", "stage4_unit1_bn2\n", "\n", "\n", "\n", "stage4_unit1_bn2->stage4_unit1_conv1\n", "\n", "\n", "\n", "\n", "\n", 
"stage4_unit1_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage4_unit1_relu1->stage4_unit1_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit1_conv2\n", "\n", "Convolution\n", "3x3/2x2, 512\n", "\n", "\n", "\n", "stage4_unit1_conv2->stage4_unit1_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit1_bn3\n", "\n", "stage4_unit1_bn3\n", "\n", "\n", "\n", "stage4_unit1_bn3->stage4_unit1_conv2\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit1_conv1sc\n", "\n", "Convolution\n", "1x1/2x2, 512\n", "\n", "\n", "\n", "stage4_unit1_conv1sc->_plus12\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit1_sc\n", "\n", "stage4_unit1_sc\n", "\n", "\n", "\n", "stage4_unit1_sc->stage4_unit1_conv1sc\n", "\n", "\n", "\n", "\n", "\n", "_plus13\n", "\n", "_plus13\n", "\n", "\n", "\n", "_plus13->stage4_unit1_bn3\n", "\n", "\n", "\n", "\n", "\n", "_plus13->stage4_unit1_sc\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit2_bn1\n", "\n", "stage4_unit2_bn1\n", "\n", "\n", "\n", "stage4_unit2_bn1->_plus13\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit2_conv1\n", "\n", "Convolution\n", "3x3/1x1, 512\n", "\n", "\n", "\n", "stage4_unit2_conv1->stage4_unit2_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit2_bn2\n", "\n", "stage4_unit2_bn2\n", "\n", "\n", "\n", "stage4_unit2_bn2->stage4_unit2_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit2_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage4_unit2_relu1->stage4_unit2_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit2_conv2\n", "\n", "Convolution\n", "3x3/1x1, 512\n", "\n", "\n", "\n", "stage4_unit2_conv2->stage4_unit2_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit2_bn3\n", "\n", "stage4_unit2_bn3\n", "\n", "\n", "\n", "stage4_unit2_bn3->stage4_unit2_conv2\n", "\n", "\n", "\n", "\n", "\n", "_plus14\n", "\n", "_plus14\n", "\n", "\n", "\n", "_plus14->_plus13\n", "\n", "\n", "\n", "\n", "\n", "_plus14->stage4_unit2_bn3\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit3_bn1\n", "\n", "stage4_unit3_bn1\n", "\n", "\n", 
"\n", "stage4_unit3_bn1->_plus14\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit3_conv1\n", "\n", "Convolution\n", "3x3/1x1, 512\n", "\n", "\n", "\n", "stage4_unit3_conv1->stage4_unit3_bn1\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit3_bn2\n", "\n", "stage4_unit3_bn2\n", "\n", "\n", "\n", "stage4_unit3_bn2->stage4_unit3_conv1\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit3_relu1\n", "\n", "LeakyReLU\n", "prelu\n", "\n", "\n", "\n", "stage4_unit3_relu1->stage4_unit3_bn2\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit3_conv2\n", "\n", "Convolution\n", "3x3/1x1, 512\n", "\n", "\n", "\n", "stage4_unit3_conv2->stage4_unit3_relu1\n", "\n", "\n", "\n", "\n", "\n", "stage4_unit3_bn3\n", "\n", "stage4_unit3_bn3\n", "\n", "\n", "\n", "stage4_unit3_bn3->stage4_unit3_conv2\n", "\n", "\n", "\n", "\n", "\n", "_plus15\n", "\n", "_plus15\n", "\n", "\n", "\n", "_plus15->_plus14\n", "\n", "\n", "\n", "\n", "\n", "_plus15->stage4_unit3_bn3\n", "\n", "\n", "\n", "\n", "\n", "bn1\n", "\n", "bn1\n", "\n", "\n", "\n", "bn1->_plus15\n", "\n", "\n", "\n", "\n", "\n", "dropout0\n", "\n", "dropout0\n", "\n", "\n", "\n", "dropout0->bn1\n", "\n", "\n", "\n", "\n", "\n", "pre_fc1\n", "\n", "FullyConnected\n", "512\n", "\n", "\n", "\n", "pre_fc1->dropout0\n", "\n", "\n", "\n", "\n", "\n", "fc1\n", "\n", "fc1\n", "\n", "\n", "\n", "fc1->pre_fc1\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mx.viz.plot_network(net.sym)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 110/110 [00:00<00:00, 5757.45it/s]\n", "100%|██████████| 166/166 [00:00<00:00, 1132.65it/s]\n" ] } ], "source": [ "try:\n", " os.mkdir('Weights')\n", "except:\n", " pass\n", "for i in tqdm(net.aux_params.keys()):\n", " np.save('Weights/'+i,net.aux_params[i].asnumpy())\n", "for i in tqdm(net.arg_params.keys()):\n", " 
np.save('Weights/'+i,net.arg_params[i].asnumpy())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Testing batch to compare embeddings" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "img = io.imread('002.jpg')\n", "img = np.transpose(img, [2,0,1])\n", "b = np.random.uniform(0,255,size=(2,3,112,112))\n", "b[0] = img" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "data = mx.nd.array(b)\n", "db = mx.io.DataBatch(data=(data,))\n", "net.model.forward(db, is_train=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Common functions" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "def conv(x,name,size,inm,outm,stride=1,pad='SAME'):\n", " W = tf.get_variable('W_'+name,dtype=tf.float32,shape=[size,size,inm,outm],initializer=tf.constant_initializer(np.transpose(np.load('Weights/'+name+'_weight.npy'),[2,3,1,0])))\n", " return tf.nn.conv2d(x,W,[1,1,stride,stride],pad,name=name,data_format='NCHW')\n", "def bn(x,name):\n", " return slim.batch_norm(x,center=True,scale=True,epsilon=2e-5,\n", " param_initializers={\n", " 'beta':tf.constant_initializer(np.load('Weights/'+name+'_beta.npy')),\n", " 'gamma':tf.constant_initializer(np.load('Weights/'+name+'_gamma.npy')),\n", " 'moving_mean':tf.constant_initializer(np.load('Weights/'+name+'_moving_mean.npy')),\n", " 'moving_variance':tf.constant_initializer(np.load('Weights/'+name+'_moving_var.npy'))},\n", " is_training=train,data_format='NCHW')\n", "def lrelu(x,name,maps):\n", " a = tf.get_variable('a_'+name,dtype=tf.float32,shape=[maps],initializer=tf.constant_initializer(np.load('Weights/'+name+'_gamma.npy')))\n", " a = tf.reshape(a,[maps,1,1])\n", " return tf.nn.relu(x,name=name+'_pos')-tf.nn.relu(-x,name=name+'_neg')*a\n", "def reduce_block(x,name,mapsin,mapsout):\n", " v1 = bn(x,name+'_bn1')\n", " v1 = conv(v1,name+'_conv1',3,mapsin,mapsout)\n", " v1 = 
bn(v1,name+'_bn2')\n", " v1 = lrelu(v1,name+'_relu1',mapsout)\n", " v1 = tf.pad(v1,[[0,0],[0, 0,], [1, 0],[1,0]])\n", " v1 = conv(v1,name+'_conv2',3,mapsout,mapsout,2,'VALID')\n", " v1 = bn(v1,name+'_bn3')\n", " x = conv(x,name+'_conv1sc',1,mapsin,mapsout,2)\n", " x = bn(x,name+'_sc')\n", " return x+v1\n", "def block(x,name,maps):\n", " v1 = bn(x,name+'_bn1')\n", " v1 = conv(v1,name+'_conv1',3,maps,maps)\n", " v1 = bn(v1,name+'_bn2')\n", " v1 = lrelu(v1,name+'_relu1',maps)\n", " v1 = conv(v1,name+'_conv2',3,maps,maps)\n", " v1 = bn(v1,name+'_bn3')\n", " return x+v1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## TensorFlow twin " ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "x = tf.placeholder(tf.float32, [None,3,112,112], 'image_input')\n", "train = tf.placeholder(tf.bool,name='training_mode')\n", "prob = tf.placeholder(tf.float32,name='keep_prob')" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From /home/stepan/.local/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Colocations handled automatically by placer.\n" ] } ], "source": [ "conv0 = conv(x,'conv0',3,3,64)\n", "bn0 = bn(conv0,'bn0')\n", "relu0 = lrelu(bn0,'relu0',64)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "bl1 = reduce_block(relu0,'stage1_unit1',64,64)\n", "for i in range(stage1_length-1):\n", " bl1 = block(bl1,'stage1_unit'+str(i+2),64)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "bl2 = reduce_block(bl1,'stage2_unit1',64,128)\n", "for i in range(stage2_length-1):\n", " bl2 = block(bl2,'stage2_unit'+str(i+2),128)" ] }, { "cell_type": "code", 
"execution_count": 15, "metadata": {}, "outputs": [], "source": [ "bl3 = reduce_block(bl2,'stage3_unit1',128,256)\n", "for i in range(stage3_length-1):\n", " bl3 = block(bl3,'stage3_unit'+str(i+2),256)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "bl4 = reduce_block(bl3,'stage4_unit1',256,512)\n", "for i in range(stage4_length-1):\n", " bl4 = block(bl4,'stage4_unit'+str(i+2),512)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From :2: flatten (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Use keras.layers.flatten instead.\n", "WARNING:tensorflow:From :3: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Please use `rate` instead of `keep_prob`. 
Rate should be set to `rate = 1 - keep_prob`.\n" ] } ], "source": [ "bn1 = bn(bl4,'bn1')\n", "flat = tf.layers.flatten(bn1)\n", "flat = tf.nn.dropout(flat,prob)\n", "Wfc = tf.get_variable('W_fc',dtype=tf.float32,shape=[25088, 512],initializer=tf.constant_initializer(np.transpose(np.load('Weights/pre_fc1_weight.npy'),[1,0])))\n", "fc = tf.matmul(flat, Wfc)\n", "fc = bn(fc,'fc1')" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "L1 difference between embeddings: 3.9339066e-06\n" ] } ], "source": [ "sess.run(tf.global_variables_initializer())\n", "print('L1 difference between embeddings:',np.max(np.abs(sess.run(fc,feed_dict={x:(b - 127.5)*0.0078125,prob:1.0,train:False})-net.model.get_outputs()[0].asnumpy())))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "x_norm = tf.nn.l2_normalize(fc,1,name='embedding')" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "graph = tf.get_default_graph()\n", "input_graph_def = graph.as_graph_def()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From :5: convert_variables_to_constants (from tensorflow.python.framework.graph_util_impl) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Use tf.compat.v1.graph_util.convert_variables_to_constants\n", "WARNING:tensorflow:From /home/stepan/.local/lib/python3.6/site-packages/tensorflow/python/framework/graph_util_impl.py:245: extract_sub_graph (from tensorflow.python.framework.graph_util_impl) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Use tf.compat.v1.graph_util.extract_sub_graph\n", "INFO:tensorflow:Froze 275 variables.\n", "INFO:tensorflow:Converted 275 variables to const ops.\n" ] } ], "source": [ 
"output_node_names=\"embedding\"\n", "output_graph_def = tf.graph_util.convert_variables_to_constants(\n", " sess, # The session\n", " input_graph_def, # input_graph_def is useful for retrieving the nodes \n", " output_node_names.split(\",\") \n", ")" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "output_graph=\"./r34.pb\"\n", "with tf.gfile.GFile(output_graph, \"wb\") as f:\n", " f.write(output_graph_def.SerializeToString())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## PB check" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "frozen_graph=\"./r34.pb\"\n", "with tf.gfile.GFile(frozen_graph, \"rb\") as f:\n", " graph_def = tf.GraphDef()\n", " graph_def.ParseFromString(f.read())\n", "\n", "with tf.Graph().as_default() as graph:\n", " tf.import_graph_def(graph_def,\n", " input_map=None,\n", " return_elements=None,\n", " name=\"\")" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "image_input = graph.get_tensor_by_name('image_input:0')\n", "keep_prob = graph.get_tensor_by_name('keep_prob:0')\n", "is_train = graph.get_tensor_by_name('training_mode:0')\n", "embs = graph.get_tensor_by_name('embedding:0')" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "L1 difference between normalized embeddings: 2.6077032e-07\n" ] } ], "source": [ "c = net.model.get_outputs()[0].asnumpy()\n", "for i in range(2):\n", " c[i] = c[i]/np.sqrt(np.sum(c[i]**2))\n", "sess = tf.Session(graph=graph)\n", "print('L1 difference between normalized embeddings:',np.max(np.abs(sess.run(embs,feed_dict={image_input:(b - 127.5)*0.0078125,keep_prob:1.0,is_train:False})-c)))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, 
"file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: Utils/README.md ================================================ An example notebook of the MX to TF transformation of the ArcFace models. Can be directly applicable for *LResNet100E-IR,ArcFace@ms1m-refine-v2*, *LResNet50E-IR,ArcFace@ms1m-refine-v1*, *LResNet34E-IR,ArcFace@ms1m-refine-v1*. By analogy, *MobileFaceNet,ArcFace@ms1m-refine-v1* can be transformed by slight modifications. In response to [this](https://github.com/papermsucode/advhat/issues/11) issue.