Repository: grib0ed0v/face_recognition.pytorch Branch: develop Commit: 05cb9b30e822 Files: 72 Total size: 223.4 KB Directory structure: gitextract_tiiskuzj/ ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── __init__.py ├── configs/ │ └── mobilefacenet_vgg2.yml ├── datasets/ │ ├── __init__.py │ ├── casia.py │ ├── celeba.py │ ├── imdbface.py │ ├── lfw.py │ ├── megaface.py │ ├── ms_celeb1m.py │ ├── ndg.py │ ├── trillion_pairs.py │ └── vggface2.py ├── demo/ │ ├── README.md │ └── run_demo.py ├── devtools/ │ └── pylint.rc ├── dump_features.py ├── evaluate_landmarks.py ├── evaluate_lfw.py ├── init_venv.sh ├── losses/ │ ├── __init__.py │ ├── alignment.py │ ├── am_softmax.py │ ├── centroid_based.py │ ├── metric_losses.py │ └── regularizer.py ├── model/ │ ├── __init__.py │ ├── backbones/ │ │ ├── __init__.py │ │ ├── resnet.py │ │ ├── rmnet.py │ │ ├── se_resnet.py │ │ ├── se_resnext.py │ │ └── shufflenet_v2.py │ ├── blocks/ │ │ ├── __init__.py │ │ ├── mobilenet_v2_blocks.py │ │ ├── resnet_blocks.py │ │ ├── rmnet_blocks.py │ │ ├── se_resnet_blocks.py │ │ ├── se_resnext_blocks.py │ │ ├── shared_blocks.py │ │ └── shufflenet_v2_blocks.py │ ├── common.py │ ├── landnet.py │ ├── mobilefacenet.py │ ├── resnet_angular.py │ ├── rmnet_angular.py │ ├── se_resnet_angular.py │ └── shufflenet_v2_angular.py ├── requirements.txt ├── scripts/ │ ├── __init__.py │ ├── accuracy_check.py │ ├── align_images.py │ ├── count_flops.py │ ├── matio.py │ ├── plot_roc_curves_lfw.py │ └── pytorch2onnx.py ├── tests/ │ ├── __init__.py │ ├── test_alignment.py │ ├── test_models.py │ └── test_utils.py ├── train.py ├── train_landmarks.py └── utils/ ├── __init__.py ├── augmentation.py ├── face_align.py ├── ie_tools.py ├── landmarks_augmentation.py ├── parser_yaml.py └── utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ 
*.pyc __pycache__ .idea/ *.iml **/venv data/test external/cocoapi tensorflow_toolkit/tests/models tensorflow_toolkit/**/model ================================================ FILE: .travis.yml ================================================ language: python sudo: required dist: xenial python: - "3.5" cache: pip install: - bash ./init_venv.sh jobs: include: - stage: Tests script: - . venv/bin/activate - python -m unittest ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2018 algo Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # Face Recognition in PyTorch [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![Build Status](https://travis-ci.com/grib0ed0v/face_recognition.pytorch.svg?branch=develop)](https://travis-ci.com/grib0ed0v/face_recognition.pytorch) By [Alexey Gruzdev](https://www.linkedin.com/in/alexey-gruzdev-454399128/) and [Vladislav Sovrasov](https://www.linkedin.com/in/%D0%B2%D0%BB%D0%B0%D0%B4%D0%B8%D1%81%D0%BB%D0%B0%D0%B2-%D1%81%D0%BE%D0%B2%D1%80%D0%B0%D1%81%D0%BE%D0%B2-173b23104/) ## Introduction *A repository for different experimental Face Recognition models such as [CosFace](https://arxiv.org/pdf/1801.09414.pdf), [ArcFace](https://arxiv.org/pdf/1801.07698.pdf), [SphereFace](https://arxiv.org/pdf/1704.08063.pdf), [SV-Softmax](https://arxiv.org/pdf/1812.11317.pdf), etc.* ## Contents 1. [Installation](#installation) 2. [Preparation](#preparation) 3. [Train/Eval](#traineval) 4. [Models](#models) 5. [Face Recognition Demo](#demo) ## Installation 1. Create and activate virtual python environment ```bash bash init_venv.sh . venv/bin/activate ``` ## Preparation 1. For Face Recognition training you should download [VGGFace2](http://www.robots.ox.ac.uk/~vgg/data/vgg_face2/) data. We will refer to this folder as `$VGGFace2_ROOT`. 2. 
For Face Recognition evaluation you need to download [LFW](http://vis-www.cs.umass.edu/lfw/) data and [LFW landmarks](https://github.com/clcarwin/sphereface_pytorch/blob/master/data/lfw_landmark.txt). Place everything in one folder, which will be `$LFW_ROOT`. ## Train/Eval 1. Go to `$FR_ROOT` folder ```bash cd $FR_ROOT/ ``` 2. To start training FR model: ```bash python train.py --train_data_root $VGGFace2_ROOT/train/ --train_list $VGGFace2_ROOT/meta/train_list.txt --train_landmarks $VGGFace2_ROOT/bb_landmark/ --val_data_root $LFW_ROOT/lfw/ --val_list $LFW_ROOT/pairs.txt --val_landmarks $LFW_ROOT/lfw_landmark.txt --train_batch_size 200 --snap_prefix mobilenet_256 --lr 0.35 --embed_size 256 --model mobilenet --device 1 ``` 3. To evaluate FR snapshot (let's say we have MobileNet with 256 embedding size trained for 300k): ```bash python evaluate_lfw.py --val_data_root $LFW_ROOT/lfw/ --val_list $LFW_ROOT/pairs.txt --val_landmarks $LFW_ROOT/lfw_landmark.txt --snap /path/to/snapshot/mobilenet_256_300000.pt --model mobilenet --embed_size 256 ``` ## Configuration files Besides passing all the required parameters via command line, the training script can read them from a `yaml` configuration file. Each line of such a file should contain a valid description of one parameter in the `yaml` format. Example: ```yml #optimizer parameters lr: 0.4 train_batch_size: 256 #loss options margin_type: cos s: 30 m: 0.35 #model parameters model: mobilenet embed_size: 256 #misc snap_prefix: MobileFaceNet devices: [0, 1] #datasets train_dataset: vgg train_data_root: $VGGFace2_ROOT/train/ #... and so on ``` Path to the config file can be passed to the training script via command line. If any other arguments were passed before the config, they will be overwritten. ```bash python train.py -m 0.35 @./my_config.yml #here m can be overwritten with the value from my_config.yml ``` ## Models 1. 
You can download pretrained model from fileshare as well - https://download.01.org/openvinotoolkit/open_model_zoo/training_toolbox_pytorch/models/fr/Mobilenet_se_focal_121000.pt ```bash cd $FR_ROOT python evaluate_lfw.py --val_data_root $LFW_ROOT/lfw/ --val_list $LFW_ROOT/pairs.txt --val_landmarks $LFW_ROOT/lfw_landmark.txt --snap /path/to/snapshot/Mobilenet_se_focal_121000.pt --model mobilenet --embed_size 256 ``` 2. You should get the following output: ``` I1114 09:33:37.846870 10544 evaluate_lfw.py:242] Accuracy/Val_same_accuracy mean: 0.9923 I1114 09:33:37.847019 10544 evaluate_lfw.py:243] Accuracy/Val_diff_accuracy mean: 0.9970 I1114 09:33:37.847069 10544 evaluate_lfw.py:244] Accuracy/Val_accuracy mean: 0.9947 I1114 09:33:37.847179 10544 evaluate_lfw.py:245] Accuracy/Val_accuracy std dev: 0.0035 I1114 09:33:37.847229 10544 evaluate_lfw.py:246] AUC: 0.9995 I1114 09:33:37.847305 10544 evaluate_lfw.py:247] Estimated threshold: 0.7241 ``` ## Demo 1. For setting up demo, please go to [Face Recognition demo with OpenVINO Toolkit](./demo/README.md) ================================================ FILE: __init__.py ================================================ ================================================ FILE: configs/mobilefacenet_vgg2.yml ================================================ #optimizer parameters lr: 0.4 train_batch_size: 256 #loss options margin_type: cos s: 30 m: 0.35 mining_type: sv t: 1.1 #model parameters model: mobilenet embed_size: 256 train_dataset: vgg snap_prefix: MobileFaceNet devices: [0, 1] ================================================ FILE: datasets/__init__.py ================================================ from .lfw import LFW from .vggface2 import VGGFace2 from .ms_celeb1m import MSCeleb1M from .trillion_pairs import TrillionPairs from .imdbface import IMDBFace from .celeba import CelebA from .ndg import NDG __all__ = [LFW, VGGFace2, MSCeleb1M, TrillionPairs, IMDBFace, CelebA, NDG] 
class CASIA(Dataset):
    """CASIA Dataset compatible with PyTorch DataLoader."""

    def __init__(self, images_root_path, image_list_path, transform, use_landmarks=True):
        """Stores the dataset locations and eagerly parses the annotation list.

        images_root_path: folder that the relative image paths are joined to.
        image_list_path: text file with one sample per line
            (relative path, identity id, then ten landmark coordinates).
        transform: callable applied to every image, or a falsy value.
        use_landmarks: when True, images are aligned by their landmarks.
        """
        self.image_list_path = image_list_path
        self.images_root_path = images_root_path
        self.identities = {}
        self.use_landmarks = use_landmarks
        self.samples_info = self._read_samples_info()
        self.transform = transform

    def _parse_samples(self, lines):
        """Converts annotation lines to (image path, identity id, landmarks) tuples.

        Also registers every identity id that was met in self.identities.
        Blank lines are skipped instead of raising an IndexError.
        """
        samples = []
        for line in lines:
            fields = line.split()
            if not fields:
                continue
            sample_id = fields[1]
            # Landmarks are parsed to floats (they used to be kept as raw strings),
            # which matches how the other datasets in this package store them.
            landmarks = [[float(fields[i]), float(fields[i + 1])] for i in range(2, 12, 2)]
            # Only the set of keys matters here: get_num_classes() counts them.
            self.identities[sample_id] = [1]
            samples.append((osp.join(self.images_root_path, fields[0]), sample_id, landmarks))
        return samples

    def _read_samples_info(self):
        """Reads annotation of the dataset"""
        with open(self.image_list_path, 'r') as f:
            return self._parse_samples(tqdm(f.readlines(), 'Preparing CASIA dataset'))

    def get_num_classes(self):
        """Returns total number of identities"""
        return len(self.identities)

    def __len__(self):
        """Returns total number of samples"""
        return len(self.samples_info)

    def __getitem__(self, idx):
        """Returns sample (image, integer label) by index"""
        img_path, sample_id, landmarks = self.samples_info[idx]
        img = cv.imread(img_path)
        if self.use_landmarks:
            img = FivePointsAligner.align(img, landmarks, d_size=(200, 200),
                                          normalized=True, show=False)

        if self.transform:
            img = self.transform(img)
        return {'img': img, 'label': int(sample_id)}


class CelebA(Dataset):
    """CelebA Dataset compatible with PyTorch DataLoader."""

    # Number of images in the train + validation part of the official CelebA
    # evaluation partition (162770 + 19867). Images with a zero-based index
    # below this value are used for training, the rest for testing.
    TRAIN_VAL_SIZE = 182637

    def __init__(self, images_root_path, landmarks_folder_path, transform=None, test=False):
        """Opens the annotation files and eagerly parses them.

        images_root_path: folder that image names are joined to.
        landmarks_folder_path: folder holding 'list_bbox_celeba.txt' and
            'list_landmarks_celeba.txt'.
        transform: optional callable applied to the {'img', 'landmarks'} dict.
        test: selects the test part of the dataset instead of the train part.
        """
        self.test = test
        self.have_landmarks = True
        self.images_root_path = images_root_path

        bb_file_name = 'list_bbox_celeba.txt'
        landmarks_file_name = 'list_landmarks_celeba.txt'
        self.detections_file = open(osp.join(landmarks_folder_path, bb_file_name), 'r')
        try:
            self.landmarks_file = open(osp.join(landmarks_folder_path, landmarks_file_name), 'r')
        except OSError:
            # Do not leak the first handle if the second file is missing.
            self.detections_file.close()
            raise
        self.samples_info = self._read_samples_info()
        self.transform = transform

    def _images_range(self, total):
        """Returns indices of annotation lines that belong to the selected split.

        total: number of annotation lines (without the two header lines).
        """
        if self.test:
            # The first test image has zero-based index TRAIN_VAL_SIZE; starting
            # one later (as before) silently dropped it from the test set.
            return range(self.TRAIN_VAL_SIZE, total)
        # Clamp so that a truncated annotation file does not raise IndexError.
        return range(min(self.TRAIN_VAL_SIZE, total))

    def _read_samples_info(self):
        """Reads annotation of the dataset"""
        # Both handles are closed as soon as their content is in memory.
        with self.detections_file, self.landmarks_file:
            # The first two lines of each file are headers.
            detections_file_lines = self.detections_file.readlines()[2:]
            landmarks_file_lines = self.landmarks_file.readlines()[2:]
        assert len(detections_file_lines) == len(landmarks_file_lines)

        samples = []
        for i in tqdm(self._images_range(len(landmarks_file_lines))):
            detection = detections_file_lines[i].split()
            bbox = [int(coord) for coord in detection[1:]]
            if bbox[2] == 0 or bbox[3] == 0:
                continue  # degenerate box: it cannot be cropped or normalized by

            landmarks = [float(coord) for coord in landmarks_file_lines[i].split()[1:]]
            samples.append((osp.join(self.images_root_path, detection[0]), bbox, landmarks))
        return samples

    @staticmethod
    def _normalize_landmarks(landmarks, bbox):
        """Maps absolute (x, y, x, y, ...) landmarks to coordinates relative to bbox.

        bbox is (x, y, width, height). Returns a flat float numpy array;
        a trailing unpaired coordinate is ignored.
        """
        num_points = len(landmarks) // 2
        points = np.array(landmarks[:2 * num_points], dtype=np.float64).reshape(-1, 2)
        points[:, 0] = (points[:, 0] - bbox[0]) / bbox[2]
        points[:, 1] = (points[:, 1] - bbox[1]) / bbox[3]
        return points.reshape(-1)

    def __len__(self):
        """Returns total number of samples"""
        return len(self.samples_info)

    def __getitem__(self, idx):
        """Returns sample (image, landmarks) by index"""
        img_path, bbox, landmarks = self.samples_info[idx]
        img = cv.imread(img_path, cv.IMREAD_COLOR)
        # Crop the face region; landmarks are expressed relative to this crop.
        img = img[bbox[1]:bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]]
        data = {'img': img, 'landmarks': self._normalize_landmarks(landmarks, bbox)}
        if self.transform:
            data = self.transform(data)
        return data
class IMDBFace(Dataset):
    """IMDBFace Dataset compatible with PyTorch DataLoader."""

    def __init__(self, images_root_path, image_list_path, transform=None):
        """Stores the dataset locations and eagerly parses the annotation list.

        images_root_path: folder that the relative image paths are joined to.
        image_list_path: text file with 'path|landmarks|...' lines.
        transform: optional callable applied to every aligned image.
        """
        self.image_list_path = image_list_path
        self.images_root_path = images_root_path
        self.identities = {}

        assert osp.isfile(image_list_path)
        self.have_landmarks = True

        self.all_samples_info = self._read_samples_info()
        self.samples_info = self.all_samples_info
        self.transform = transform

    def _parse_samples(self, lines):
        """Converts annotation lines to (path, class id, image id, landmarks) tuples.

        Fills self.identities with the indices of the samples of every identity.
        Class ids are assigned in order of first appearance of an identity.
        """
        samples = []
        class_ids = {}
        for line in lines:
            terms = line.split('|')
            if len(terms) < 3:
                continue  # FD has failed on this image

            path, landmarks, _ = terms
            image_id, _ = path.rsplit('/', 1)

            # Look the class id up per identity. Reusing the id of the most
            # recently created class mislabels samples whenever the list is
            # not grouped by identity.
            if image_id not in class_ids:
                class_ids[image_id] = len(class_ids)
            self.identities.setdefault(image_id, []).append(len(samples))

            landmarks = [float(coord) for coord in landmarks.split()]
            assert len(landmarks) == 10
            samples.append((osp.join(self.images_root_path, path).strip(),
                            class_ids[image_id], image_id, landmarks))
        return samples

    def _read_samples_info(self):
        """Reads annotation of the dataset"""
        with open(self.image_list_path, 'r') as f:
            return self._parse_samples(tqdm(f.readlines()))

    def get_weights(self):
        """Computes weights of the each identity in dataset according to frequency of it's occurance"""
        total = float(len(self.all_samples_info))
        return [total / len(self.identities[sample[2]]) for sample in self.all_samples_info]

    def get_num_classes(self):
        """Returns total number of identities"""
        return len(self.identities)

    def __len__(self):
        """Returns total number of samples"""
        return len(self.samples_info)

    def __getitem__(self, idx):
        """Returns sample (image, class id, image id) by index"""
        sample = self.samples_info[idx]
        img = cv.imread(sample[0], cv.IMREAD_COLOR)
        img = FivePointsAligner.align(img, sample[-1], d_size=(200, 200), normalized=True, show=False)

        if self.transform:
            img = self.transform(img)

        return {'img': img, 'label': sample[1], 'instance': sample[2]}


class LFW(Dataset):
    """LFW Dataset compatible with PyTorch DataLoader."""

    def __init__(self, images_root_path, pairs_path, landmark_file_path='', transform=None):
        """Reads the pairs list and, when a landmark file is given, the landmarks.

        images_root_path: folder with one sub-folder of images per person.
        pairs_path: path to the pairs list (its first line is a header).
        landmark_file_path: optional path to the landmarks file; an empty
            string disables face alignment.
        transform: optional callable applied to every loaded image.
        """
        self.pairs_path = pairs_path
        self.images_root_path = images_root_path
        self.landmark_file_path = landmark_file_path
        self.use_landmarks = len(self.landmark_file_path) > 0
        if self.use_landmarks:
            self.landmarks = self._read_landmarks()
        self.pairs = self._read_pairs()
        self.transform = transform

    def _read_landmarks(self):
        """Reads landmarks of the dataset"""
        landmarks = {}
        with open(self.landmark_file_path, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines
                # Landmarks are keyed by the bare file name.
                key = fields[0].rsplit('/', 1)[-1]
                landmarks[key] = [[int(fields[i]), int(fields[i + 1])] for i in range(1, 11, 2)]
        return landmarks

    def _image_path(self, name, number):
        """Builds the path '<root>/<name>/<name>_<4-digit number>.jpg'"""
        return osp.join(self.images_root_path, name, '{}_{:04d}.jpg'.format(name, int(number)))

    def _read_pairs(self):
        """Reads annotation of the dataset

        Returns a list of (path0, path1, is_same, id0, id1) tuples. A row with
        three fields describes two images of one person, a row with four
        fields describes images of two different persons.
        """
        with open(self.pairs_path, 'r') as f:
            rows = [line.split() for line in f.readlines()[1:]]  # skip header

        path_list = []
        for row in rows:
            if not row:
                continue  # tolerate blank lines
            if len(row) == 3:
                id0, num0, num1 = row
                id1 = id0
            elif len(row) == 4:
                # id1 is the second person; it used to be set to the first one.
                id0, num0, id1, num1 = row
            else:
                # Previously such a row re-appended the previous pair silently.
                raise ValueError('Malformed LFW pairs line: {!r}'.format(' '.join(row)))
            path_list.append((self._image_path(id0, num0), self._image_path(id1, num1),
                              len(row) == 3, id0, id1))
        return path_list

    def _landmarks_for(self, img_path):
        """Returns flattened landmarks of an image, looked up by its file name"""
        # basename() also copes with OS specific separators produced by osp.join.
        return np.array(self.landmarks[osp.basename(img_path)]).reshape(-1)

    def _load_img(self, img_path):
        """Loads an image from dist, then performs face alignment and applies transform"""
        img = cv.imread(img_path, cv.IMREAD_COLOR)
        if self.use_landmarks:
            img = FivePointsAligner.align(img, self._landmarks_for(img_path), show=False)

        if self.transform is None:
            return img
        return self.transform(img)

    def show_item(self, index):
        """Saves a pair with a given index to disk"""
        path_1, path_2, _, _, _ = self.pairs[index]
        img1 = cv.imread(path_1)
        img2 = cv.imread(path_2)
        if self.use_landmarks:
            img1 = FivePointsAligner.align(img1, self._landmarks_for(path_1))
            img2 = FivePointsAligner.align(img2, self._landmarks_for(path_2))
        else:
            img1 = cv.resize(img1, (400, 400))
            img2 = cv.resize(img2, (400, 400))
        cv.imwrite('misclassified_{}.jpg'.format(index), np.hstack([img1, img2]))

    def __getitem__(self, index):
        """Returns a pair of images and similarity flag by index"""
        (path_1, path_2, is_same, id0, id1) = self.pairs[index]
        img1, img2 = self._load_img(path_1), self._load_img(path_2)

        return {'img1': img1, 'img2': img2, 'is_same': is_same, 'id0': id0, 'id1': id1}

    def __len__(self):
        """Returns total number of samples"""
        return len(self.pairs)


class MegaFace(Dataset):
    """MegaFace Dataset compatible with PyTorch DataLoader."""

    def __init__(self, images_lsit, transform=None):
        """Wraps a list of dicts with 'path', 'bbox' and 'landmarks' keys.

        The misspelled parameter name is kept for backward compatibility.
        """
        self.samples_info = images_lsit
        self.transform = transform

    def __len__(self):
        """Returns total number of samples"""
        return len(self.samples_info)

    def __getitem__(self, idx):
        """Returns sample (image, index)

        An image that cannot be loaded or processed is replaced with a black
        128x128 placeholder, so one broken file does not abort the whole run.
        """
        sample = self.samples_info[idx]
        try:
            img = cv.imread(sample['path'], cv.IMREAD_COLOR)
            if img is None:
                # imread reports failure by returning None instead of raising.
                raise IOError('Failed to read image: {}'.format(sample['path']))
            bbox = sample['bbox']
            landmarks = sample['landmarks']
            if landmarks is not None:
                # Landmarks are relative to bbox: map them to image coordinates.
                landmarks = np.array(landmarks).reshape(5, -1)
                landmarks[:, 0] = landmarks[:, 0]*bbox[2] + bbox[0]
                landmarks[:, 1] = landmarks[:, 1]*bbox[3] + bbox[1]
                img = FivePointsAligner.align(img, landmarks.reshape(-1),
                                              d_size=(bbox[2], bbox[3]), normalized=False, show=False)
            elif bbox is not None:
                img = img[bbox[1]:bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]]
        except Exception:
            # Exception (not BaseException) keeps Ctrl+C and SystemExit working.
            print('Corrupted image!', sample)
            img = np.zeros((128, 128, 3), dtype='uint8')

        if self.transform:
            img = self.transform(img)

        return {'img': img, 'idx': idx}
class MSCeleb1M(Dataset):
    """MSCeleb1M Dataset compatible with PyTorch DataLoader."""

    def __init__(self, images_root_path, image_list_path, transform=None):
        """Stores the dataset locations and eagerly parses the annotation list.

        images_root_path: folder that the relative image paths are joined to.
        image_list_path: text file with 'path|landmarks|bbox' lines.
        transform: optional callable applied to every aligned image.
        """
        self.image_list_path = image_list_path
        self.images_root_path = images_root_path
        self.identities = {}

        assert osp.isfile(image_list_path)
        self.have_landmarks = True

        self.all_samples_info = self._read_samples_info()
        self.samples_info = self.all_samples_info
        self.transform = transform

    def _parse_samples(self, lines):
        """Converts annotation lines to (path, class id, image id, bbox, landmarks) tuples.

        Fills self.identities with the indices of the samples of every identity.
        Class ids are assigned in order of first appearance of an identity.
        """
        samples = []
        class_ids = {}
        for line in lines:
            terms = line.split('|')
            if len(terms) < 3:
                continue  # FD has failed on this image

            path, landmarks, bbox = terms
            # The identity is everything before the file name; rsplit keeps
            # nested folders working.
            image_id, _ = path.rsplit('/', 1)

            # Look the class id up per identity. Reusing the id of the most
            # recently created class mislabels samples whenever the list is
            # not grouped by identity.
            if image_id not in class_ids:
                class_ids[image_id] = len(class_ids)
            self.identities.setdefault(image_id, []).append(len(samples))

            # Negative box coordinates are clamped to the image border.
            bbox = [max(int(coord), 0) for coord in bbox.split()]
            landmarks = [float(coord) for coord in landmarks.split()]
            assert len(bbox) == 4
            assert len(landmarks) == 10
            samples.append((osp.join(self.images_root_path, path).strip(),
                            class_ids[image_id], image_id, bbox, landmarks))
        return samples

    def _read_samples_info(self):
        """Reads annotation of the dataset"""
        with open(self.image_list_path, 'r') as f:
            return self._parse_samples(tqdm(f.readlines()))

    def get_weights(self):
        """Computes weights of the each identity in dataset according to frequency of it's occurance"""
        total = float(len(self.all_samples_info))
        return [total / len(self.identities[sample[2]]) for sample in self.all_samples_info]

    def get_num_classes(self):
        """Returns total number of identities"""
        return len(self.identities)

    def __len__(self):
        """Returns total number of samples"""
        return len(self.samples_info)

    def __getitem__(self, idx):
        """Returns sample (image, class id, image id) by index"""
        sample = self.samples_info[idx]
        bbox, landmarks = sample[-2], sample[-1]
        img = cv.imread(sample[0], cv.IMREAD_COLOR)
        # bbox is (x, y, width, height); the crop is taken before alignment.
        img = img[bbox[1]:bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]]
        img = FivePointsAligner.align(img, landmarks, d_size=(200, 200), normalized=True, show=False)

        if self.transform:
            img = self.transform(img)

        return {'img': img, 'label': sample[1], 'instance': sample[2]}
class NDG(Dataset):
    """NDG Dataset compatible with PyTorch DataLoader.

    The annotation is a JSON list of records; each record provides the image
    path relative to the images root ('path') and its landmarks ('lm').
    """

    def __init__(self, images_root_path, annotation_list, transform=None, test=False):
        self.test = test
        self.have_landmarks = True
        self.images_root_path = images_root_path

        # The attribute is kept for compatibility, but the handle is closed as
        # soon as the annotation is parsed instead of leaking for the lifetime
        # of the dataset object.
        self.landmarks_file = open(annotation_list, 'r')
        try:
            self.samples_info = self._read_samples_info()
        finally:
            self.landmarks_file.close()

        self.transform = transform

    def _read_samples_info(self):
        """Reads annotation of the dataset and returns a list of (image path, landmarks)"""
        data = json.load(self.landmarks_file)
        return [(osp.join(self.images_root_path, image_info['path']), image_info['lm'])
                for image_info in tqdm(data)]

    @staticmethod
    def _normalize_landmarks(landmarks, width, height):
        """Divides x by width and y by height for every point and flattens the
        result to a 1-D array (x0, y0, x1, y1, ...)"""
        return np.array([(float(point[0]) / width, float(point[1]) / height)
                         for point in landmarks]).reshape(-1)

    def __len__(self):
        """Returns total number of samples"""
        return len(self.samples_info)

    def __getitem__(self, idx):
        """Returns sample (image, landmarks) by index.

        The sample is a dict with keys 'img' and 'landmarks'; if a transform is
        set, its output is returned instead.
        """
        img_path, landmarks = self.samples_info[idx]
        img = cv.imread(img_path, cv.IMREAD_COLOR)

        # Landmarks are stored in pixels; scale them by the image size.
        width, height = img.shape[1], img.shape[0]
        data = {'img': img, 'landmarks': self._normalize_landmarks(landmarks, width, height)}

        if self.transform:
            data = self.transform(data)
        return data
class TrillionPairs(Dataset):
    """TrillionPairs Dataset compatible with PyTorch DataLoader. For details visit http://trillionpairs.deepglint.com/data"""

    def __init__(self, images_root_path, image_list_path, test_mode=False, transform=None):
        self.image_list_path = image_list_path
        self.images_root_path = images_root_path
        self.test_mode = test_mode
        # label -> indices of the samples carrying that label (filled only outside test mode)
        self.identities = {}

        assert osp.isfile(image_list_path)
        self.have_landmarks = True

        self.all_samples_info = self._read_samples_info()
        self.samples_info = self.all_samples_info
        self.transform = transform

    def _read_samples_info(self):
        """Parses the image list into (image path, label, landmarks) tuples"""
        with open(self.image_list_path, 'r') as list_file:
            raw_lines = list_file.readlines()

        records = []
        for raw_line in tqdm(raw_lines):
            fields = raw_line.strip().split(' ')
            rel_path = fields[0]
            if self.test_mode:
                # Test lists carry no label: "<path> <10 coordinates>".
                label = 0
                coords = fields[1:]
            else:
                # Train lists: "<path> <label> <10 coordinates>".
                label = int(fields[1])
                coords = fields[2:]
                self.identities.setdefault(label, []).append(len(records))

            coords = [float(value) for value in coords]
            assert(len(coords) == 10)
            records.append((osp.join(self.images_root_path, rel_path).strip(), label, coords))

        return records

    def get_weights(self):
        """Returns, for every sample, the dataset size divided by the number of
        samples that share its label"""
        total = float(len(self.all_samples_info))
        return [total / len(self.identities[record[1]]) for record in self.all_samples_info]

    def get_num_classes(self):
        """Returns total number of identities"""
        return len(self.identities)

    def __len__(self):
        """Returns total number of samples"""
        return len(self.samples_info)

    def __getitem__(self, idx):
        """Returns a dict with the aligned image ('img'), its label ('label')
        and the requested index ('idx')"""
        record = self.samples_info[idx]
        image = cv.imread(record[0], cv.IMREAD_COLOR)
        image = FivePointsAligner.align(image, record[-1], d_size=(200, 200), normalized=False, show=False)

        if self.transform:
            image = self.transform(image)

        return {'img': image, 'label': record[1], 'idx': idx}
class VGGFace2(Dataset):
    """VGGFace2 Dataset compatible with PyTorch DataLoader.

    If `landmarks_folder_path` is an existing folder, the loose bounding boxes
    and landmarks are loaded from the CSV files located there; otherwise only
    image paths and class ids are available.
    """

    def __init__(self, images_root_path, image_list_path, landmarks_folder_path='', transform=None,
                 landmarks_training=False):
        self.image_list_path = image_list_path
        self.images_root_path = images_root_path
        # numeric identity (folder name without the leading letter) -> sample indices
        self.identities = {}

        self.landmarks_file = None
        self.detections_file = None
        try:
            if osp.isdir(landmarks_folder_path):
                # The subset is deduced from the name of the image list.
                if 'train' in image_list_path:
                    subset = 'train'
                elif 'test' in image_list_path:
                    subset = 'test'
                else:
                    subset = 'all'
                self.landmarks_file = open(osp.join(landmarks_folder_path,
                                                    'loose_landmark_' + subset + '.csv'), 'r')
                self.detections_file = open(osp.join(landmarks_folder_path,
                                                     'loose_bb_' + subset + '.csv'), 'r')

            self.have_landmarks = self.landmarks_file is not None
            self.landmarks_training = landmarks_training
            if self.landmarks_training:
                assert self.have_landmarks is True

            self.samples_info = self._read_samples_info()
        finally:
            # Both CSV files are fully consumed by _read_samples_info,
            # so the handles must not outlive the constructor.
            for handle in (self.landmarks_file, self.detections_file):
                if handle is not None:
                    handle.close()

        self.transform = transform

    def _read_samples_info(self):
        """Reads annotation of the dataset.

        Returns tuples (path, class id, frame id) when no landmarks are loaded and
        (path, class id, identity, bbox, landmarks) otherwise.
        """
        samples = []
        # identity -> class id; keeps labels correct even if images of one
        # identity are not listed one after another.
        class_ids = {}

        with open(self.image_list_path, 'r') as f:
            images_file_lines = f.readlines()

        if self.have_landmarks:
            # The first row of each CSV file is a header.
            detections_file_lines = self.detections_file.readlines()[1:]
            landmarks_file_lines = self.landmarks_file.readlines()[1:]
            assert len(detections_file_lines) == len(landmarks_file_lines)
            assert len(images_file_lines) == len(detections_file_lines)

        for i, line in enumerate(tqdm(images_file_lines)):
            sample = line.strip()
            folder, file_name = sample.split('/')[0], sample.split('/')[1]
            sample_id = int(folder[1:])
            frame_id = int(file_name.split('_')[0])

            if sample_id not in class_ids:
                class_ids[sample_id] = len(class_ids)
            class_id = class_ids[sample_id]
            self.identities.setdefault(sample_id, []).append(len(samples))

            image_path = osp.join(self.images_root_path, sample)
            if not self.have_landmarks:
                samples.append((image_path, class_id, frame_id))
            else:
                # CSV rows look like `"<name>",v1,v2,...`; keep the numeric part only.
                _, bbox = detections_file_lines[i].split('",')
                bbox = [max(int(coord), 0) for coord in bbox.split(',')]
                _, landmarks = landmarks_file_lines[i].split('",')
                landmarks = [float(coord) for coord in landmarks.split(',')]
                samples.append((image_path, class_id, sample_id, bbox, landmarks))

        return samples

    def get_weights(self):
        """Computes a weight for every sample: total number of samples divided by
        the number of samples of the same class"""
        # Count by class id (index 1): it is present in both sample layouts,
        # whereas index 2 holds the frame id when landmarks are not loaded.
        class_sizes = {}
        for sample in self.samples_info:
            class_sizes[sample[1]] = class_sizes.get(sample[1], 0) + 1

        total = len(self.samples_info)
        return [total / float(class_sizes[sample[1]]) for sample in self.samples_info]

    def get_num_classes(self):
        """Returns total number of identities"""
        return len(self.identities)

    def __len__(self):
        """Returns total number of samples"""
        return len(self.samples_info)

    def __getitem__(self, idx):
        """Returns sample by index.

        In landmarks training mode: a dict with the face crop ('img') and the
        landmarks relative to the bounding box ('landmarks').
        Otherwise: a dict with the image ('img', aligned if landmarks are loaded),
        class id ('label') and the third field of the sample record ('instance').
        """
        img = cv.imread(self.samples_info[idx][0], cv.IMREAD_COLOR)

        if self.landmarks_training:
            landmarks = self.samples_info[idx][-1]
            bbox = self.samples_info[idx][-2]
            # bbox is used as (x, y, width, height)
            img = img[bbox[1]:bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]]
            landmarks = [(float(landmarks[2*i] - bbox[0]) / bbox[2],
                          float(landmarks[2*i + 1] - bbox[1]) / bbox[3])
                         for i in range(len(landmarks)//2)]
            data = {'img': img, 'landmarks': np.array(landmarks)}
            if self.transform:
                data = self.transform(data)
            return data

        if self.have_landmarks:
            landmarks = self.samples_info[idx][-1]
            img = FivePointsAligner.align(img, landmarks, d_size=(200, 200), normalized=False)

        if self.transform:
            img = self.transform(img)

        return {'img': img, 'label': self.samples_info[idx][1], 'instance': self.samples_info[idx][2]}
Toolkit](https://software.intel.com/en-us/openvino-toolkit) ![](./demo.png) ## Demo Preparation 1. Install **OpenVINO Toolkit** - [Linux installation guide](https://software.intel.com/en-us/articles/OpenVINO-Install-Linux) 2. Create a Python virtual environment: ```bash mkvirtualenv fr --python=python3 ``` 3. Install dependencies: ```bash pip install -r requirements.txt ``` 4. Initialize the OpenVINO environment: ```bash source /opt/intel/computer_vision_sdk/bin/setupvars.sh ``` ## Deep Face Recognition 1. Set the `PATH_TO_GALLERY` variable to point to the folder with gallery images (faces to be recognized): ```bash export PATH_TO_GALLERY=/path/to/gallery/with/images/ ``` 2. To use the OpenVINO pretrained models, set `IR_MODELS_ROOT`; otherwise, adjust the model paths in the run command below. ```bash export IR_MODELS_ROOT=$INTEL_CVSDK_DIR/deployment_tools/intel_models/ ``` 3. If you are running from a plain console, you need to set the `PYTHONPATH` variable: ```bash export PYTHONPATH=`pwd`:$PYTHONPATH ``` 4. Run the Face Recognition demo: ```bash python demo/run_demo.py --path_to_gallery $PATH_TO_GALLERY --cam_id 0 \ --fd_model $IR_MODELS_ROOT/face-detection-retail-0004/FP32/face-detection-retail-0004.xml \ --fr_model $IR_MODELS_ROOT/face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.xml \ --ld_model $IR_MODELS_ROOT/landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.xml \ -l libcpu_extension_avx2.so ``` *Note:* `libcpu_extension_avx2.so` is located in the `$INTEL_CVSDK_DIR/inference_engine/lib/<OS_NAME>/intel64/` folder. Here `<OS_NAME>` is the system name detected by OpenVINO, for example `ubuntu_16.04` if you are running the demo under Ubuntu 16.04. The folder with CPU extensions is already in `LD_LIBRARY_PATH` after initialization of the OpenVINO environment, which is why the full path can be omitted in the launch command.
class FaceDetector:
    """Wrapper class for face detector"""

    def __init__(self, model_path, conf=.6, device='CPU', ext_path=''):
        self.net = load_ie_model(model_path, device, None, ext_path)
        # Detections with a confidence not above this value are dropped.
        self.confidence = conf
        # Horizontal and vertical factors used to enlarge every detected box.
        self.expand_ratio = (1.1, 1.05)

    def get_detections(self, frame):
        """Returns all detections on frame as a list of ((left, top, right, bottom), confidence)"""
        _, _, h, w = self.net.get_input_shape().shape
        out = self.net.forward(cv.resize(frame, (w, h)))
        detections = self.__decode_detections(out, frame.shape)
        return detections

    def __decode_detections(self, out, frame_shape):
        """Decodes raw SSD output.

        Every row of out[0, 0] is read as (_, _, confidence, left, top, right, bottom)
        with coordinates relative to the frame size. The result is sorted by
        confidence in descending order.
        """
        detections = []

        for detection in out[0, 0]:
            confidence = detection[2]
            if confidence > self.confidence:
                left = int(max(detection[3], 0) * frame_shape[1])
                top = int(max(detection[4], 0) * frame_shape[0])
                right = int(max(detection[5], 0) * frame_shape[1])
                bottom = int(max(detection[6], 0) * frame_shape[0])
                if self.expand_ratio != (1., 1.):
                    w = (right - left)
                    h = (bottom - top)
                    dw = w * (self.expand_ratio[0] - 1.) / 2
                    dh = h * (self.expand_ratio[1] - 1.) / 2
                    left = max(int(left - dw), 0)
                    right = int(right + dw)
                    top = max(int(top - dh), 0)
                    bottom = int(bottom + dh)

                detections.append(((left, top, right, bottom), confidence))

        if len(detections) > 1:
            detections.sort(key=lambda x: x[1], reverse=True)

        return detections


class VectorCNN:
    """Wrapper class for a network returning a vector"""

    def __init__(self, model_path, device='CPU'):
        self.net = load_ie_model(model_path, device, None)

    def forward(self, batch):
        """Performs forward of the underlying network on a given batch.

        Every frame is resized to the network input size and processed separately;
        a list with one output per frame is returned.
        """
        _, _, h, w = self.net.get_input_shape().shape
        outputs = [self.net.forward(cv.resize(frame, (w, h))) for frame in batch]
        return outputs


def get_embeddings(frame, detections, face_reid, landmarks_predictor):
    """Get embeddings for all detected faces on the frame.

    `detections` is a list of ((left, top, right, bottom), confidence). Returns a
    list of embeddings in the same order (empty if there are no detections).
    """
    rois = []
    embeddings = []
    for rect, _ in detections:
        left, top, right, bottom = rect
        rois.append(frame[top:bottom, left:right])

    if rois:
        landmarks = landmarks_predictor.forward(rois)
        assert len(landmarks) == len(rois)

        # Align every crop by its landmarks, keeping the crop size unchanged.
        for i, _ in enumerate(rois):
            roi_keypoints = landmarks[i].reshape(-1)
            rois[i] = face_align.FivePointsAligner.align(rois[i], roi_keypoints,
                                                         d_size=(rois[i].shape[1], rois[i].shape[0]),
                                                         normalized=True, show=False)

        embeddings = face_reid.forward(rois)
        assert len(rois) == len(embeddings)

    return embeddings


def find_nearest(x, gallery, thr):
    """Finds the nearest to a given embedding in the gallery.

    Returns (position, name) of the gallery entry with the smallest cosine
    distance to `x` if that distance is below `thr`, otherwise (None, None).
    """
    if gallery:
        diffs = np.array([cosine(x, y) for y in gallery.values()])
        min_pos = diffs.argmin()
        min_dist = diffs[min_pos]
        if min_dist < thr:
            return min_pos, list(gallery.keys())[min_pos]
    return None, None


def match_embeddings(embeds, gallery, thr):
    """Matches input embeddings with ones in the gallery.

    Returns a list with a gallery name (or 'Unknown') per embedding and the gallery itself.
    """
    indexes = []
    for emb in embeds:
        _, name = find_nearest(emb, gallery, thr)
        indexes.append(name if name is not None else 'Unknown')
    return indexes, gallery


def draw_detections(frame, detections, indexes):
    """Draws detections and labels"""
    for i, rect in enumerate(detections):
        left, top, right, bottom = rect[0]
        cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), thickness=2)
        label = str(indexes[i])
        label_size, base_line = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 1, 1)
        # Keep the label inside the frame when the box touches the top border.
        top = max(top, label_size[1])
        cv.rectangle(frame, (left, top - label_size[1]), (left + label_size[0], top + base_line),
                     (255, 255, 255), cv.FILLED)
        cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0))

    return frame


def full_frame_detection(img):
    """Returns a detections list with a single zero-confidence box covering the whole image"""
    height, width = img.shape[:2]
    return [((0, 0, width, height), 0)]


def load_gallery(path_to_gallery, face_det, landmarks_detector, face_recognizer):
    """Computes embeddings for gallery.

    Every *.png / *.jpg file in `path_to_gallery` becomes a gallery entry named
    after the file (without extension). If no face is detected, the whole image
    is used as the face region.
    """
    gallery = {}
    files = [file_name for file_name in os.listdir(path_to_gallery)
             if file_name.endswith(('.png', '.jpg'))]
    for file_name in files:
        img = cv.imread(osp.join(path_to_gallery, file_name))
        if img is None:
            # imread signals an unreadable file by returning None
            log.warn('Warning: failed to read the image ' + file_name)
            continue

        detections = face_det.get_detections(img)
        if not detections:
            # Must have the same layout as real detections: [((l, t, r, b), conf)]
            detections = full_frame_detection(img)
            log.warn('Warning: failed to detect face on the image ' + file_name)

        embed = get_embeddings(img, detections, face_recognizer, landmarks_detector)
        # Detections are sorted by confidence, so the first embedding belongs to the best face.
        gallery[osp.splitext(file_name)[0]] = embed[0]
    return gallery


def run(params, capture, face_det, face_recognizer, landmarks_detector):
    """Starts the face recognition demo. Runs until ESC is pressed or frames are over"""
    win_name = 'Deep Face Recognition'
    gallery = load_gallery(params.path_to_gallery, face_det, landmarks_detector, face_recognizer)

    while cv.waitKey(1) != 27:
        has_frame, frame = capture.read()
        if not has_frame:
            return

        detections = face_det.get_detections(frame)
        embeds = get_embeddings(frame, detections, face_recognizer, landmarks_detector)
        ids, gallery = match_embeddings(embeds, gallery, params.fr_thresh)
        frame = draw_detections(frame, detections, ids)
        cv.imshow(win_name, frame)


def main():
    """Prepares data for the face recognition demo"""
    parser = argparse.ArgumentParser(description='Face recognition live demo script')
    parser.add_argument('--video', type=str, default=None, help='Input video')
    parser.add_argument('--cam_id', type=int, default=-1, help='Input cam')

    parser.add_argument('--fd_model', type=str, required=True)
    parser.add_argument('--fd_thresh', type=float, default=0.6, help='Threshold for FD')

    parser.add_argument('--fr_model', type=str, required=True)
    parser.add_argument('--fr_thresh', type=float, default=0.6, help='Threshold for FR')

    parser.add_argument('--path_to_gallery', type=str, required=True, help='Path to gallery with subjects')

    parser.add_argument('--ld_model', type=str, default='', help='Path to a snapshots with landmarks detection model')

    parser.add_argument('--device', type=str, default='CPU')
    parser.add_argument('-l', '--cpu_extension',
                        help='MKLDNN (CPU)-targeted custom layers.Absolute path to a shared library with the kernels '
                             'impl.', type=str, default=None)

    args = parser.parse_args()

    # A non-negative camera id has priority over the video file.
    if args.cam_id >= 0:
        log.info('Reading from cam {}'.format(args.cam_id))
        cap = cv.VideoCapture(args.cam_id)
        cap.set(cv.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv.CAP_PROP_FRAME_HEIGHT, 720)
        cap.set(cv.CAP_PROP_FOURCC, cv.VideoWriter_fourcc('M', 'J', 'P', 'G'))
    else:
        assert args.video
        log.info('Reading from {}'.format(args.video))
        cap = cv.VideoCapture(args.video)
    assert cap.isOpened()

    try:
        face_detector = FaceDetector(args.fd_model, args.fd_thresh, args.device, args.cpu_extension)
        face_recognizer = VectorCNN(args.fr_model, args.device)
        landmarks_detector = VectorCNN(args.ld_model, args.device)
        run(args, cap, face_detector, face_recognizer, landmarks_detector)
    finally:
        # Free the camera / video file and close the demo window in any case.
        cap.release()
        cv.destroyAllWindows()


if __name__ == '__main__':
    main()
persistent=yes # List of plugins (as comma separated values of python modules names) to load, # usually to register additional checkers. load-plugins= [MESSAGES CONTROL] # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option # multiple time. #enable= # Disable the message, report, category or checker with the given id(s). You # can either give multiple identifier separated by comma (,) or put this option # multiple time. disable=R0903, W0221 [REPORTS] # Set the output format. Available formats are text, parseable, colorized, msvs # (visual studio) and html output-format=text # Include message's id in output include-ids=no # Put messages in a separate file for each module / package specified on the # command line instead of printing them on stdout. Reports (if any) will be # written in a file name "pylint_global.[txt|html]". files-output=no # Tells whether to display a full report or only the messages reports=yes # Python expression which should return a note less than 10 (10 is the highest # note). You have access to the variables errors warning, statement which # respectively contain the number of errors / warnings messages and the total # number of statements analyzed. This is used by the global evaluation report # (R0004). evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) # Add a comment according to your evaluation note. This is used by the global # evaluation report (R0004). comment=no [VARIABLES] # Tells whether we should check for unused import in __init__ files. init-import=no # A regular expression matching names used for dummy variables (i.e. not used). dummy-variables-rgx=_|dummy # List of additional names supposed to be defined in builtins. Remember that # you should avoid to define new builtins when possible. 
additional-builtins= [BASIC] # Required attributes for module, separated by a comma required-attributes= # List of builtins function names that should not be used, separated by a comma bad-functions=map,filter,apply,input # Regular expression which should only match correct module names module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ # Regular expression which should only match correct module level names const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ # Regular expression which should only match correct class names class-rgx=[A-Z_][a-zA-Z0-9]+$ # Regular expression which should only match correct function names function-rgx=[a-z_][a-z0-9_]{2,40}$ # Regular expression which should only match correct method names method-rgx=[a-z_][a-z0-9_]{2,30}$ # Regular expression which should only match correct instance attribute names attr-rgx=[a-z_][a-z0-9_]{0,30}$ # Regular expression which should only match correct argument names argument-rgx=[a-z_][a-z0-9_]{0,30}$ # Regular expression which should only match correct variable names variable-rgx=[a-z_][a-z0-9_]{0,30}$ # Regular expression which should only match correct list comprehension / # generator expression variable names inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ # Good variable names which should always be accepted, separated by a comma good-names=i,j,k,ex,Run,_ # Bad variable names which should always be refused, separated by a comma bad-names=foo,bar,baz,toto,tutu,tata # Regular expression which should only match functions or classes name which do # not require a docstring no-docstring-rgx=__.*__ [MISCELLANEOUS] # List of note tags to take in consideration, separated by a comma. notes=FIXME,XXX,TODO [FORMAT] # Maximum number of characters on a single line. max-line-length=120 # Maximum number of lines in a module max-module-lines=1000 # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 # tab). indent-string=' ' indent-after-paren=4 [SIMILARITIES] # Minimum lines number of a similarity. 
min-similarity-lines=4 # Ignore comments when computing similarities. ignore-comments=yes # Ignore docstrings when computing similarities. ignore-docstrings=yes [TYPECHECK] # Tells whether missing members accessed in mixin class should be ignored. A # mixin class is detected if its name ends with "mixin" (case insensitive). ignore-mixin-members=yes # List of classes names for which member attributes should not be checked # (useful for classes with attributes dynamically set). ignored-classes=SQLObject # When zope mode is activated, add a predefined set of Zope acquired attributes # to generated-members. zope=no # List of members which are set dynamically and missed by pylint inference # system, and so shouldn't trigger E0201 when accessed. generated-members=REQUEST,acl_users,aq_parent,torch,cv [DESIGN] # Maximum number of arguments for function / method max-args=5 # Argument names that match this expression will be ignored. Default to name # with leading underscore ignored-argument-names=_.* # Maximum number of locals for function / method body max-locals=15 # Maximum number of return / yield for function / method body max-returns=6 # Maximum number of branch for function / method body max-branchs=12 # Maximum number of statements in function / method body max-statements=50 # Maximum number of parents for a class (see R0901). max-parents=7 # Maximum number of attributes for a class (see R0902). max-attributes=7 # Minimum number of public methods for a class (see R0903). min-public-methods=2 # Maximum number of public methods for a class (see R0904). max-public-methods=20 [IMPORTS] # Deprecated modules which should not be used, separated by a comma deprecated-modules=regsub,string,TERMIOS,Bastion,rexec # Create a graph of every (i.e. 
internal and external) dependencies in the # given file (report RP0402 must not be disabled) import-graph= # Create a graph of external dependencies in the given file (report RP0402 must # not be disabled) ext-import-graph= # Create a graph of internal dependencies in the given file (report RP0402 must # not be disabled) int-import-graph= extension-pkg-whitelist=cv2 [CLASSES] # List of interface methods to ignore, separated by a comma. This is used for # instance to not check methods defines in Zope's Interface base class. ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by # List of method names used to declare (i.e. assign) instance attributes. defining-attr-methods=__init__,__new__,setUp ================================================ FILE: dump_features.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" import sys import argparse import os import os.path as osp from tqdm import tqdm import numpy as np import glog as log import torch import torch.nn.functional as F from torch.utils.data import DataLoader from torchvision import transforms as t from scripts.matio import save_mat from model.common import models_backbones from datasets.megaface import MegaFace from datasets.trillion_pairs import TrillionPairs from utils.utils import load_model_state from utils.augmentation import ResizeNumpy, NumpyToTensor def clean_megaface(filenames, features, noises_list_path): """Filters megaface from outliers""" with open(noises_list_path, 'r') as f: noises_list = f.readlines() noises_list = [line.strip() for line in noises_list] clean_features = np.zeros((features.shape[0], features.shape[1] + 1), dtype=np.float32) for i, filename in enumerate(tqdm(filenames)): clean_features[i, 0: features.shape[1]] = features[i, :] for line in noises_list: if line in filename: clean_features[i, features.shape[1]] = 100.0 break return clean_features def clean_facescrub(filenames, features, noises_list_path): """Replaces wrong instances of identities from the Facescrub with the centroids of these identities""" clean_feature_size = features.shape[1] + 1 with open(noises_list_path, 'r') as f: noises_list = f.readlines() noises_list = [osp.splitext(line.strip())[0] for line in noises_list] clean_features = np.zeros((features.shape[0], clean_feature_size), dtype=np.float32) centroids = {} for i, filename in enumerate(tqdm(filenames)): clean_features[i, 0: features.shape[1]] = features[i, :] id_name = osp.basename(filename).split('_')[0] if not id_name in centroids: centroids[id_name] = np.zeros(clean_feature_size, dtype=np.float32) centroids[id_name] += clean_features[i, :] for i, file_path in enumerate(tqdm(filenames)): filename = osp.basename(file_path) for line in noises_list: if line in filename.replace(' ', '_'): id_name = filename.split('_')[0] clean_features[i, :] = centroids[id_name] + 
np.random.uniform(-0.001, 0.001, clean_feature_size) clean_features[i, :] /= np.linalg.norm(clean_features[i, :]) break return clean_features @torch.no_grad() def main(args): input_filenames = [] output_filenames = [] input_dir = os.path.abspath(args.input_dir) output_dir = os.path.abspath(args.output_dir) if not args.trillion_format: log.info('Reading info...') with open(os.path.join(args.input_dir, os.path.basename(args.input_list)), 'r') as f: lines = f.readlines() for line in tqdm(lines): info = line.strip().split('|') file = info[0].strip() filename = os.path.join(input_dir, file) path, _ = osp.split(filename) out_folder = path.replace(input_dir, output_dir) if not osp.isdir(out_folder): os.makedirs(out_folder) landmarks = None bbox = None if len(info) > 2: landmarks = info[1].strip().split(' ') landmarks = [float(x) for x in landmarks] bbox = info[2].strip().split(' ') bbox = [int(float(x)) for x in bbox] outname = filename.replace(input_dir, output_dir) + args.file_ending input_filenames.append({'path': filename, 'landmarks': landmarks, 'bbox': bbox}) output_filenames += [outname] nrof_images = len(input_filenames) log.info("Total number of images: ", nrof_images) dataset = MegaFace(input_filenames) else: dataset = TrillionPairs(args.input_dir, osp.join(args.input_dir, 'testdata_lmk.txt'), test_mode=True) nrof_images = len(dataset) emb_array = np.zeros((nrof_images, args.embedding_size), dtype=np.float32) dataset.transform = t.Compose([ResizeNumpy(models_backbones[args.model].get_input_res()), NumpyToTensor(switch_rb=True)]) val_loader = DataLoader(dataset, batch_size=args.batch_size, num_workers=5, shuffle=False) model = models_backbones[args.model](embedding_size=args.embedding_size, feature=True) assert args.snap is not None log.info('Snapshot ' + args.snap + ' ...') log.info('Extracting embeddings ...') model = load_model_state(model, args.snap, args.devices[0], eval_state=True) model = torch.nn.DataParallel(model, device_ids=args.devices, 
output_device=args.devices[0]) f_output_filenames = [] with torch.cuda.device(args.devices[0]): for i, data in enumerate(tqdm(val_loader), 0): idxs, imgs = data['idx'], data['img'] batch_embeddings = F.normalize(model(imgs), p=2, dim=1).data.cpu().numpy() batch_embeddings = batch_embeddings.reshape(batch_embeddings.shape[0], -1) path_indices = idxs.data.cpu().numpy() start_index = i*args.batch_size end_index = min((i+1)*args.batch_size, nrof_images) assert start_index == path_indices[0] assert end_index == path_indices[-1] + 1 assert emb_array[start_index:end_index, :].shape == batch_embeddings.shape emb_array[start_index:end_index, :] = batch_embeddings if not args.trillion_format: for index in path_indices: f_output_filenames.append(output_filenames[index]) assert len(output_filenames) == len(output_filenames) log.info('Extracting features Done.') if args.trillion_format: save_mat(args.file_ending, emb_array) else: if 'megaface_noises.txt' in args.noises_list: log.info('Cleaning Megaface features') emb_array = clean_megaface(f_output_filenames, emb_array, args.noises_list) elif 'facescrub_noises.txt' in args.noises_list: log.info('Cleaning Facescrub features') emb_array = clean_facescrub(f_output_filenames, emb_array, args.noises_list) else: log.info('Megaface features are not cleaned up.') log.info('Saving features to files...') for i in tqdm(range(len(f_output_filenames))): save_mat(f_output_filenames[i], emb_array[i, :]) def parse_argument(argv): parser = argparse.ArgumentParser(description='Save embeddings to MegaFace features files') parser.add_argument('--model', choices=models_backbones.keys(), type=str, default='rmnet', help='Model type.') parser.add_argument('input_dir', help='Path to MegaFace Features') parser.add_argument('output_dir', help='Path to FaceScrub Features') parser.add_argument('--input_list', default='list.txt', type=str, required=False) parser.add_argument('--batch_size', type=int, default=128) parser.add_argument('--embedding_size', 
@torch.no_grad()
def evaluate(val_loader, model):
    """Calculates the average landmark error over a validation loader.

    Every batch is a dict with ``'img'`` and ``'landmarks'`` tensors; landmarks
    are laid out as ``(batch, 2 * n_points)``. Errors are Euclidean distances
    per point, normalized by the distance between the first two ground-truth
    points (presumably the eyes, judging by the variable name).

    Gradients are disabled: this is pure inference, so no autograd graph needs
    to be built for the forward passes.

    :param val_loader: iterable of batches as described above.
    :param model: callable mapping an image batch to predicted landmarks.
    :return: tuple ``(mean normalized error, per-point mean normalized error
        as a numpy array, fraction of samples whose error exceeds 0.1)``.
    """
    total_loss = 0.
    total_pp_error = 0.
    failures_num = 0
    items_num = 0
    for batch in tqdm(val_loader):
        images, gt_landmarks = batch['img'].cuda(), batch['landmarks'].cuda()
        predicted_landmarks = model(images)
        loss = predicted_landmarks - gt_landmarks
        items_num += loss.shape[0]
        n_points = loss.shape[1] // 2

        # Euclidean error of every landmark point: (batch, n_points).
        per_point_error = loss.data.view(-1, n_points, 2)
        per_point_error = torch.norm(per_point_error, p=2, dim=2)
        avg_error = torch.sum(per_point_error, 1) / n_points

        # Normalization term: distance between the first two ground-truth points.
        eyes_dist = torch.norm(gt_landmarks[:, 0:2] - gt_landmarks[:, 2:4], p=2, dim=1).reshape(-1)

        per_point_error = torch.div(per_point_error, eyes_dist.view(-1, 1))
        total_pp_error += torch.sum(per_point_error, 0)

        avg_error = torch.div(avg_error, eyes_dist)
        # A sample counts as a failure when its normalized error is above 0.1.
        failures_num += torch.nonzero(avg_error > 0.1).shape[0]
        total_loss += torch.sum(avg_error)

    return total_loss / items_num, (total_pp_error / items_num).data.cpu().numpy(), float(failures_num) / items_num
def main():
    """Parses the command line and launches the landmarks evaluation"""
    cli = argparse.ArgumentParser(description='Evaluation script for landmarks detection network')

    # (flags, keyword options) in the order they appear in the help output.
    option_specs = (
        (('--device', '-d'),
         dict(default=0, type=int)),
        (('--val_data_root',),
         dict(dest='val', required=True, type=str, help='Path to val data.')),
        (('--val_list',),
         dict(dest='v_list', required=False, type=str, help='Path to test data image list.')),
        (('--val_landmarks',),
         dict(dest='v_land', default='', required=False, type=str,
              help='Path to landmarks for test images.')),
        (('--val_batch_size',),
         dict(type=int, default=1, help='Validation batch size.')),
        (('--snapshot',),
         dict(type=str, default=None, help='Snapshot to evaluate.')),
        (('--dataset',),
         dict(choices=['vgg', 'celeb', 'ngd'], type=str, default='vgg', help='Dataset.')),
    )
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    parsed = cli.parse_args()

    # Run the whole evaluation with the requested CUDA device selected.
    with torch.cuda.device(parsed.device):
        start_evaluation(parsed)
def get_roc(scores_with_gt, n_threshs=400):
    """Computes a ROC curve on the LFW dataset.

    A pair is predicted as "same" when its score is strictly below the
    threshold. Thresholds are ``n_threshs`` evenly spaced values in [0, 4].
    Rates are normalized by half of the number of pairs, i.e. the input is
    expected to hold as many positive as negative pairs.

    :param scores_with_gt: list of dicts with 'score' and 'is_same' keys.
    :param n_threshs: number of thresholds to evaluate.
    :return: tuple of numpy arrays (false positive rates, true positive rates).
    """
    total_pairs = len(scores_with_gt)
    fp_rates, tp_rates = [], []

    for thr in np.linspace(0., 4., n_threshs):
        # Ground-truth flags of all the pairs accepted at this threshold.
        accepted = [bool(pair['is_same']) for pair in scores_with_gt if pair['score'] < thr]
        true_pos = sum(accepted)
        false_pos = len(accepted) - true_pos

        fp_rates.append(float(false_pos) / total_pairs * 2)
        tp_rates.append(float(true_pos) / total_pairs * 2)

    return np.array(fp_rates), np.array(tp_rates)
model, batch_size, dump_embeddings=False, pdist=lambda x, y: 1. - F.cosine_similarity(x, y), flipped_embeddings=False): """Computes embeddings of all images from the LFW dataset using PyTorch""" val_loader = DataLoader(dataset, batch_size=batch_size, num_workers=4, shuffle=False) scores_with_gt = [] embeddings = [] ids = [] for batch_idx, data in enumerate(tqdm(val_loader, 'Computing embeddings')): images_1 = data['img1'] images_2 = data['img2'] is_same = data['is_same'] if torch.cuda.is_available() and args.devices[0] != -1: images_1 = images_1.cuda() images_2 = images_2.cuda() emb_1 = model(images_1) emb_2 = model(images_2) if flipped_embeddings: images_1_flipped = flip_tensor(images_1, 3) images_2_flipped = flip_tensor(images_2, 3) emb_1_flipped = model(images_1_flipped) emb_2_flipped = model(images_2_flipped) emb_1 = (emb_1 + emb_1_flipped)*.5 emb_2 = (emb_2 + emb_2_flipped)*.5 scores = pdist(emb_1, emb_2).data.cpu().numpy() for i, _ in enumerate(scores): scores_with_gt.append({'score': scores[i], 'is_same': is_same[i], 'idx': batch_idx*batch_size + i}) if dump_embeddings: id0 = data['id0'] id1 = data['id1'] ids.append(id0) ids.append(id1) to_dump_1 = emb_1.data.cpu() to_dump_2 = emb_2.data.cpu() embeddings.append(to_dump_1) embeddings.append(to_dump_2) if dump_embeddings: total_emb = np.concatenate(embeddings, axis=0) total_ids = np.concatenate(ids, axis=0) log_path = './logs/{:%Y_%m_%d_%H_%M}'.format(datetime.datetime.now()) writer = SummaryWriter(log_path) writer.add_embedding(torch.from_numpy(total_emb), total_ids) return scores_with_gt def compute_embeddings_lfw_ie(args, dataset, model, batch_size=1, dump_embeddings=False, pdist=cosine, flipped_embeddings=False, lm_model=None): """Computes embeddings of all images from the LFW dataset using Inference Engine""" assert batch_size == 1 scores_with_gt = [] for batch_idx, data in enumerate(tqdm(dataset, 'Computing embeddings')): images_1 = data['img1'] images_2 = data['img2'] if lm_model: lm_input_size = 
def compute_optimal_thresh(scores_with_gt):
    """Estimates a decision threshold for pairwise face verification.

    Scores of matching and non-matching pairs are histogrammed over the same
    60 bins (spanning the range of the matching scores). The threshold is the
    mean of the bin edges where the non-matching histogram overtakes the
    matching one; 0.5 is returned when no such bin exists.

    :param scores_with_gt: iterable of dicts with 'score' and 'is_same' keys.
    :return: the estimated threshold.
    """
    same_scores, diff_scores = [], []
    for pair in scores_with_gt:
        bucket = same_scores if pair['is_same'] else diff_scores
        bucket.append(pair['score'])

    same_hist, edges = np.histogram(np.array(same_scores), 60)
    diff_hist, _ = np.histogram(np.array(diff_scores), edges)

    # Left edges of the bins in which the histograms cross.
    crossings = [edges[k] for k in range(1, len(diff_hist))
                 if same_hist[k - 1] >= diff_hist[k - 1] and 0.05 < same_hist[k] <= diff_hist[k]]

    return np.mean(crossings if crossings else [0.5])
subset_train = [(0, lower_bnd), (upper_bnd, num_pairs)] subsets.append({'test': subset_test, 'train': subset_train}) same_scores = [] diff_scores = [] val_scores = [] threshs = [] mean_fpr = np.zeros(400) mean_tpr = np.zeros(400) failed_pairs = [] for subset in tqdm(subsets, '{} evaluation'.format(snap_name), disable=not verbose): train_list = get_subset(scores_with_gt, subset['train']) optimal_thresh = compute_optimal_thresh(train_list) threshs.append(optimal_thresh) test_list = get_subset(scores_with_gt, subset['test']) same_correct = 0 diff_correct = 0 pos_pairs_num = neg_pairs_num = len(test_list) // 2 for score_with_gt in test_list: if score_with_gt['score'] < optimal_thresh and score_with_gt['is_same']: same_correct += 1 elif score_with_gt['score'] >= optimal_thresh and not score_with_gt['is_same']: diff_correct += 1 if score_with_gt['score'] >= optimal_thresh and score_with_gt['is_same']: failed_pairs.append(score_with_gt['idx']) if score_with_gt['score'] < optimal_thresh and not score_with_gt['is_same']: failed_pairs.append(score_with_gt['idx']) same_scores.append(float(same_correct) / pos_pairs_num) diff_scores.append(float(diff_correct) / neg_pairs_num) val_scores.append(0.5*(same_scores[-1] + diff_scores[-1])) fprs, tprs = get_roc(test_list, mean_fpr.shape[0]) mean_fpr = mean_fpr + fprs mean_tpr = mean_tpr + tprs mean_fpr /= 10 mean_tpr /= 10 if roc_fname: save_roc(mean_tpr, mean_fpr, roc_fname) same_acc = np.mean(same_scores) diff_acc = np.mean(diff_scores) overall_acc = np.mean(val_scores) auc = get_auc(mean_fpr, mean_tpr) if show_failed: log.info('Number of misclassified pairs: {}'.format(len(failed_pairs))) for pair in failed_pairs: dataset.show_item(pair) if verbose: log.info('Accuracy/Val_same_accuracy mean: {0:.4f}'.format(same_acc)) log.info('Accuracy/Val_diff_accuracy mean: {0:.4f}'.format(diff_acc)) log.info('Accuracy/Val_accuracy mean: {0:.4f}'.format(overall_acc)) log.info('Accuracy/Val_accuracy std dev: {0:.4f}'.format(np.std(val_scores))) 
def load_test_dataset(arguments):
    """Builds the LFW dataset together with the matching embeddings function.

    The dataset must provide landmarks (asserted). Its images are resized to
    the input resolution of the selected backbone and converted to tensors.

    :param arguments: parsed command line; ``model``, ``val``, ``v_list``,
        ``v_land`` and ``flipped_emb`` are read.
    :return: tuple ``(dataset, embeddings function)``.
    """
    backbone = models_backbones[arguments.model]
    resolution = backbone.get_input_res()

    dataset = LFW(arguments.val, arguments.v_list, arguments.v_land)
    assert dataset.use_landmarks
    log.info('Using landmarks for the LFW images.')
    dataset.transform = t.Compose([ResizeNumpy(resolution), NumpyToTensor(switch_rb=True)])

    embeddings_fun = partial(compute_embeddings_lfw, flipped_embeddings=arguments.flipped_emb)
    return dataset, embeddings_fun
default='rmnet', help='Model type.') parser.add_argument('--engine', choices=['pt', 'ie'], type=str, default='pt', help='Framework to use for eval.') # IE-related options parser.add_argument('--fr_model', type=str, required=False) parser.add_argument('--lm_model', type=str, required=False) parser.add_argument('-pp', '--plugin_dir', type=str, default=None, help='Path to a plugin folder') args = parser.parse_args() if args.engine == 'pt': assert args.snap is not None, 'To evaluate PyTorch snapshot, please, specify --snap option.' with torch.cuda.device(args.devices[0]): data, embeddings_fun = load_test_dataset(args) model = models_backbones[args.model](embedding_size=args.embed_size, feature=True) model = load_model_state(model, args.snap, args.devices[0]) evaluate(args, data, model, embeddings_fun, args.val_batch_size, args.dump_embeddings, args.roc_fname, args.snap, True, args.show_failed) else: from utils.ie_tools import load_ie_model assert args.fr_model is not None, 'To evaluate IE model, please, specify --fr_model option.' 
#!/usr/bin/env bash
# Creates a virtualenv in ./venv next to this script, appends the project
# directory to PYTHONPATH in its activate script and installs requirements.txt.

# Quote "$0" and the resulting path so that locations with spaces work.
work_dir=$(realpath "$(dirname "$0")")

# Stop if the project directory cannot be entered: everything below uses
# paths relative to it.
cd "${work_dir}" || exit 1
if [[ -e venv ]]; then
  echo "Please remove a previously virtual environment folder '${work_dir}/venv'."
  exit
fi

# Create virtual environment
virtualenv venv -p python3 --prompt="(deep=fr) "
# The appended export is quoted so it stays valid when work_dir has spaces.
echo "export PYTHONPATH=\"\$PYTHONPATH:${work_dir}\"" >> venv/bin/activate
. venv/bin/activate
pip install -r "${work_dir}/requirements.txt"

echo
echo "===================================================="
echo "To start to work, you need to activate a virtualenv:"
echo "$ . venv/bin/activate"
echo "===================================================="
VALID_CORE_FUNC_TYPES = ['l1', 'l2', 'wing']


def wing_core(abs_x, w, eps):
    """Calculates the wing function from https://arxiv.org/pdf/1711.06753.pdf"""
    return w*math.log(1. + abs_x / eps)


class AlignmentLoss(nn.Module):
    """Regression loss to train landmarks model.

    Supported core functions are listed in VALID_CORE_FUNC_TYPES. The error of
    every sample is normalized by the distance between the first two target
    points (presumably the eyes, judging by the variable name).
    """
    def __init__(self, loss_type='l2'):
        super(AlignmentLoss, self).__init__()
        assert loss_type in VALID_CORE_FUNC_TYPES
        self.uniform_weights = True
        self.weights = None
        self.core_func_type = loss_type
        # Parameters of the wing function: width of the non-linear part and
        # curvature.
        self.eps = 0.031
        self.w = 0.156

    def set_weights(self, weights):
        """Set weights for the each landmark point in loss"""
        self.uniform_weights = False
        self.weights = torch.FloatTensor(weights)
        # Keep the historical behavior on GPU machines, but do not fail on
        # CPU-only ones; forward() moves the weights to the loss device anyway.
        if torch.cuda.is_available():
            self.weights = self.weights.cuda()

    def forward(self, input_values, target):
        """Computes the normalized alignment loss.

        :param input_values: predicted landmarks, shape (batch, 2 * n_points).
        :param target: ground-truth landmarks of the same shape.
        :return: scalar tensor, the sum of the per-sample losses divided by
            ``2 * batch``.
        """
        bs = input_values.shape[0]
        loss = input_values - target
        n_points = loss.shape[1] // 2
        loss = loss.view(-1, n_points, 2)
        eyes_diff = target[:, 0:2] - target[:, 2:4]

        if self.core_func_type == 'l2':
            loss = torch.norm(loss, p=2, dim=2).pow(2)
            eyes_dist = torch.norm(eyes_diff, p=2, dim=1).reshape(-1).pow(2)
        elif self.core_func_type == 'l1':
            loss = torch.norm(loss, p=1, dim=2)
            eyes_dist = torch.norm(eyes_diff, p=1, dim=1).reshape(-1)
        elif self.core_func_type == 'wing':
            # wing(x) = w * ln(1 + |x| / eps)  if |x| < w
            #           |x| - C                otherwise,
            # with C = w - w * ln(1 + w / eps) making the two parts meet at w.
            # The switch point is w, not C: C is negative for the default
            # parameters, so comparing against it would disable the log part.
            wing_const = self.w - wing_core(self.w, self.w, self.eps)
            abs_err = torch.abs(loss)
            loss = torch.where(abs_err < self.w,
                               self.w*torch.log(1. + abs_err / self.eps),
                               abs_err - wing_const)
            loss = torch.sum(loss, 2)
            eyes_dist = torch.norm(eyes_diff, p=1, dim=1).reshape(-1)

        if self.uniform_weights:
            loss = torch.sum(loss, 1) / n_points
        else:
            assert self.weights.shape[0] == loss.shape[1]
            # The weighted sum is not divided by the number of points.
            loss = torch.mul(loss, self.weights.to(loss.device))
            loss = torch.sum(loss, 1)

        loss = torch.div(loss, eyes_dist)
        loss = torch.sum(loss)
        return loss / (2.*bs)
def focal_loss(input_values, gamma):
    """Computes the focal loss"""
    p = torch.exp(-input_values)
    loss = (1 - p) ** gamma * input_values
    return loss.mean()


class AMSoftmaxLoss(nn.Module):
    """Computes the AM-Softmax loss with cos or arc margin.

    :param margin_type: 'cos' (additive cosine margin) or 'arc' (additive
        angular margin).
    :param gamma: focal loss exponent; 0 disables the focal term.
    :param m: margin value, must be positive.
    :param s: scale applied to the logits, must be positive.
    :param t: scale of the non-target logits that exceed the margined target
        logit; must be >= 1, where 1 disables this term.
    """
    margin_types = ['cos', 'arc']

    def __init__(self, margin_type='cos', gamma=0., m=0.5, s=30, t=1.):
        super(AMSoftmaxLoss, self).__init__()
        assert margin_type in AMSoftmaxLoss.margin_types
        self.margin_type = margin_type
        assert gamma >= 0
        self.gamma = gamma
        assert m > 0
        self.m = m
        assert s > 0
        self.s = s
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        assert t >= 1
        self.t = t

    def forward(self, cos_theta, target):
        """Computes the loss.

        :param cos_theta: cosines between embeddings and class weights,
            shape (batch, num_classes).
        :param target: class indices, shape (batch,).
        :return: scalar loss tensor.
        """
        if self.margin_type == 'cos':
            phi_theta = cos_theta - self.m
        else:
            sine = torch.sqrt(1.0 - torch.pow(cos_theta, 2))
            phi_theta = cos_theta * self.cos_m - sine * self.sin_m  # cos(theta + m)
            phi_theta = torch.where(cos_theta > self.th, phi_theta, cos_theta - self.sin_m * self.m)

        # One-hot mask of the target classes. It is built with a comparison so
        # that it has the native mask dtype of the running PyTorch version
        # (bool on current releases); uint8 masks are deprecated there.
        class_ids = torch.arange(cos_theta.shape[1], dtype=target.dtype, device=cos_theta.device)
        index = class_ids.view(1, -1) == target.data.view(-1, 1)
        output = torch.where(index, phi_theta, cos_theta)

        if self.gamma == 0 and self.t == 1.:
            return F.cross_entropy(self.s*output, target)

        if self.t > 1:
            h_theta = self.t - 1 + self.t*cos_theta
            # Non-target classes whose cosine exceeds the margined target one.
            target_phi = torch.masked_select(phi_theta, index).view(-1, 1)
            support_vecs_mask = (index == 0) & torch.lt(target_phi - cos_theta, 0)
            output = torch.where(support_vecs_mask, h_theta, output)
            return F.cross_entropy(self.s*output, target)

        return focal_loss(F.cross_entropy(self.s*output, target, reduction='none'), self.gamma)
class MinimumMargin(nn.Module):
    """Implements the Minimum margin loss from https://arxiv.org/abs/1805.06741"""
    def __init__(self, margin=.6):
        super().__init__()
        self.margin = margin

    def forward(self, centers, labels):
        """Penalizes centers of different classes that are closer than the margin.

        :param centers: class centers, shape (num_classes, embed_size).
        :param labels: class indices of the batch, shape (batch,).
        :return: mean violation over the violating pairs as a scalar tensor,
            or the plain number 0 when no pair violates the margin.
        """
        loss_value = 0

        batch_centers = centers[labels, :]
        labels = labels.cpu().data.numpy()

        # Every unordered pair of samples with different labels is taken once
        # (strictly lower triangle). The builtin bool replaces the np.bool
        # alias that was removed from NumPy.
        all_pairs = labels.reshape([-1, 1]) != labels.reshape([1, -1])
        valid_pairs = (all_pairs * np.tri(*all_pairs.shape, k=-1, dtype=bool)).astype(np.float32)
        losses = 1. - torch.mm(batch_centers, torch.t(batch_centers)) - self.margin

        # Only pairs that violate the margin contribute.
        valid_pairs *= (losses.cpu().data.numpy() > 0.0)
        num_valid = float(np.sum(valid_pairs))

        if num_valid > 0:
            # Put the mask where the losses live instead of the current CUDA device.
            pairs_mask = torch.from_numpy(valid_pairs).to(device=losses.device, dtype=losses.dtype)
            loss_value = torch.sum(losses * pairs_mask)
        else:
            return loss_value

        return loss_value / num_valid
class PushLoss(nn.Module):
    """Implements the Push loss.

    :param soft: use the smooth ``log(1 + exp(cos - 1))`` penalty for every
        pair when True, the hinge ``margin - (1 - cos)`` otherwise.
    :param margin: margin of the hinge variant.
    """
    def __init__(self, soft=True, margin=0.5):
        super().__init__()
        self.soft = soft
        self.margin = margin

    def forward(self, features, labels):
        """Pushes apart embeddings that belong to different classes.

        :param features: embeddings, shape (batch, embed_size); they are
            L2-normalized inside.
        :param labels: class indices, shape (batch,).
        :return: mean loss over the contributing pairs as a scalar tensor, or
            the plain number 0 when no pair contributes.
        """
        features = F.normalize(features)
        loss_value = 0
        labels = labels.cpu().data.numpy()
        assert len(labels.shape) == 1

        # Every unordered pair of samples with different labels is taken once
        # (strictly lower triangle). The builtin bool replaces the np.bool
        # alias that was removed from NumPy.
        all_pairs = labels.reshape([-1, 1]) != labels.reshape([1, -1])
        valid_pairs = (all_pairs * np.tri(*all_pairs.shape, k=-1, dtype=bool)).astype(np.float32)

        similarities = torch.mm(features, torch.t(features))
        if self.soft:
            losses = torch.log(1. + torch.exp(similarities - 1))
        else:
            losses = self.margin - (1. - similarities)
            # The hinge variant counts only the pairs that violate the margin.
            valid_pairs *= (losses.cpu().data.numpy() > 0.0)
        num_valid = float(np.sum(valid_pairs))

        if num_valid > 0:
            # Put the mask where the losses live instead of the current CUDA device.
            pairs_mask = torch.from_numpy(valid_pairs).to(device=losses.device, dtype=losses.dtype)
            loss_value = torch.sum(losses * pairs_mask)
        else:
            return loss_value

        return loss_value / num_valid
loss: %.4f' % center_loss_val push_loss_val = 0 if self.push_loss_coeff > 0.0: push_loss_val = self.push_loss(features, labels) self.writer.add_scalar('Loss/push_loss', push_loss_val, iteration) log_string += ' Push loss: %.4f' % push_loss_val push_plus_loss_val = 0 if self.push_plus_loss_coeff > 0.0 and self.center_coeff > 0.0: push_plus_loss_val = self.push_plus_loss(features, self.center_loss.get_centers(), labels) self.writer.add_scalar('Loss/push_plus_loss', push_plus_loss_val, iteration) log_string += ' Push Plus loss: %.4f' % push_plus_loss_val glob_push_plus_loss_val = 0 if self.glob_push_plus_loss_coeff > 0.0 and self.center_coeff > 0.0: glob_push_plus_loss_val = self.glob_push_plus_loss(features, self.center_loss.get_centers(), labels) self.writer.add_scalar('Loss/global_push_plus_loss', glob_push_plus_loss_val, iteration) log_string += ' Global Push Plus loss: %.4f' % glob_push_plus_loss_val min_margin_loss_val = 0 if self.min_margin_loss_coeff > 0.0 and self.center_coeff > 0.0: min_margin_loss_val = self.min_margin_loss(self.center_loss.get_centers(), labels) self.writer.add_scalar('Loss/min_margin_loss', min_margin_loss_val, iteration) log_string += ' Min margin loss: %.4f' % min_margin_loss_val loss_value = self.center_coeff * center_loss_val + self.push_loss_coeff * push_loss_val + \ self.push_plus_loss_coeff * push_plus_loss_val + self.min_margin_loss_coeff * min_margin_loss_val \ + self.glob_push_plus_loss_coeff * glob_push_plus_loss_val if self.min_margin_loss_coeff + self.center_coeff + self.push_loss_coeff + self.push_plus_loss_coeff > 0.: self.writer.add_scalar('Loss/AUX_losses', loss_value, iteration) return loss_value, log_string def init_iteration(self): """Initializes a training iteration""" if self.center_coeff > 0.: self.optimizer_centloss.zero_grad() def end_iteration(self): """Finalizes a training iteration""" if self.center_coeff > 0.: for param in self.center_loss.parameters(): param.grad.data *= (1. 
/ self.center_coeff) self.optimizer_centloss.step() ================================================ FILE: losses/regularizer.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import torch import torch.nn.functional as F def l2_reg_ortho(mdl): """ Function used for Orthogonal Regularization. """ l2_reg = None for w in mdl.parameters(): if w.ndimension() < 2: continue else: cols = w[0].numel() w1 = w.view(-1, cols) wt = torch.transpose(w1, 0, 1) m = torch.matmul(wt, w1) ident = torch.eye(cols, cols).cuda() w_tmp = (m - ident) height = w_tmp.size(0) u = F.normalize(w_tmp.new_empty(height).normal_(0, 1), dim=0, eps=1e-12) v = F.normalize(torch.matmul(w_tmp.t(), u), dim=0, eps=1e-12) u = F.normalize(torch.matmul(w_tmp, v), dim=0, eps=1e-12) sigma = torch.dot(u, torch.matmul(w_tmp, v)) if l2_reg is None: l2_reg = (torch.norm(sigma, 2))**2 else: l2_reg += (torch.norm(sigma, 2))**2 return l2_reg class ODecayScheduler(): """Scheduler for the decay of the orthogonal regularizer""" def __init__(self, schedule, initial_decay, mult_factor): assert len(schedule) > 1 self.schedule = schedule self.epoch_num = 0 self.mult_factor = mult_factor self.decay = initial_decay def step(self): """Switches to the next step""" self.epoch_num += 1 if self.epoch_num in self.schedule: self.decay *= self.mult_factor if self.epoch_num == self.schedule[-1]: self.decay = 0.0 def get_decay(self): """Returns the current value of decay according 
to th schedule""" return self.decay ================================================ FILE: model/__init__.py ================================================ ================================================ FILE: model/backbones/__init__.py ================================================ ================================================ FILE: model/backbones/resnet.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import torch.nn as nn from model.blocks.resnet_blocks import Bottleneck, BasicBlock from model.blocks.shared_blocks import make_activation class ResNet(nn.Module): def __init__(self, block, layers, num_classes=1000, activation=nn.ReLU): self.inplanes = 64 super(ResNet, self).__init__() self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) self.bn1 = nn.BatchNorm2d(64) self.relu = make_activation(nn.ReLU) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, layers[0], activation=activation) self.layer2 = self._make_layer(block, 128, layers[1], stride=2, activation=activation) self.layer3 = self._make_layer(block, 256, layers[2], stride=2, activation=activation) self.layer4 = self._make_layer(block, 512, layers[3], stride=2, activation=activation) self.avgpool = nn.Conv2d(512 * block.expansion, 512 * block.expansion, 7, groups=512 * block.expansion, bias=False) self.fc = nn.Conv2d(512 * block.expansion, num_classes, 1, 
stride=1, padding=0, bias=False) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out') elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) def _make_layer(self, block, planes, blocks, stride=1, activation=nn.ReLU): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(planes * block.expansion), ) layers = [] layers.append(block(self.inplanes, planes, stride, downsample, activation=activation)) self.inplanes = planes * block.expansion for _ in range(1, blocks): layers.append(block(self.inplanes, planes, activation=activation)) return nn.Sequential(*layers) def forward(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) x = self.avgpool(x) x = self.fc(x) return x def resnet50(**kwargs): model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) return model def resnet34(**kwargs): model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) return model ================================================ FILE: model/backbones/rmnet.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
class RMNetBody(nn.Module):
    """Feature extractor made of a convolutional stem followed by RM-block stages.

    The three constructor tuples are indexed by stage; index 0 describes the
    stem, for which only ``trunk_width[0]`` is used.
    """

    def __init__(self, block=RMBlock, blocks_per_stage=(None, 4, 8, 10, 11), trunk_width=(32, 32, 64, 128, 256),
                 bottleneck_width=(None, 8, 16, 32, 64)):
        super(RMNetBody, self).__init__()
        assert len(blocks_per_stage) == len(trunk_width) == len(bottleneck_width)
        self.dim_out = trunk_width[-1]

        # Stage 0: input batch norm and a strided 3x3 convolution.
        stem = nn.Sequential(OrderedDict([
            ('data_bn', nn.BatchNorm2d(3)),
            ('conv1', nn.Conv2d(3, trunk_width[0], kernel_size=3, stride=2, padding=1, bias=False)),
            ('bn1', nn.BatchNorm2d(trunk_width[0])),
            ('relu1', nn.ReLU(inplace=True))]))
        stages = [stem]

        specs = list(zip(blocks_per_stage, trunk_width, bottleneck_width))
        for idx in range(1, len(specs)):
            blocks_num, width, squeeze = specs[idx]
            stage = []
            # Stage 1 keeps the stem resolution; every later stage starts
            # with a downsampling block that also changes the trunk width.
            if idx > 1:
                stage.append(block(trunk_width[idx - 1], squeeze, width, downsample=True))
            stage.extend(block(width, squeeze, width) for _ in range(blocks_num))
            stages.append(nn.Sequential(*stage))

        self.stages = nn.Sequential(OrderedDict(
            ('stage_{}'.format(i), stage) for i, stage in enumerate(stages)))

        self.init_weights()

    def init_weights(self):
        """Initializes the stem; blocks are expected to initialize themselves."""
        stem = self.stages[0]
        data_bn, conv1, bn1 = stem[0], stem[1], stem[2]
        for norm in (data_bn, bn1):
            nn.init.constant_(norm.weight, 1)
            nn.init.constant_(norm.bias, 0)
        nn.init.kaiming_normal_(conv1.weight, mode='fan_out')

    def forward(self, x):
        """Applies all stages sequentially."""
        return self.stages(x)
class SEResNet(nn.Module):
    """ResNet-style backbone assembled from the given (SE) residual block.

    Uses a 3x3 stride-1 stem, a depthwise 7x7 convolution as the final
    pooling and a 1x1 convolution as the classifier.
    """

    def __init__(self, block, layers, num_classes=1000, activation=nn.ReLU):
        """Builds the network.

        Args:
            block: residual block class exposing an ``expansion`` attribute.
            layers: number of blocks in each of the four stages.
            num_classes: number of output channels of the final 1x1 conv.
            activation: activation class forwarded to the residual blocks.
        """
        self.inplanes = 64
        super(SEResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stages are registered as layer1..layer4, in this order.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for idx, (planes, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, layers[idx], stride=stride, activation=activation)
            setattr(self, 'layer%d' % (idx + 1), stage)

        out_planes = 512 * block.expansion
        self.avgpool = nn.Conv2d(out_planes, out_planes, 7, groups=out_planes, bias=False)
        self.fc = nn.Conv2d(out_planes, num_classes, 1, stride=1, padding=0, bias=False)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, activation=nn.ReLU):
        """Creates one stage of ``blocks`` residual blocks.

        The shortcut of the first block gets a 1x1 conv + BN projection when
        the stride or the number of channels changes.
        """
        out_planes = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, downsample, activation=activation)]
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes, activation=activation) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Runs stem, four residual stages, depthwise pooling and classifier."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        return self.fc(self.avgpool(x))


def se_resnet50(**kwargs):
    """Builds an SEResNet of SEBottleneck blocks with the (3, 4, 6, 3) layout."""
    return SEResNet(SEBottleneck, [3, 4, 6, 3], **kwargs)


def se_resnet101(**kwargs):
    """Builds an SEResNet of SEBottleneck blocks with the (3, 4, 23, 3) layout."""
    return SEResNet(SEBottleneck, [3, 4, 23, 3], **kwargs)


def se_resnet152(**kwargs):
    """Builds an SEResNet of SEBottleneck blocks with the (3, 8, 36, 3) layout."""
    return SEResNet(SEBottleneck, [3, 8, 36, 3], **kwargs)
class SEResNeXt(nn.Module):
    """ResNeXt-style classifier assembled from the given grouped residual block.

    Uses a 7x7 stride-2 stem, adaptive average pooling to 1x1 and a linear
    classifier, so the output is a ``(batch, num_classes)`` tensor.
    """

    def __init__(self, block, layers, cardinality=32, num_classes=1000):
        """Builds the network.

        Args:
            block: residual block class exposing an ``expansion`` attribute.
            layers: number of blocks in each of the four stages.
            cardinality: forwarded to every block (number of conv groups).
            num_classes: size of the linear classifier output.
        """
        super(SEResNeXt, self).__init__()
        self.cardinality = cardinality
        self.inplanes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stages are registered as layer1..layer4, in this order.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for idx, (planes, stride) in enumerate(stage_specs):
            setattr(self, 'layer%d' % (idx + 1), self._make_layer(block, planes, layers[idx], stride=stride))

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Creates one stage of ``blocks`` residual blocks.

        The shortcut of the first block gets a 1x1 conv + BN projection when
        the stride or the number of channels changes.
        """
        out_planes = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, self.cardinality, stride, downsample)]
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes, self.cardinality) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Runs stem, four stages, global pooling and the linear classifier."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        pooled = self.avgpool(x)
        return self.fc(pooled.view(pooled.size(0), -1))


def se_resnext50(**kwargs):
    """Builds an SEResNeXt of SEBottleneckX blocks with the (3, 4, 6, 3) layout."""
    return SEResNeXt(SEBottleneckX, [3, 4, 6, 3], **kwargs)


def se_resnext101(**kwargs):
    """Builds an SEResNeXt of SEBottleneckX blocks with the (3, 4, 23, 3) layout."""
    return SEResNeXt(SEBottleneckX, [3, 4, 23, 3], **kwargs)


def se_resnext152(**kwargs):
    """Builds an SEResNeXt of SEBottleneckX blocks with the (3, 8, 36, 3) layout."""
    return SEResNeXt(SEBottleneckX, [3, 8, 36, 3], **kwargs)


class ShuffleNetV2Body(nn.Module):
    """ShuffleNetV2-style feature extractor with PReLU activations in its stages."""

    def __init__(self, input_size=224, width_mult=1.):
        """Builds the network.

        Args:
            input_size: only checked to be a multiple of 32.
            width_mult: one of 0.5, 1.0, 1.5, 2.0; selects the channel widths.

        Raises:
            ValueError: if ``width_mult`` is not one of the supported values.
        """
        super(ShuffleNetV2Body, self).__init__()

        assert input_size % 32 == 0

        self.stage_repeats = [4, 8, 4]
        presets = (
            (0.5, [-1, 24, 48, 96, 192, 1024]),
            (1.0, [-1, 24, 116, 232, 464, 1024]),
            (1.5, [-1, 24, 176, 352, 704, 1024]),
            (2.0, [-1, 24, 224, 488, 976, 2048]),
        )
        for mult, channels in presets:
            if width_mult == mult:
                self.stage_out_channels = channels
                break
        else:
            raise ValueError("Unsupported width multiplier")

        # building first layer
        self.bn_first = nn.BatchNorm2d(3)
        input_channel = self.stage_out_channels[1]
        self.conv1 = conv_bn(3, input_channel, 2)

        # building inverted residual blocks: the first block of every stage
        # is the strided two-branch variant, the rest keep the resolution.
        blocks = []
        for stage_idx, repeats in enumerate(self.stage_repeats):
            output_channel = self.stage_out_channels[stage_idx + 2]
            for rep in range(repeats):
                variant = 2 if rep == 0 else 1
                blocks.append(ShuffleInvertedResidual(input_channel, output_channel,
                                                      variant, variant, activation=nn.PReLU))
                input_channel = output_channel
        self.features = nn.Sequential(*blocks)

        self.conv_last = conv_1x1_bn(input_channel, self.stage_out_channels[-1], activation=nn.PReLU)
        self.init_weights()

    @staticmethod
    def get_downscale_factor():
        """Returns the total spatial downscale factor of the body."""
        return 16

    def init_weights(self):
        """Resets the input batch norm to the identity scale and zero shift."""
        nn.init.constant_(self.bn_first.weight, 1)
        nn.init.constant_(self.bn_first.bias, 0)

    def get_num_output_channels(self):
        """Returns the number of channels produced by the last convolution."""
        return self.stage_out_channels[-1]

    def forward(self, x):
        """Runs input normalization, stem, shuffle stages and the last 1x1 conv."""
        return self.conv_last(self.features(self.conv1(self.bn_first(x))))
class InvertedResidual(nn.Module):
    """Implementation of the modified Inverted residual block.

    The block is a 1x1 expansion, a 3x3 depthwise convolution and a 1x1
    projection (each followed by batch norm, the first two also by PReLU),
    finished with a squeeze-excitation layer.
    """

    def __init__(self, in_channels, out_channels, stride, expand_ratio, outp_size=None):
        """Builds the block.

        Args:
            in_channels: number of input channels.
            out_channels: number of output channels.
            stride: stride of the depthwise convolution, 1 or 2.
            expand_ratio: multiplier of ``in_channels`` for the hidden width.
            outp_size: forwarded to ``SELayer`` as its ``size`` argument.
        """
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        # The identity shortcut is possible only when the shape is preserved.
        self.use_res_connect = self.stride == 1 and in_channels == out_channels

        hidden = in_channels * expand_ratio
        self.inv_block = nn.Sequential(
            # pointwise expansion
            nn.Conv2d(in_channels, hidden, 1, 1, 0, bias=False),
            nn.BatchNorm2d(hidden),
            nn.PReLU(),

            # depthwise convolution
            nn.Conv2d(hidden, hidden, 3, stride, 1, groups=hidden, bias=False),
            nn.BatchNorm2d(hidden),
            nn.PReLU(),

            # linear pointwise projection followed by squeeze-excitation
            nn.Conv2d(hidden, out_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(out_channels),
            SELayer(out_channels, 8, nn.PReLU, outp_size)
        )

    def forward(self, x):
        """Applies the block, adding the input when the shortcut is enabled."""
        if not self.use_res_connect:
            return self.inv_block(x)
        return x + self.inv_block(x)
class Bottleneck(nn.Module):
    """Three-layer (1x1, 3x3, 1x1) residual block; the output has ``planes * expansion`` channels."""
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, activation=nn.ReLU):
        """Builds the block.

        Args:
            inplanes: number of input channels.
            planes: width of the two inner convolutions.
            stride: stride of the 3x3 convolution.
            downsample: optional module applied to the input to form the
                shortcut; ``None`` means the input itself is the shortcut.
            activation: activation class, instantiated via ``make_activation``.
        """
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.act1 = make_activation(activation)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.act2 = make_activation(activation)

        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.act3 = make_activation(activation)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Returns ``act(F(x) + shortcut(x))``."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.act1(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.act2(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.act3(out)

        return out


class BasicBlock(nn.Module):
    """Two-layer (3x3, 3x3) residual block; the output has ``planes`` channels."""
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, activation=nn.ReLU):
        """Builds the block.

        Args:
            inplanes: number of input channels.
            planes: number of output channels of both convolutions.
            stride: stride of the first 3x3 convolution.
            downsample: optional module applied to the input to form the
                shortcut; ``None`` means the input itself is the shortcut.
            activation: activation class, instantiated via ``make_activation``.
        """
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = make_activation(activation)
        # conv2 consumes the output of conv1, which has `planes` channels;
        # declaring it with `inplanes` breaks every block that changes width.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Returns ``act(F(x) + shortcut(x))``."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
class RMBlock(nn.Module):
    """Residual block: 1x1 squeeze, 3x3 depthwise and 1x1 expand convolutions.

    With ``downsample=True`` the depthwise convolution uses stride 2 and the
    shortcut becomes 2x2 max pooling followed by a 1x1 conv + batch norm.
    """

    def __init__(self, input_planes, squeeze_planes, output_planes, downsample=False, dropout_ratio=0.1,
                 activation=nn.ELU):
        """Builds the block.

        Args:
            input_planes: number of input channels.
            squeeze_planes: width of the squeezed (depthwise) part.
            output_planes: number of output channels.
            downsample: whether the block halves the resolution.
            dropout_ratio: dropout probability applied to the residual branch;
                dropout is skipped when it is not positive.
            activation: activation class, instantiated via ``make_activation``.
        """
        super(RMBlock, self).__init__()
        self.downsample = downsample
        self.input_planes = input_planes
        self.output_planes = output_planes

        self.squeeze_conv = nn.Conv2d(input_planes, squeeze_planes, kernel_size=1, bias=False)
        self.squeeze_bn = nn.BatchNorm2d(squeeze_planes)

        dw_stride = 2 if downsample else 1
        self.dw_conv = nn.Conv2d(squeeze_planes, squeeze_planes, groups=squeeze_planes, kernel_size=3, padding=1,
                                 stride=dw_stride, bias=False)
        self.dw_bn = nn.BatchNorm2d(squeeze_planes)

        self.expand_conv = nn.Conv2d(squeeze_planes, output_planes, kernel_size=1, bias=False)
        self.expand_bn = nn.BatchNorm2d(output_planes)

        self.activation = make_activation(activation)
        self.dropout_ratio = dropout_ratio

        if self.downsample:
            self.skip_conv = nn.Conv2d(input_planes, output_planes, kernel_size=1, bias=False)
            self.skip_conv_bn = nn.BatchNorm2d(output_planes)

        self.init_weights()

    def init_weights(self):
        """Initializes the direct conv (Kaiming normal, fan-out) and BN (1/0) children."""
        for child in self.children():
            if isinstance(child, nn.BatchNorm2d):
                nn.init.constant_(child.weight, 1)
                nn.init.constant_(child.bias, 0)
            elif isinstance(child, nn.Conv2d):
                nn.init.kaiming_normal_(child.weight, mode='fan_out')

    def forward(self, x):
        """Returns ``act(branch(x) + shortcut(x))``."""
        out = self.activation(self.squeeze_bn(self.squeeze_conv(x)))
        out = self.activation(self.dw_bn(self.dw_conv(out)))
        out = self.expand_bn(self.expand_conv(out))
        if self.dropout_ratio > 0:
            out = F.dropout(out, p=self.dropout_ratio, training=self.training, inplace=True)

        if self.downsample:
            shortcut = F.max_pool2d(x, kernel_size=2, stride=2, padding=0)
            shortcut = self.skip_conv_bn(self.skip_conv(shortcut))
        else:
            shortcut = x

        out += shortcut
        return self.activation(out)
class SEBottleneck(nn.Module):
    """Bottleneck residual block with a squeeze-excitation gate on the residual branch."""
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, activation=nn.ReLU):
        """Builds the block.

        Args:
            inplanes: number of input channels.
            planes: width of the inner convolutions; the output has
                ``planes * 4`` channels.
            stride: stride of the 3x3 convolution.
            downsample: optional module producing the shortcut from the input.
            activation: activation class, instantiated via ``make_activation``.
        """
        super(SEBottleneck, self).__init__()
        wide = planes * 4
        narrow = planes // 4

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, wide, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(wide)
        self.relu = make_activation(activation)

        # Squeeze-excitation: global pooling and a two-layer 1x1 bottleneck.
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.conv_down = nn.Conv2d(wide, narrow, kernel_size=1, bias=False)
        self.conv_up = nn.Conv2d(narrow, wide, kernel_size=1, bias=False)
        self.sig = nn.Sigmoid()

        # Shortcut projection
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Returns ``act(gate * F(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Per-channel gate in (0, 1) computed from the pooled features.
        gate = self.conv_down(self.global_pool(out))
        gate = self.sig(self.conv_up(self.relu(gate)))

        shortcut = x if self.downsample is None else self.downsample(x)

        return self.relu(gate * out + shortcut)
class SEBottleneckX(nn.Module):
    """ResNeXt-style bottleneck (grouped 3x3 convolution) followed by an SE layer."""
    expansion = 4

    def __init__(self, inplanes, planes, cardinality, stride=1, downsample=None):
        """Builds the block.

        Args:
            inplanes: number of input channels.
            planes: base width; inner convolutions use ``planes * 2`` channels
                and the output has ``planes * 4`` channels.
            cardinality: number of groups of the 3x3 convolution.
            stride: stride of the 3x3 convolution.
            downsample: optional module producing the shortcut from the input.
        """
        super(SEBottleneckX, self).__init__()
        inner = planes * 2
        outer = planes * 4

        self.conv1 = nn.Conv2d(inplanes, inner, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(inner)

        self.conv2 = nn.Conv2d(inner, inner, kernel_size=3, stride=stride,
                               padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(inner)

        self.conv3 = nn.Conv2d(inner, outer, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(outer)

        self.selayer = SELayer(outer, 16, nn.ReLU)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Returns ``relu(SE(F(x)) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.selayer(self.bn3(self.conv3(out)))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
def make_activation(activation):
    """Factory for activation functions.

    Args:
        activation: activation module class (e.g. ``nn.ReLU``, ``nn.PReLU``).

    Returns:
        A new instance of the class. The in-place variant is requested
        whenever the constructor accepts ``inplace``; ``nn.PReLU`` and other
        classes without that argument are created with default arguments.
    """
    if activation is nn.PReLU:
        return activation()
    try:
        return activation(inplace=True)
    except TypeError:
        # Activations such as nn.Sigmoid or nn.Tanh have no `inplace` option.
        return activation()


class SELayer(nn.Module):
    """Implementation of the Squeeze-Excitation layer from https://arxiv.org/abs/1709.01507"""

    def __init__(self, inplanes, squeeze_ratio=8, activation=nn.PReLU, size=None):
        """Builds the layer.

        Args:
            inplanes: number of input (and output) channels.
            squeeze_ratio: channel reduction factor of the hidden 1x1 conv.
            activation: activation class used between the two 1x1 convs.
            size: kernel size of a fixed average pooling; when ``None`` an
                adaptive pooling to 1x1 is used instead.
        """
        super(SELayer, self).__init__()
        assert squeeze_ratio >= 1
        assert inplanes > 0
        if size is not None:
            self.global_avgpool = nn.AvgPool2d(size)
        else:
            self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(inplanes, int(inplanes / squeeze_ratio), kernel_size=1, stride=1)
        self.conv2 = nn.Conv2d(int(inplanes / squeeze_ratio), inplanes, kernel_size=1, stride=1)
        self.relu = make_activation(activation)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Scales ``x`` by sigmoid gates computed from its pooled features."""
        out = self.global_avgpool(x)
        out = self.conv1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.sigmoid(out)
        return x * out


class ScaleFilter(nn.Module):
    """Implementation of the ScaleFilter regularizer"""

    def __init__(self, q):
        """Stores the scale spread ``q``, which must satisfy ``0 < q < 1``."""
        super(ScaleFilter, self).__init__()
        assert 0 < q < 1
        self.q = q

    def forward(self, x):
        """Multiplies dimension 1 of ``x`` by random factors drawn from (1 - q, 1 + q].

        One factor is drawn per index of ``x.shape[1]`` and shared across
        the batch and the two trailing dimensions. In evaluation mode the
        input is returned unchanged.
        """
        if not self.training:
            return x
        scale_factors = 1. + self.q \
                - 2*self.q*torch.rand(x.shape[1], 1, 1, dtype=torch.float32, requires_grad=False).to(x.device)
        return x * scale_factors
def conv_bn(inp, oup, stride, activation=nn.ReLU):
    """Returns a 3x3 conv (padding 1, no bias) + batch norm + activation block.

    The convolution weight is initialized with Kaiming normal (fan-out).
    """
    conv = nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        make_activation(activation)
    )
    nn.init.kaiming_normal_(conv[0].weight, mode='fan_out')
    return conv


def conv_1x1_bn(inp, oup, activation=nn.ReLU):
    """Returns a 1x1 conv (no bias) + batch norm + activation block.

    The convolution weight is initialized with Kaiming normal (fan-out).
    """
    conv = nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        make_activation(activation)
    )
    nn.init.kaiming_normal_(conv[0].weight, mode='fan_out')
    return conv


def channel_shuffle(x, groups):
    """Interleaves the channels of a 4-D tensor across ``groups`` groups.

    Channels are viewed as ``(groups, channels_per_group)``, the two axes are
    swapped and the result is flattened back, e.g. channels ``[0, 1, 2, 3]``
    with two groups become ``[0, 2, 1, 3]``.
    """
    batchsize, num_channels, height, width = x.size()
    channels_per_group = num_channels // groups
    # reshape
    x = x.view(batchsize, groups, channels_per_group, height, width)
    x = torch.transpose(x, 1, 2).contiguous()
    # flatten
    x = x.view(batchsize, -1, height, width)
    return x


class ShuffleInvertedResidual(nn.Module):
    """ShuffleNetV2 unit.

    ``benchmodel == 1``: the input channels are split in halves, the first
    half is passed through unchanged and the second goes through ``branch2``.
    ``benchmodel == 2``: the whole input goes through both ``branch1`` and
    ``branch2``. In both cases the two results are concatenated and shuffled.
    """

    def __init__(self, inp, oup, stride, benchmodel, activation=nn.ReLU):
        """Builds the unit.

        Args:
            inp: number of input channels.
            oup: number of output channels (each branch yields ``oup // 2``).
            stride: stride of the depthwise convolutions, 1 or 2.
            benchmodel: unit variant, 1 or 2 (see the class docstring).
            activation: activation class, instantiated via ``make_activation``.

        Raises:
            ValueError: if ``benchmodel`` is neither 1 nor 2.
        """
        super(ShuffleInvertedResidual, self).__init__()
        if benchmodel not in (1, 2):
            # Fail at construction instead of with an unbound local in forward().
            raise ValueError("Unsupported benchmodel: {}".format(benchmodel))
        self.benchmodel = benchmodel
        self.stride = stride
        assert stride in [1, 2]

        oup_inc = oup//2

        if self.benchmodel == 1:
            # assert inp == oup_inc
            self.branch2 = nn.Sequential(
                # pw
                nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup_inc),
                make_activation(activation),
                # dw
                nn.Conv2d(oup_inc, oup_inc, 3, stride, 1, groups=oup_inc, bias=False),
                nn.BatchNorm2d(oup_inc),
                # pw-linear
                nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup_inc),
                make_activation(activation),
            )
        else:
            self.branch1 = nn.Sequential(
                # dw
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                # pw-linear
                nn.Conv2d(inp, oup_inc, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup_inc),
                make_activation(activation),
            )

            self.branch2 = nn.Sequential(
                # pw
                nn.Conv2d(inp, oup_inc, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup_inc),
                make_activation(activation),
                # dw
                nn.Conv2d(oup_inc, oup_inc, 3, stride, 1, groups=oup_inc, bias=False),
                nn.BatchNorm2d(oup_inc),
                # pw-linear
                nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup_inc),
                make_activation(activation),
            )
        self.init_weights()

    @staticmethod
    def _concat(x, out):
        # concatenate along channel axis
        return torch.cat((x, out), 1)

    def init_weights(self):
        """Initializes every conv (Kaiming normal, fan-out) and batch norm (1/0).

        The layers live inside the ``nn.Sequential`` branches, so the whole
        module tree is walked; the direct children alone contain no conv or
        batch norm layer.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Applies the unit and shuffles the channels of the concatenated result."""
        if self.benchmodel == 1:
            half = x.shape[1]//2
            x1 = x[:, :half, :, :]
            x2 = x[:, half:, :, :]
            out = self._concat(x1, self.branch2(x2))
        else:
            out = self._concat(self.branch1(x), self.branch2(x))

        return channel_shuffle(out, 2)
""" from abc import abstractmethod import torch.nn as nn class ModelInterface(nn.Module): """Abstract class for models""" @abstractmethod def set_dropout_ratio(self, ratio): """Sets dropout ratio of the model""" @abstractmethod def get_input_res(self): """Returns input resolution""" from .rmnet_angular import RMNetAngular from .mobilefacenet import MobileFaceNet from .landnet import LandmarksNet from .resnet_angular import ResNetAngular from .se_resnet_angular import SEResNetAngular from .shufflenet_v2_angular import ShuffleNetV2Angular models_backbones = {'rmnet': RMNetAngular, 'mobilenet': MobileFaceNet, 'resnet': ResNetAngular, 'shufflenetv2': ShuffleNetV2Angular, 'se_resnet': SEResNetAngular} models_landmarks = {'landnet': LandmarksNet} ================================================ FILE: model/landnet.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" import torch.nn as nn from .common import ModelInterface class LandmarksNet(ModelInterface): """Facial landmarks localization network""" def __init__(self): super(LandmarksNet, self).__init__() self.bn_first = nn.BatchNorm2d(3) activation = nn.PReLU self.landnet = nn.Sequential( nn.Conv2d(3, 16, kernel_size=3, padding=1), activation(), nn.MaxPool2d(2, stride=2), nn.BatchNorm2d(16), nn.Conv2d(16, 32, kernel_size=3, padding=1), activation(), nn.MaxPool2d(2, stride=2), nn.BatchNorm2d(32), nn.Conv2d(32, 64, kernel_size=3, padding=1), activation(), nn.MaxPool2d(2, stride=2), nn.BatchNorm2d(64), nn.Conv2d(64, 64, kernel_size=3, padding=1), activation(), nn.BatchNorm2d(64), nn.Conv2d(64, 128, kernel_size=3, padding=1), activation(), nn.BatchNorm2d(128) ) # dw pooling self.bottleneck_size = 256 self.pool = nn.Sequential( nn.Conv2d(128, 128, kernel_size=6, padding=0, groups=128), activation(), nn.BatchNorm2d(128), nn.Conv2d(128, self.bottleneck_size, kernel_size=1, padding=0), activation(), nn.BatchNorm2d(self.bottleneck_size), ) # Regressor for 5 landmarks (10 coordinates) self.fc_loc = nn.Sequential( nn.Conv2d(self.bottleneck_size, 64, kernel_size=1), activation(), nn.Conv2d(64, 10, kernel_size=1), nn.Sigmoid() ) def forward(self, x): xs = self.landnet(self.bn_first(x)) xs = self.pool(xs) xs = self.fc_loc(xs) return xs def get_input_res(self): return 48, 48 def set_dropout_ratio(self, ratio): pass ================================================ FILE: model/mobilefacenet.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import math import torch.nn as nn from losses.am_softmax import AngleSimpleLinear from model.blocks.mobilenet_v2_blocks import InvertedResidual from model.blocks.shared_blocks import make_activation from .common import ModelInterface def init_block(in_channels, out_channels, stride, activation=nn.PReLU): """Builds the first block of the MobileFaceNet""" return nn.Sequential( nn.BatchNorm2d(3), nn.Conv2d(in_channels, out_channels, 3, stride, 1, bias=False), nn.BatchNorm2d(out_channels), make_activation(activation) ) class MobileFaceNet(ModelInterface): """Implements modified MobileFaceNet from https://arxiv.org/abs/1804.07573""" def __init__(self, embedding_size=128, num_classes=1, width_multiplier=1., feature=True): super(MobileFaceNet, self).__init__() assert embedding_size > 0 assert num_classes > 0 assert width_multiplier > 0 self.feature = feature # Set up of inverted residual blocks inverted_residual_setting = [ # t, c, n, s [2, 64, 5, 2], [4, 128, 1, 2], [2, 128, 6, 1], [4, 128, 1, 2], [2, 128, 2, 1] ] first_channel_num = 64 last_channel_num = 512 self.features = [init_block(3, first_channel_num, 2)] self.features.append(nn.Conv2d(first_channel_num, first_channel_num, 3, 1, 1, groups=first_channel_num, bias=False)) self.features.append(nn.BatchNorm2d(64)) self.features.append(nn.PReLU()) # Inverted Residual Blocks in_channel_num = first_channel_num size_h, size_w = MobileFaceNet.get_input_res() size_h, size_w = size_h // 2, size_w // 2 for t, c, n, s in inverted_residual_setting: output_channel = int(c * width_multiplier) for i in range(n): if i == 0: size_h, size_w = 
size_h // s, size_w // s self.features.append(InvertedResidual(in_channel_num, output_channel, s, t, outp_size=(size_h, size_w))) else: self.features.append(InvertedResidual(in_channel_num, output_channel, 1, t, outp_size=(size_h, size_w))) in_channel_num = output_channel # 1x1 expand block self.features.append(nn.Sequential(nn.Conv2d(in_channel_num, last_channel_num, 1, 1, 0, bias=False), nn.BatchNorm2d(last_channel_num), nn.PReLU())) self.features = nn.Sequential(*self.features) # Depth-wise pooling k_size = (MobileFaceNet.get_input_res()[0] // 16, MobileFaceNet.get_input_res()[1] // 16) self.dw_pool = nn.Conv2d(last_channel_num, last_channel_num, k_size, groups=last_channel_num, bias=False) self.dw_bn = nn.BatchNorm2d(last_channel_num) self.conv1_extra = nn.Conv2d(last_channel_num, embedding_size, 1, stride=1, padding=0, bias=False) if not self.feature: self.fc_angular = AngleSimpleLinear(embedding_size, num_classes) self.init_weights() def forward(self, x): x = self.features(x) x = self.dw_bn(self.dw_pool(x)) x = self.conv1_extra(x) if self.feature or not self.training: return x x = x.view(x.size(0), -1) y = self.fc_angular(x) return x, y @staticmethod def get_input_res(): return 128, 128 def set_dropout_ratio(self, ratio): assert 0 <= ratio < 1. def init_weights(self): """Initializes weights of the model before training""" for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. 
/ n)) if m.bias is not None: m.bias.data.zero_() elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() elif isinstance(m, nn.Linear): n = m.weight.size(1) m.weight.data.normal_(0, 0.01) m.bias.data.zero_() ================================================ FILE: model/resnet_angular.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import torch.nn as nn from losses.am_softmax import AngleSimpleLinear from model.backbones.resnet import resnet50 from .common import ModelInterface class ResNetAngular(ModelInterface): """Face reid head for the ResNet architecture""" def __init__(self, embedding_size=128, num_classes=0, feature=True): super(ResNetAngular, self).__init__() self.bn_first = nn.BatchNorm2d(3) self.feature = feature self.model = resnet50(num_classes=embedding_size, activation=nn.PReLU) self.embedding_size = embedding_size if not self.feature: self.fc_angular = AngleSimpleLinear(self.embedding_size, num_classes) def forward(self, x): x = self.bn_first(x) x = self.model(x) if self.feature or not self.training: return x x = x.view(x.size(0), -1) y = self.fc_angular(x) return x, y @staticmethod def get_input_res(): return 112, 112 def set_dropout_ratio(self, ratio): assert 0 <= ratio < 1. 
================================================ FILE: model/rmnet_angular.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import torch.nn as nn from losses.am_softmax import AngleSimpleLinear from model.backbones.rmnet import RMNetBody from model.blocks.rmnet_blocks import RMBlock from .common import ModelInterface class RMNetAngular(ModelInterface): """Face reid head for the ResMobNet architecture. See https://arxiv.org/pdf/1812.02465.pdf for details about the ResMobNet backbone.""" def __init__(self, embedding_size, num_classes=0, feature=True, body=RMNetBody): super(RMNetAngular, self).__init__() self.feature = feature self.backbone = body() self.global_pooling = nn.MaxPool2d((8, 8)) self.conv1_extra = nn.Conv2d(256, embedding_size, 1, stride=1, padding=0, bias=False) if not feature: self.fc_angular = AngleSimpleLinear(embedding_size, num_classes) def forward(self, x): x = self.backbone(x) x = self.global_pooling(x) x = self.conv1_extra(x) if self.feature or not self.training: return x x = x.view(x.size(0), -1) y = self.fc_angular(x) return x, y def set_dropout_ratio(self, ratio): assert 0 <= ratio < 1. 
for m in self.backbone.modules(): if isinstance(m, RMBlock): m.dropout_ratio = ratio @staticmethod def get_input_res(): return 128, 128 ================================================ FILE: model/se_resnet_angular.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import torch.nn as nn from losses.am_softmax import AngleSimpleLinear from model.backbones.se_resnet import se_resnet50 from .common import ModelInterface class SEResNetAngular(ModelInterface): """Face reid head for the SE ResNet architecture""" def __init__(self, embedding_size=128, num_classes=0, feature=True): super(SEResNetAngular, self).__init__() self.bn_first = nn.BatchNorm2d(3) self.feature = feature self.model = se_resnet50(num_classes=embedding_size, activation=nn.PReLU) self.embedding_size = embedding_size if not self.feature: self.fc_angular = AngleSimpleLinear(self.embedding_size, num_classes) def forward(self, x): x = self.bn_first(x) x = self.model(x) if self.feature or not self.training: return x x = x.view(x.size(0), -1) y = self.fc_angular(x) return x, y @staticmethod def get_input_res(): return 112, 112 def set_dropout_ratio(self, ratio): assert 0 <= ratio < 1. 
================================================ FILE: model/shufflenet_v2_angular.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import torch.nn as nn from losses.am_softmax import AngleSimpleLinear from model.backbones.shufflenet_v2 import ShuffleNetV2Body from .common import ModelInterface class ShuffleNetV2Angular(ModelInterface): """Face reid head for the ShuffleNetV2 architecture""" def __init__(self, embedding_size, num_classes=0, feature=True, body=ShuffleNetV2Body, **kwargs): super(ShuffleNetV2Angular, self).__init__() self.feature = feature kwargs['input_size'] = ShuffleNetV2Angular.get_input_res()[0] kwargs['width_mult'] = 1. self.backbone = body(**kwargs) k_size = int(kwargs['input_size'] / self.backbone.get_downscale_factor()) self.global_pool = nn.Conv2d(self.backbone.stage_out_channels[-1], self.backbone.stage_out_channels[-1], (k_size, k_size), groups=self.backbone.stage_out_channels[-1], bias=False) self.conv1_extra = nn.Conv2d(self.backbone.get_num_output_channels(), embedding_size, 1, padding=0, bias=False) if not feature: self.fc_angular = AngleSimpleLinear(embedding_size, num_classes) def forward(self, x): x = self.backbone(x) x = self.global_pool(x) x = self.conv1_extra(x) if self.feature or not self.training: return x x = x.view(x.size(0), -1) y = self.fc_angular(x) return x, y def set_dropout_ratio(self, ratio): assert 0 <= ratio < 1. 
@staticmethod def get_input_res(): res = 128 return res, res ================================================ FILE: requirements.txt ================================================ glog==0.3.1 numpy==1.15.4 opencv-python==3.4.4.19 Pillow==5.3.0 protobuf==3.6.1 python-gflags==3.1.2 scipy==1.1.0 six==1.11.0 tensorboardX==1.4 torch==0.4.1 torchvision==0.2.1 tqdm==4.28.1 pyyaml>=3.12 ptflops==0.1 ================================================ FILE: scripts/__init__.py ================================================ ================================================ FILE: scripts/accuracy_check.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" import argparse import glog as log import numpy as np import torch from tqdm import tqdm import cv2 as cv from utils.utils import load_model_state from utils.ie_tools import load_ie_model from model.common import models_backbones, models_landmarks def main(): """Runs the accuracy check""" parser = argparse.ArgumentParser(description='Accuracy check script (pt vs caffe)') parser.add_argument('--embed_size', type=int, default=128, help='Size of the face embedding.') parser.add_argument('--snap', type=str, required=True, help='Snapshot to convert.') parser.add_argument('--device', '-d', default=0, type=int, help='Device for model placement.') parser.add_argument('--model', choices=list(models_backbones.keys()) + list(models_landmarks.keys()), type=str, default='rmnet') # IE-related options parser.add_argument('--ie_model', type=str, required=True) parser.add_argument("-l", "--cpu_extension", help="MKLDNN (CPU)-targeted custom layers.Absolute path to a shared library with the kernels " "impl.", type=str, default=None) parser.add_argument("-pp", "--plugin_dir", help="Path to a plugin folder", type=str, default=None) parser.add_argument("-d_ie", "--device_ie", help="Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. Sample " "will look for a suitable plugin for device specified (CPU by default)", default="CPU", type=str) args = parser.parse_args() max_err = 0. 
with torch.cuda.device(args.device): if args.model in models_landmarks.keys(): pt_model = models_landmarks[args.model] else: pt_model = models_backbones[args.model](embedding_size=args.embed_size, feature=True) pt_model = load_model_state(pt_model, args.snap, args.device) ie_model = load_ie_model(args.ie_model, args.device_ie, args.plugin_dir, args.cpu_extension) np.random.seed(0) for _ in tqdm(range(100)): input_img = np.random.randint(0, high=255, size=(*pt_model.get_input_res(), 3), dtype=np.uint8) input_bgr = cv.cvtColor(input_img, cv.COLOR_BGR2RGB) input_pt = torch.unsqueeze(torch.from_numpy(input_img.transpose(2, 0, 1).astype('float32') / 255.).cuda(), dim=0) pt_output = (pt_model(input_pt)).data.cpu().numpy().reshape(1, -1) ie_output = ie_model.forward(input_bgr).reshape(1, -1) max_err = max(np.linalg.norm(pt_output - ie_output, np.inf), max_err) log.info('Max l_inf error: %e', max_err) if __name__ == '__main__': main() ================================================ FILE: scripts/align_images.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" import argparse import os import os.path as osp import json import cv2 as cv import torch from tqdm import tqdm from torchvision.transforms import transforms from model import landnet from utils import utils from utils import augmentation from utils.face_align import FivePointsAligner class LandnetPT: """Wrapper for landmarks regression model""" def __init__(self, model): self.net = model self.transformer = transforms.Compose( [augmentation.ResizeNumpy((48, 48)), augmentation.NumpyToTensor(switch_rb=True)]) def get_landmarks(self, batch): converted_batch = [] for item in batch: converted_batch.append(self.transformer(item)) pt_blob = torch.stack(converted_batch).cuda() landmarks = self.net(pt_blob) return landmarks.data.cpu().numpy() class FaceDetector: """Wrapper class for face detector""" def __init__(self, proto, model, conf=.6, expand_ratio=(1.1, 1.05), size=(300, 300)): self.net = cv.dnn.readNetFromCaffe(proto, model) self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_DEFAULT) self.net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) last_layer_id = self.net.getLayerId(self.net.getLayerNames()[-1]) last_layer = self.net.getLayer(last_layer_id) assert last_layer.type == 'DetectionOutput' self.confidence = conf self.expand_ratio = expand_ratio self.det_res = size def __decode_detections(self, out, frame_shape): """Decodes raw SSD output""" frame_height = frame_shape[0] frame_width = frame_shape[1] detections = [] for detection in out[0, 0]: confidence = detection[2] if confidence > self.confidence: left = int(max(detection[3], 0) * frame_width) top = int(max(detection[4], 0) * frame_height) right = int(max(detection[5], 0) * frame_width) bottom = int(max(detection[6], 0) * frame_height) if self.expand_ratio != (1., 1.): w = (right - left) h = (bottom - top) dw = w * (self.expand_ratio[0] - 1.) / 2 dh = h * (self.expand_ratio[1] - 1.) 
/ 2 left = max(int(left - dw), 0) right = int(right + dw) top = max(int(top - dh), 0) bottom = int(bottom + dh) # classId = int(detection[1]) - 1 # Skip background label detections.append(((left, top, right, bottom), confidence)) if len(detections) > 1: detections.sort(key=lambda x: x[1], reverse=True) return detections def get_detections(self, frame): """Returns all detections on frame""" blob = cv.dnn.blobFromImage(frame, 1., (self.det_res[0], self.det_res[1]), crop=False) self.net.setInput(blob) out = self.net.forward() detections = self.__decode_detections(out, frame.shape) return detections def draw_detections(frame, detections, landmarks): """Draw detections and landmarks on a frame""" for _, rect in enumerate(detections): left, top, right, bottom = rect cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), thickness=2) for point in landmarks.reshape(-1, 2): point = (int(left + point[0] * (right - left)), int(top + point[1] * (bottom - top))) cv.circle(frame, point, 5, (255, 0, 0), -1) return frame def run_dumping(images_list, face_det, landmarks_regressor, vis_flag): """Dumps detections and landmarks from images""" detected_num = 0 data = [] for path in tqdm(images_list, 'Dumping data'): image = cv.imread(path, cv.IMREAD_COLOR) if image is None: continue detections = face_det.get_detections(image) landmarks = None if detections: left, top, right, bottom = detections[0][0] roi = image[top:bottom, left:right] landmarks = landmarks_regressor.get_landmarks([roi]).reshape(-1) data.append({'path': path, 'bbox': detections[0][0], 'landmarks': landmarks}) detected_num += 1 if vis_flag: FivePointsAligner.align(roi, landmarks, d_size=(200,200), normalize=False, show=True) else: data.append({'path': path, 'bbox': None, 'landmarks': None}) print('Detection ratio: ', float(detected_num) / len(data)) return data def create_images_list(images_root, imgs_list): input_filenames = [] input_dir = os.path.abspath(images_root) if imgs_list is None: stop = False for 
path, _, files in os.walk(input_dir): if stop: break for name in files: if name.lower().endswith('.jpg') or name.lower().endswith('.png') \ or name.lower().endswith('.jpeg') or name.lower().endswith('.gif') \ or not '.' in name: filename = os.path.join(path, name) input_filenames.append(filename) else: with open(imgs_list) as f: data = json.load(f) for path in data['path']: filename = osp.join(images_root, path) input_filenames.append(filename) return input_filenames def save_data(data, filename, root_dir): print('Saving data...') with open(filename, 'w') as f: for instance in data: line = osp.relpath(instance['path'], start=root_dir) + ' | ' if instance['bbox'] is not None: for x in instance['landmarks']: line += str(x) + ' ' line += ' | ' left, top, right, bottom = instance['bbox'] line += str(left) + ' ' + str(top) + ' ' + str(right - left) + ' ' + str(bottom - top) f.write(line.strip() + '\n') def main(): parser = argparse.ArgumentParser(description='') parser.add_argument('--images_root', type=str, default=None, required=True) parser.add_argument('--images_list', type=str, default=None, required=False) parser.add_argument('--fd_proto', type=str, default='../demo/face_detector/deploy_fd.prototxt', help='') parser.add_argument('--fd_model', type=str, default='../demo/face_detector/sq_300x300_iter_120000.caffemodel', help='') parser.add_argument('--fr_thresh', type=float, default=0.1) parser.add_argument('--det_res', type=int, nargs=2, default=[300, 300], help='Detection net input resolution.') parser.add_argument('--landnet_model', type=str) parser.add_argument('--device', type=int, default=0) parser.add_argument('--visualize', action='store_true') args = parser.parse_args() face_detector = FaceDetector(args.fd_proto, args.fd_model, conf=args.fr_thresh, size=args.det_res) with torch.cuda.device(args.device): landmarks_regressor = utils.load_model_state(landnet.LandmarksNet(), args.landnet_model, args.device) data = 
run_dumping(create_images_list(args.images_root, args.images_list), face_detector, LandnetPT(landmarks_regressor), args.visualize) save_data(data, osp.join(args.images_root, 'list.txt'), args.images_root) if __name__ == '__main__': main() ================================================ FILE: scripts/count_flops.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import argparse import torch from model.common import models_backbones, models_landmarks from ptflops import get_model_complexity_info def main(): """Runs flops counter""" parser = argparse.ArgumentParser(description='Evaluation script for Face Recognition in PyTorch') parser.add_argument('--embed_size', type=int, default=128, help='Size of the face embedding.') parser.add_argument('--model', choices=list(models_backbones.keys()) + list(models_landmarks.keys()), type=str, default='rmnet') args = parser.parse_args() with torch.no_grad(): if args.model in models_landmarks.keys(): model = models_landmarks[args.model]() else: model = models_backbones[args.model](embedding_size=args.embed_size, feature=True) flops, params = get_model_complexity_info(model, model.get_input_res(), as_strings=True, print_per_layer_stat=True) print('Flops: {}'.format(flops)) print('Params: {}'.format(params)) if __name__ == '__main__': main() ================================================ FILE: scripts/matio.py ================================================ # pylint: 
skip-file import struct import numpy as np cv_type_to_dtype = { 5 : np.dtype('float32'), 6 : np.dtype('float64') } dtype_to_cv_type = {v : k for k,v in cv_type_to_dtype.items()} def write_mat(f, m): """Write mat m to file f""" if len(m.shape) == 1: rows = m.shape[0] cols = 1 else: rows, cols = m.shape header = struct.pack('iiii', rows, cols, cols * 4, dtype_to_cv_type[m.dtype]) f.write(header) f.write(m.data) def read_mat(f): """ Reads an OpenCV mat from the given file opened in binary mode """ rows, cols, stride, type_ = struct.unpack('iiii', f.read(4*4)) mat = np.fromstring(f.read(rows*stride),dtype=cv_type_to_dtype[type_]) return mat.reshape(rows,cols) def read_mkl_vec(f): """ Reads an OpenCV mat from the given file opened in binary mode """ # Read past the header information f.read(4*4) length, stride, type_ = struct.unpack('iii', f.read(3*4)) mat = np.fromstring(f.read(length*4),dtype=np.float32) return mat def load_mkl_vec(filename): """ Reads a OpenCV Mat from the given filename """ return read_mkl_vec(open(filename,'rb')) def load_mat(filename): """ Reads a OpenCV Mat from the given filename """ return read_mat(open(filename,'rb')) def save_mat(filename, m): """Saves mat m to the given filename""" return write_mat(open(filename,'wb'), m) def main(): f = open('1_to_0.bin','rb') vx = read_mat(f) vy = read_mat(f) if __name__ == '__main__': main() ================================================ FILE: scripts/plot_roc_curves_lfw.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. """ import argparse import matplotlib.pyplot as plt from evaluate_lfw import get_auc def main(): parser = argparse.ArgumentParser(description='') parser.add_argument('rocs', metavar='ROCs', type=str, nargs='+', help='paths to roc curves') args = parser.parse_args() plt.xlabel("False Positive Rate") plt.ylabel("True Positive Rate") plt.grid(b=True, which='major', color='k', linestyle='-') plt.grid(b=True, which='minor', color='k', linestyle='-', alpha=0.2) plt.minorticks_on() for curve_file in args.rocs: fprs = [] tprs = [] with open(curve_file, 'r') as f: for line in f.readlines(): values = line.strip().split() fprs.append(float(values[1])) tprs.append(float(values[0])) curve_name = curve_file.split('/')[-1].split('.')[0] plt.plot(fprs, tprs, label=curve_name) plt.legend(loc='best', fontsize=10) print('AUC for {}: {}'.format(curve_name, get_auc(fprs, tprs))) plt.show() if __name__ == '__main__': main() ================================================ FILE: scripts/pytorch2onnx.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" import argparse import torch from utils.utils import load_model_state from model.common import models_backbones, models_landmarks def main(): parser = argparse.ArgumentParser(description='Conversion script for FR models from PyTorch to ONNX') parser.add_argument('--embed_size', type=int, default=128, help='Size of the face embedding.') parser.add_argument('--snap', type=str, required=True, help='Snapshot to convert.') parser.add_argument('--device', '-d', default=-1, type=int, help='Device for model placement.') parser.add_argument('--output_dir', default='./', type=str, help='Output directory.') parser.add_argument('--model', choices=list(models_backbones.keys()) + list(models_landmarks.keys()), type=str, default='rmnet') args = parser.parse_args() if args.model in models_landmarks.keys(): model = models_landmarks[args.model]() else: model = models_backbones[args.model](embedding_size=args.embed_size, feature=True) model = load_model_state(model, args.snap, args.device, eval_state=True) input_var = torch.rand(1, 3, *model.get_input_res()) dump_name = args.snap[args.snap.rfind('/') + 1:-3] torch.onnx.export(model, input_var, dump_name + '.onnx', verbose=True, export_params=True) if __name__ == '__main__': main() ================================================ FILE: tests/__init__.py ================================================ ================================================ FILE: tests/test_alignment.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. """ import unittest import cv2 as cv import numpy as np from utils.face_align import FivePointsAligner from utils.landmarks_augmentation import RandomRotate class FaceAlignmentTests(unittest.TestCase): """Tests for alignment methods""" def test_align_image(self): """Synthetic test for alignment function""" image = np.zeros((128, 128, 3), dtype=np.float32) for point in FivePointsAligner.ref_landmarks: point_scaled = point * [128, 128] cv.circle(image, tuple(point_scaled.astype(np.int)), 5, (255, 255, 255), cv.FILLED) transform = RandomRotate(40., p=1.) rotated_data = transform({'img': image, 'landmarks': FivePointsAligner.ref_landmarks}) aligned_image = FivePointsAligner.align(rotated_data['img'], \ rotated_data['landmarks'].reshape(-1), d_size=(128, 128), normalized=True) for point in FivePointsAligner.ref_landmarks: point_scaled = (point * [128, 128]).astype(np.int) check_sum = np.mean(aligned_image[point_scaled[1] - 3 : point_scaled[1] + 3, point_scaled[0] - 3 : point_scaled[0] + 3]) self.assertGreaterEqual(check_sum, 220) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/test_models.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" import unittest import os import torch from model.common import models_backbones, models_landmarks from utils.utils import save_model_cpu, load_model_state class BackbonesTests(unittest.TestCase): """Tests for backbones""" def test_output_shape(self): """Checks output shape""" embed_size = 256 for model_type in models_backbones.values(): model = model_type(embedding_size=embed_size, feature=True).eval() batch = torch.Tensor(1, 3, *model.get_input_res()).uniform_() output = model(batch) self.assertEqual(list(output.shape), list((1, embed_size, 1, 1))) def test_save_load_snap(self): """Checks an ability to save and load model correctly""" embed_size = 256 snap_name = os.path.join(os.getcwd(), 'test_snap.pt') for model_type in models_backbones.values(): model = model_type(embedding_size=embed_size, feature=True).eval() batch = torch.Tensor(1, 3, *model.get_input_res()).uniform_() output = model(batch) save_model_cpu(model, None, snap_name, 0, write_solverstate=False) model_loaded = model_type(embedding_size=embed_size, feature=True) load_model_state(model_loaded, snap_name, -1, eval_state=True) output_loaded = model_loaded(batch) self.assertEqual(torch.norm(output - output_loaded), 0) class LandnetTests(unittest.TestCase): """Tests for landmark regressor""" def test_output_shape(self): """Checks output shape""" model = models_landmarks['landnet']().eval() batch = torch.Tensor(1, 3, *model.get_input_res()) output = model(batch) self.assertEqual(list(output.shape), list((1, 10, 1, 1))) def test_save_load_snap(self): """Checks an ability to save and load model correctly""" snap_name = os.path.join(os.getcwd(), 'test_snap.pt') model = models_landmarks['landnet']().eval() batch = torch.Tensor(1, 3, *model.get_input_res()).uniform_() output = model(batch) save_model_cpu(model, None, snap_name, 0, write_solverstate=False) model_loaded = models_landmarks['landnet']() load_model_state(model_loaded, snap_name, -1, eval_state=True) output_loaded = model_loaded(batch) 
class UtilsTests(unittest.TestCase):
    """Tests for utils"""

    def test_parameters_counter(self):
        """Checks that get_model_parameters_number counts every parameter element"""

        class ParamsHolder(torch.nn.Module):
            """Module holding two equally sized parameter vectors and one plain attribute"""

            def __init__(self, n_params):
                super(ParamsHolder, self).__init__()
                half = n_params // 2
                self.p1 = torch.nn.Parameter(torch.Tensor(half))
                self.p2 = torch.nn.Parameter(torch.Tensor(half))
                # Not a parameter: must not contribute to the count.
                self.dummy = -1

        expected_count = 1000
        holder = ParamsHolder(expected_count)
        counted = get_model_parameters_number(holder, as_string=False)
        self.assertEqual(counted, expected_count)
def train(args):
    """Performs training of a face recognition network

    Builds the train dataset selected by args.train_dataset and the LFW
    validation set, creates the backbone selected by args.model, and runs SGD
    with a MultiStepLR schedule for args.epoch_total_num epochs. Every
    args.val_step iterations a snapshot is saved (except at iteration 0) and
    the model is evaluated on LFW; losses and accuracies are written to a
    tensorboardX summary.

    NOTE(review): the nesting of a few statements (device placement after the
    resume branch, evaluation relative to the `iteration > 0` check, scalar
    logging relative to `i % 10 == 0`) was reconstructed from a flattened
    listing; confirm against the repository.
    """
    input_size = models_backbones[args.model].get_input_res()
    if args.train_dataset == 'vgg':
        assert args.t_list
        dataset = VGGFace2(args.train, args.t_list, args.t_land)
    elif args.train_dataset == 'imdbface':
        dataset = IMDBFace(args.train, args.t_list)
    elif args.train_dataset == 'trp':
        dataset = TrillionPairs(args.train, args.t_list)
    else:
        dataset = MSCeleb1M(args.train, args.t_list)

    if dataset.have_landmarks:
        log.info('Use alignment for the train data')
        dataset.transform = t.Compose([augm.HorizontalFlipNumpy(p=.5),
                                       augm.CutOutWithPrior(p=0.05, max_area=0.1),
                                       augm.RandomRotationNumpy(10, p=.95),
                                       augm.ResizeNumpy(input_size),
                                       augm.BlurNumpy(k=5, p=.2),
                                       augm.NumpyToTensor(switch_rb=True)])
    else:
        dataset.transform = t.Compose([augm.ResizeNumpy(input_size),
                                       augm.HorizontalFlipNumpy(),
                                       augm.RandomRotationNumpy(10),
                                       augm.NumpyToTensor(switch_rb=True)])

    if args.weighted:
        train_weights = dataset.get_weights()
        train_weights = torch.DoubleTensor(train_weights)
        sampler = torch.utils.data.sampler.WeightedRandomSampler(train_weights, len(train_weights))
        train_loader = torch.utils.data.DataLoader(dataset, batch_size=args.train_batch_size,
                                                   sampler=sampler, num_workers=3, pin_memory=False)
    else:
        train_loader = DataLoader(dataset, batch_size=args.train_batch_size, num_workers=4, shuffle=True)

    lfw = LFW(args.val, args.v_list, args.v_land)
    if lfw.use_landmarks:
        log.info('Use alignment for the test data')
        lfw.transform = t.Compose([augm.ResizeNumpy(input_size),
                                   augm.NumpyToTensor(switch_rb=True)])
    else:
        lfw.transform = t.Compose([augm.ResizeNumpy((160, 160)),
                                   augm.CenterCropNumpy(input_size),
                                   augm.NumpyToTensor(switch_rb=True)])

    log_path = './logs/{:%Y_%m_%d_%H_%M}_{}'.format(datetime.datetime.now(), args.snap_prefix)
    writer = SummaryWriter(log_path)

    # makedirs also creates missing parent folders and, with exist_ok, has no
    # check-then-create race (os.mkdir after osp.exists had both problems).
    os.makedirs(args.snap_folder, exist_ok=True)

    model = models_backbones[args.model](embedding_size=args.embed_size,
                                         num_classes=dataset.get_num_classes(), feature=False)
    if args.snap_to_resume is not None:
        log.info('Resuming snapshot ' + args.snap_to_resume + ' ...')
        model = load_model_state(model, args.snap_to_resume, args.devices[0], eval_state=False)
    # output_device defaults to device_ids[0], so one call covers both the
    # resumed and the fresh model.
    model = torch.nn.DataParallel(model, device_ids=args.devices, output_device=args.devices[0])
    # Applied unconditionally: a no-op when the loaded model is already on the
    # GPU and in train mode.
    model.cuda()
    model.train()
    cudnn.benchmark = True

    log.info('Face Recognition model:')
    log.info(model)

    if args.mining_type == 'focal':
        softmax_criterion = AMSoftmaxLoss(gamma=args.gamma, m=args.m, margin_type=args.margin_type, s=args.s)
    else:
        softmax_criterion = AMSoftmaxLoss(t=args.t, m=0.35, margin_type=args.margin_type, s=args.s)
    aux_losses = MetricLosses(dataset.get_num_classes(), args.embed_size, writer)
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [3, 6, 9, 13])

    for epoch_num in range(args.epoch_total_num):
        # NOTE(review): the scheduler is stepped at the start of the epoch, before any
        # optimizer step; kept as is because moving it would shift the LR schedule.
        scheduler.step()
        if epoch_num > 6:
            model.module.set_dropout_ratio(0.)
        classification_correct = 0
        classification_total = 0

        for i, data in enumerate(train_loader, 0):
            iteration = epoch_num * len(train_loader) + i

            if iteration % args.val_step == 0:
                snapshot_name = osp.join(args.snap_folder, args.snap_prefix + '_{0}.pt'.format(iteration))
                if iteration > 0:
                    log.info('Saving Snapshot: ' + snapshot_name)
                    save_model_cpu(model, optimizer, snapshot_name, epoch_num)

                log.info('Evaluating Snapshot: ' + snapshot_name)
                model.eval()
                same_acc, diff_acc, all_acc, auc = evaluate(args, lfw, model, compute_embeddings_lfw,
                                                            args.val_batch_size, verbose=False)
                model.train()

                log.info('Validation accuracy: {0:.4f}, {1:.4f}'.format(same_acc, diff_acc))
                log.info('Validation accuracy mean: {0:.4f}'.format(all_acc))
                log.info('Validation AUC: {0:.4f}'.format(auc))
                writer.add_scalar('Accuracy/Val_same_accuracy', same_acc, iteration)
                writer.add_scalar('Accuracy/Val_diff_accuracy', diff_acc, iteration)
                writer.add_scalar('Accuracy/Val_accuracy', all_acc, iteration)
                writer.add_scalar('Accuracy/AUC', auc, iteration)

            data, label = data['img'], data['label'].cuda()
            features, sm_outputs = model(data)

            optimizer.zero_grad()
            aux_losses.init_iteration()
            aux_loss, aux_log = aux_losses(features, label, epoch_num, iteration)
            loss_sm = softmax_criterion(sm_outputs, label)
            loss = loss_sm + aux_loss
            loss.backward()
            aux_losses.end_iteration()
            optimizer.step()

            # Running classification accuracy over the current epoch.
            _, predicted = torch.max(sm_outputs.data, 1)
            classification_total += int(label.size(0))
            classification_correct += int(torch.sum(predicted.eq(label)))
            train_acc = float(classification_correct) / classification_total

            if i % 10 == 0:
                log.info('Iteration %d, Softmax loss: %.4f, Total loss: %.4f' % (iteration, loss_sm, loss) + aux_log)
                log.info('Learning rate: %f' % scheduler.get_lr()[0])
                writer.add_scalar('Loss/train_loss', loss, iteration)
                writer.add_scalar('Loss/softmax_loss', loss_sm, iteration)
                writer.add_scalar('Learning_rate', scheduler.get_lr()[0], iteration)
                writer.add_scalar('Accuracy/classification', train_acc, iteration)

    # Flush pending summaries to disk once training is over.
    writer.close()
parser.add_argument('--momentum', type=float, default=0.9, help='Momentum.') parser.add_argument('--weight_decay', type=float, default=0.0001, help='Weight decay.') #loss configuration parser.add_argument('--mining_type', choices=['focal', 'sv'], type=str, default='sv', help='Hard mining method in loss.') parser.add_argument('--t', type=float, default=1.1, help='t in support vector softmax. See https://arxiv.org/abs/1812.11317 for details') parser.add_argument('--gamma', type=float, default=2., help='Gamma in focal loss. See https://arxiv.org/abs/1708.02002 for details') parser.add_argument('--m', type=float, default=0.35, help='Margin size for AMSoftmax.') parser.add_argument('--s', type=float, default=30., help='Scale for AMSoftmax.') parser.add_argument('--margin_type', choices=['cos', 'arc'], type=str, default='cos', help='Margin type for AMSoftmax loss.') #other parameters parser.add_argument('--devices', type=int, nargs='+', default=[0], help='CUDA devices to use.') parser.add_argument('--val_batch_size', type=int, default=20, help='Validation batch size.') parser.add_argument('--snap_folder', type=str, default='./snapshots/', help='Folder to save snapshots.') parser.add_argument('--snap_prefix', type=str, default='FaceReidNet', help='Prefix for snapshots.') parser.add_argument('--snap_to_resume', type=str, default=None, help='Snapshot to resume.') parser.add_argument('--weighted', action='store_true') args = parser.parse_args() log.info('Arguments:\n' + pformat(args.__dict__)) with torch.cuda.device(args.devices[0]): train(args) if __name__ == '__main__': main() ================================================ FILE: train_landmarks.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
def train(args):
    """Launches training of landmark regression model

    Selects the dataset and the LR drop schedule from args.dataset, trains the
    'landnet' model with SGD and the wing AlignmentLoss, and every
    args.val_step iterations saves a snapshot, evaluates on the train loader
    and re-weights the loss by the normalized per-point error.

    Exits with status 1 when the chosen dataset provides no landmarks.

    NOTE(review): the nesting of device placement after the resume branch and
    of scalar logging under `i % 10 == 0` was reconstructed from a flattened
    listing; confirm against the repository.
    """
    # Local imports: this module only imports os.path (as osp) at the top.
    import os
    import sys

    if args.dataset == 'vgg':
        drops_schedule = [1, 6, 9, 13]
        dataset = VGGFace2(args.train, args.t_list, args.t_land, landmarks_training=True)
    elif args.dataset in ('celeba', 'celeb'):
        # The command line parser of this script offers 'celeb'; comparing only
        # with 'celeba' silently selected the NDG dataset for that choice.
        drops_schedule = [10, 20]
        dataset = CelebA(args.train, args.t_land)
    else:
        drops_schedule = [90, 140, 200]
        dataset = NDG(args.train, args.t_land)

    if not dataset.have_landmarks:
        log.error('Error: training dataset has no landmarks data')
        # sys.exit instead of the interactive exit() helper, with a failing status.
        sys.exit(1)

    log.info('Use alignment for the train data')
    dataset.transform = transforms.Compose([landmarks_augmentation.Rescale((56, 56)),
                                            landmarks_augmentation.Blur(k=3, p=.2),
                                            landmarks_augmentation.HorizontalFlip(p=.5),
                                            landmarks_augmentation.RandomRotate(50),
                                            landmarks_augmentation.RandomScale(.8, .9, p=.4),
                                            landmarks_augmentation.RandomCrop(48),
                                            landmarks_augmentation.ToTensor(switch_rb=True)])

    train_loader = DataLoader(dataset, batch_size=args.train_batch_size, num_workers=4, shuffle=True)
    writer = SummaryWriter('./logs_landm/{:%Y_%m_%d_%H_%M}_'.format(datetime.datetime.now()) + args.snap_prefix)

    # Snapshots are written into this folder below; make sure it exists.
    os.makedirs(args.snap_folder, exist_ok=True)

    # Instantiate the network: the registry stores the model class, and the
    # class object itself cannot be wrapped by DataParallel or trained.
    model = models_landmarks['landnet']()

    if args.snap_to_resume is not None:
        log.info('Resuming snapshot ' + args.snap_to_resume + ' ...')
        model = load_model_state(model, args.snap_to_resume, args.device, eval_state=False)
    model = torch.nn.DataParallel(model, device_ids=[args.device])
    # Applied unconditionally: a no-op when the loaded model is already on the
    # GPU and in train mode.
    model.cuda()
    model.train()
    cudnn.enabled = True
    cudnn.benchmark = True

    log.info('Face landmarks model:')
    log.info(model)

    criterion = AlignmentLoss('wing')
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, drops_schedule)

    for epoch_num in range(args.epoch_total_num):
        # NOTE(review): stepped before any optimizer step; kept as is because
        # moving it would shift the LR schedule.
        scheduler.step()
        if epoch_num > 5:
            model.module.set_dropout_ratio(0.)

        for i, data in enumerate(train_loader, 0):
            iteration = epoch_num * len(train_loader) + i

            data, gt_landmarks = data['img'].cuda(), data['landmarks'].cuda()
            predicted_landmarks = model(data)

            optimizer.zero_grad()
            loss = criterion(predicted_landmarks, gt_landmarks)
            loss.backward()
            optimizer.step()

            if i % 10 == 0:
                log.info('Iteration %d, Loss: %.4f' % (iteration, loss))
                log.info('Learning rate: %f' % scheduler.get_lr()[0])
                writer.add_scalar('Loss/train_loss', loss.item(), iteration)
                writer.add_scalar('Learning_rate', scheduler.get_lr()[0], iteration)

            if iteration % args.val_step == 0:
                snapshot_name = osp.join(args.snap_folder, args.snap_prefix + '_{0}.pt'.format(iteration))
                log.info('Saving Snapshot: ' + snapshot_name)
                save_model_cpu(model, optimizer, snapshot_name, epoch_num)

                model.eval()
                log.info('Evaluating Snapshot: ' + snapshot_name)
                avg_err, per_point_avg_err, failures_rate = evaluate(train_loader, model)
                # Points with a larger error get a larger weight in the loss.
                weights = per_point_avg_err / np.sum(per_point_avg_err)
                criterion.set_weights(weights)
                log.info(str(weights))
                log.info('Avg train error: {}'.format(avg_err))
                log.info('Train failure rate: {}'.format(failures_rate))
                writer.add_scalar('Quality/Avg_error', avg_err, iteration)
                writer.add_scalar('Quality/Failure_rate', failures_rate, iteration)
                model.train()

    # Flush pending summaries to disk once training is over.
    writer.close()
class NumpyToTensor:
    """Turns an H x W x C numpy image into a C x H x W float tensor divided by 255,
    optionally swapping the R and B channels first"""

    def __init__(self, switch_rb=False):
        self.switch_rb = switch_rb

    def __call__(self, image):
        source = cv.cvtColor(image, cv.COLOR_RGB2BGR) if self.switch_rb else image
        # numpy keeps channels last (H x W x C), torch expects them first (C x H x W)
        channels_first = source.transpose((2, 0, 1))
        tensor = torch.from_numpy(channels_first)
        return tensor.type(torch.FloatTensor) / 255.
class ResizeNumpy:
    """Resizes an image in numpy format

    output_size is either an int (target length of the shorter image side, the
    aspect ratio is kept) or a (height, width) tuple.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, image):
        """Returns the resized copy of the given H x W [x C] image"""
        h, w = image.shape[:2]
        if isinstance(self.output_size, int):
            # Scale so that the shorter side becomes output_size.
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size

        new_h, new_w = int(new_h), int(new_w)
        # cv.resize takes the target size as (width, height); passing
        # (height, width) transposed the result for non-square targets.
        return cv.resize(image, (new_w, new_h))
class CutOutWithPrior:
    """Cuts rectangular patches from an image around pre-defined landmark locations

    With probability p, between 1 and 3 distinct reference landmarks are
    chosen and a black rectangle is drawn over each of them. The rectangles
    are drawn on the input image itself.
    """

    def __init__(self, p, max_area):
        self.p = p
        self.max_area = max_area

    # use after resize transform
    def __call__(self, img):
        """Returns img with the patches erased (img is modified in place)"""
        if float(torch.FloatTensor(1).uniform_()) >= self.p:
            return img

        height, width = img.shape[:2]
        # Reference landmarks are stored normalized; convert them to pixels.
        keypoints_ref = np.zeros((5, 2), dtype=np.float32)
        keypoints_ref[:, 0] = FivePointsAligner.ref_landmarks[:, 0] * width
        keypoints_ref[:, 1] = FivePointsAligner.ref_landmarks[:, 1] * height

        # Plain Python numbers instead of 1-element tensors.
        erase_num = int(torch.LongTensor(1).random_(1, 4))
        erase_ratio = float(torch.FloatTensor(1).uniform_(self.max_area / 2, self.max_area))
        erase_h = math.sqrt(erase_ratio) / erase_num * height
        erase_w = math.sqrt(erase_ratio) / erase_num * width

        erased_idx = []
        for _ in range(erase_num):
            # Draw landmark indices until one that was not erased yet comes up.
            erase_pos = int(torch.LongTensor(1).random_(0, 5))
            while erase_pos in erased_idx:
                erase_pos = int(torch.LongTensor(1).random_(0, 5))

            center_x, center_y = keypoints_ref[erase_pos]
            # x extends by the patch width and y by the patch height (the two
            # were swapped, which distorted patches on non-square images).
            left_corner = (int(center_x - erase_w / 2), int(center_y - erase_h / 2))
            right_corner = (int(center_x + erase_w / 2), int(center_y + erase_h / 2))
            cv.rectangle(img, left_corner, right_corner, (0, 0, 0), thickness=-1)
            erased_idx.append(erase_pos)

        return img
def transformation_from_points(points1, points2):
    """Builds a 2x3 affine transformation matrix from points1 to points2

    Both point sets are centered and divided by their standard deviation, the
    rotation is taken from the SVD of their cross-product matrix, and the
    result is [scale * rotation | translation]. The inputs are not modified.
    """
    def _center_and_scale(points):
        # Work on a float64 copy so the caller's array stays untouched.
        centered = points.astype(np.float64)
        center = np.mean(centered, axis=0)
        centered -= center
        spread = np.std(centered)
        centered /= spread
        return centered, center, spread

    src, src_center, src_spread = _center_and_scale(points1)
    dst, dst_center, dst_spread = _center_and_scale(points2)

    u, _, vt = np.linalg.svd(np.matmul(src.T, dst))
    rotation = np.matmul(u, vt).T

    linear_part = (dst_spread / src_spread) * rotation
    offset = dst_center.T - (dst_spread / src_spread) * np.matmul(rotation, src_center.T)
    return np.hstack((linear_part, offset.reshape(2, -1)))
""" import sys import os import glog as log import numpy as np from openvino.inference_engine import IENetwork, IEPlugin # pylint: disable=import-error,E0611 class IEModel: """Class for inference of models in the Inference Engine format""" def __init__(self, exec_net, inputs_info, input_key, output_key): self.net = exec_net self.inputs_info = inputs_info self.input_key = input_key self.output_key = output_key def forward(self, img): """Performs forward pass of the wrapped IE model""" res = self.net.infer(inputs={self.input_key: np.expand_dims(img.transpose(2, 0, 1), axis=0)}) return np.copy(res[self.output_key]) def get_input_shape(self): """Returns an input shape of the wrapped IE model""" return self.inputs_info[self.input_key] def load_ie_model(model_xml, device, plugin_dir, cpu_extension=''): """Loads a model in the Inference Engine format""" model_bin = os.path.splitext(model_xml)[0] + ".bin" # Plugin initialization for specified device and load extensions library if specified plugin = IEPlugin(device=device, plugin_dirs=plugin_dir) if cpu_extension and 'CPU' in device: plugin.add_cpu_extension(cpu_extension) # Read IR log.info("Loading network files:\n\t%s\n\t%s", model_xml, model_bin) net = IENetwork(model=model_xml, weights=model_bin) if "CPU" in plugin.device: supported_layers = plugin.get_supported_layers(net) not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers] if not_supported_layers: log.error("Following layers are not supported by the plugin for specified device %s:\n %s", plugin.device, ', '.join(not_supported_layers)) log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l " "or --cpu_extension command line argument") sys.exit(1) assert len(net.inputs.keys()) == 1, "Checker supports only single input topologies" assert len(net.outputs) == 1, "Checker supports only single output topologies" log.info("Preparing input blobs") input_blob = next(iter(net.inputs)) out_blob = 
class RandomCrop:
    """Makes a random crop from the source image with corresponding transformation of landmarks

    output_size is an int (square crop) or a (height, width) tuple. Landmarks
    are expected as normalized (x, y) pairs and are returned normalized to the
    cropped image.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, sample):
        """Returns a dict with the cropped 'img' and the re-normalized 'landmarks'

        Raises ValueError (from numpy) when the crop is larger than the image.
        """
        image, landmarks = sample['img'], sample['landmarks'].reshape(-1, 2)

        h, w = image.shape[:2]
        new_h, new_w = self.output_size

        # randint excludes the upper bound, so +1 makes every valid offset
        # reachable and keeps a crop of exactly the image size from raising.
        top = np.random.randint(0, h - new_h + 1)
        left = np.random.randint(0, w - new_w + 1)

        image = image[top: top + new_h, left: left + new_w]

        # Shift into the crop, then rescale: x by the width ratio and y by the
        # height ratio (the two ratios used to be swapped).
        landmarks = landmarks - [left / float(w), top / float(h)]
        landmarks = landmarks * [float(w) / new_w, float(h) / new_h]

        return {'img': image, 'landmarks': landmarks}
""" def __init__(self, max_angle, p=.5): self.max_angle = max_angle self.p = p def __call__(self, sample): image, landmarks = sample['img'], sample['landmarks'] if float(torch.FloatTensor(1).uniform_()) < self.p: angle = 2*(torch.FloatTensor(1).uniform_() - .5)*self.max_angle h, w = image.shape[:2] rot_mat = cv.getRotationMatrix2D((w*0.5, h*0.5), angle, 1.) image = cv.warpAffine(image, rot_mat, (w, h), flags=cv.INTER_LANCZOS4) rot_mat_l = cv.getRotationMatrix2D((0.5, 0.5), angle, 1.) landmarks = cv.transform(landmarks.reshape(1, 5, 2), rot_mat_l).reshape(5, 2) return {'img': image, 'landmarks': landmarks} class ToTensor: """Convert ndarrays in sample to Tensors.""" def __init__(self, switch_rb=False): self.switch_rb = switch_rb def __call__(self, sample): image, landmarks = sample['img'], sample['landmarks'] # swap color axis because # numpy image: H x W x C # torch image: C X H X W if self.switch_rb: image = cv.cvtColor(image, cv.COLOR_RGB2BGR) image = image.transpose((2, 0, 1)) return {'img': torch.from_numpy(image).type(torch.FloatTensor) / 255, 'landmarks': torch.from_numpy(landmarks).type(torch.FloatTensor).view(-1, 1, 1)} class RandomScale: """Performs uniform scale with a random magnitude""" def __init__(self, max_scale, min_scale, p=.5): self.max_scale = max_scale self.min_scale = min_scale self.p = p def __call__(self, sample): image, landmarks = sample['img'], sample['landmarks'] if float(torch.FloatTensor(1).uniform_()) < self.p: scale = self.min_scale + torch.FloatTensor(1).uniform_()*(self.max_scale - self.min_scale) h, w = image.shape[:2] rot_mat = cv.getRotationMatrix2D((w*0.5, h*0.5), 0, scale) image = cv.warpAffine(image, rot_mat, (w, h), flags=cv.INTER_LANCZOS4) rot_mat_l = cv.getRotationMatrix2D((0.5, 0.5), 0, scale) landmarks = cv.transform(landmarks.reshape(1, 5, 2), rot_mat_l).reshape(5, 2) return {'img': image, 'landmarks': landmarks} ================================================ FILE: utils/parser_yaml.py 
================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ from argparse import ArgumentParser import yaml class ArgumentParserWithYaml(ArgumentParser): """ Attention, this will work with simple yaml files only, and if there is no action=store_false """ @staticmethod def _check_arg_line_repr_None(arg_line, k, v): """ The method is required, since by default python prints None value as None, whereas yaml waiths for null """ s = arg_line.strip() prefixes = [k, "'" + k + "'", '"' + k + '"'] is_ok = False for prefix in prefixes: if s.startswith(prefix): s = s[len(prefix):] is_ok = True break if not is_ok: raise RuntimeError("Unknown prefix in line '{}', k = '{}', v = '{}'".format(arg_line, k, v)) s = s.strip() assert s.startswith(':'), "Bad format of line '{}', k = '{}', v = '{}'".format(arg_line, k, v) s = s[1:] s = s.strip() #print("arg line '{}' repr None = {}, s = '{}'".format(arg_line, s == "None", s)) return s == "None" #note that 'None' will be a string, whereas just None will be None def convert_arg_line_to_args(self, arg_line): arg_line = arg_line.strip() if not arg_line: return [] if arg_line.endswith(','): arg_line = arg_line[:-1] data = yaml.load(arg_line) if data is None: return [] assert type(data) is dict assert len(data) == 1 res = [] for k, v in data.items(): if v == 'None': # default value is None -- skipping if self._check_arg_line_repr_None(arg_line, k, v): #additional check that somebody passed string 
"None" continue else: print("WARNING: DURING PARSING ARGUMENTS FILE: possible error in the argument line '{}' -- probably None value is missed".format(arg_line)) if type(v) is list: res.append('--' + str(k)) [res.append(str(item)) for item in v] continue if type(v) is bool: # special case, action=store_true, do not use store_false! if v: res.append('--' + str(k)) continue # attention, there may be small issue with converting float -> string -> float -> string res.extend(['--' + str(k), str(v)]) return res ================================================ FILE: utils/utils.py ================================================ """ Copyright (c) 2018 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ from collections import OrderedDict import torch import torch.backends.cudnn as cudnn def save_model_cpu(net, optim, ckpt_fname, epoch, write_solverstate=False): """Saves model weights and optimizer state (optionally) to a file""" state_dict = net.state_dict() for key in state_dict.keys(): state_dict[key] = state_dict[key].cpu() snapshot_dict = { 'epoch': epoch, 'state_dict': state_dict} if write_solverstate: snapshot_dict['optimizer'] = optim torch.save(snapshot_dict, ckpt_fname) def get_model_parameters_number(model, as_string=True): """Returns a total number of trainable parameters in a specified model""" params_num = sum(p.numel() for p in model.parameters() if p.requires_grad) if not as_string: return params_num if params_num // 10 ** 6 > 0: flops_str = str(round(params_num / 10. 
** 6, 2)) + 'M' elif params_num // 10 ** 3 > 0: flops_str = str(round(params_num / 10. ** 3, 2)) + 'k' else: flops_str = str(params_num) return flops_str def load_model_state(model, snap, device_id, eval_state=True): """Loads model weight from a file produced by save_model_cpu""" if device_id != -1: location = 'cuda:' + str(device_id) else: location = 'cpu' state_dict = torch.load(snap, map_location=location)['state_dict'] new_state_dict = OrderedDict() for k, v in state_dict.items(): head = k[:7] if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v model.load_state_dict(new_state_dict, strict=False) if device_id != -1: model.cuda(device_id) cudnn.benchmark = True if eval_state: model.eval() else: model.train() return model def flip_tensor(x, dim): """Flips a tensor along the specified axis""" xsize = x.size() dim = x.dim() + dim if dim < 0 else dim x = x.view(-1, *xsize[dim:]) x = x.view(x.size(0), x.size(1), -1)[:, getattr(torch.arange(x.size(1) - 1, -1, -1), ('cpu', 'cuda')[x.is_cuda])().long(), :] return x.view(xsize)