Repository: JACKYLUO1991/Face-skin-hair-segmentaiton-and-skin-color-evaluation Branch: master Commit: de2375dc0ebf Files: 21 Total size: 91.8 KB Directory structure: gitextract_ma5zgnhz/ ├── .gitignore ├── LICENSE ├── README.md ├── benchmark.py ├── data_loader.py ├── experiments/ │ ├── cal_histogram.py │ ├── cal_moments.py │ ├── cal_pca.py │ └── utils.py ├── metric.py ├── model/ │ ├── __init__.py │ ├── dfanet.py │ ├── enet.py │ ├── fast_scnn.py │ ├── flops.py │ ├── hlnet.py │ ├── lednet.py │ └── mobilenet.py ├── pipline_test.py ├── test.py └── train.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ ## HLNet: A Unified Framework for Real-Time Segmentation and Facial Skin Tones Evaluation ## Abstract: Real-time semantic segmentation plays a crucial role in industrial applications such as autonomous driving and the beauty industry. Balancing inference speed against segmentation performance is a challenging problem. To address it, this paper introduces an efficient convolutional neural network (CNN) architecture named HLNet for devices with limited resources. Built from high-quality design modules, HLNet integrates high-dimensional and low-dimensional information while obtaining sufficient receptive fields, and it achieves remarkable results on three benchmark datasets. In practice, the accuracy of skin tone classification is often unsatisfactory due to external environmental factors such as illumination and background impurities.
Therefore, we use HLNet to obtain accurate face regions and then apply the color moment algorithm to extract their color features. Specifically, for a 224 × 224 input, HLNet achieves 78.39% mean IoU on the Figaro1k dataset at over 17 FPS in a CPU environment. We further use the masked color moments for skin tone grade evaluation, and the approximately 80% classification accuracy demonstrates the feasibility of the proposed method. ## The latest open-source work: https://github.com/JACKYLUO1991/FaceParsing. ## **Problem correction:** *Note that some training images were mistakenly copied into the test sets, which inflated the results reported in the arXiv version. The current version has been corrected.* ## Demos
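The demo pipeline chains `model/hlnet.py` (segmentation) with `experiments/cal_moments.py` (masked color-moment features) for skin tone grading. Below is a minimal sketch of that flow; the checkpoint `hlnet.h5`, the input `face.jpg`, and the import paths are placeholders — it assumes a trained model saved with the metrics from `metric.py`, the classifier dumped as `skinColor.pkl` by `cal_moments.py`, and `experiments/` importable as a package:

```python
import cv2 as cv
import numpy as np
import joblib
from keras.models import load_model
from keras.applications.imagenet_utils import preprocess_input

import metric  # noqa: F401 -- importing registers the custom IoU/accuracy metrics
from experiments.cal_moments import color_moments  # assumes experiments/ is on the path

model = load_model('hlnet.h5')      # placeholder: a trained HLNet checkpoint
clf = joblib.load('skinColor.pkl')  # classifier dumped by cal_moments.py

image = cv.imread('face.jpg')       # placeholder input image
inp = cv.resize(image, (224, 224)).astype('float32')  # match the checkpoint's input size
prob = model.predict(preprocess_input(inp[None]))[0]

# In the 3-class setup, channel 1 is skin (0 = background, 2 = hair)
mask = (np.argmax(prob, axis=-1) == 1).astype(np.uint8) * 255
mask = cv.resize(mask, image.shape[1::-1], interpolation=cv.INTER_NEAREST)

features = color_moments(image, mask, color_space='ycrcb')
print('skin tone grade:', clf.predict([features])[0])  # grades 0..4
```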
## Please cite: ``` @article{feng2020hlnet, title={HLNet: A Unified Framework for Real-Time Segmentation and Facial Skin Tones Evaluation}, author={Feng, Xinglong and Gao, Xianwen and Luo, Ling}, journal={Symmetry}, volume={12}, number={11}, pages={1812}, year={2020}, publisher={Multidisciplinary Digital Publishing Institute} } ``` ================================================ FILE: benchmark.py ================================================ import tensorflow as tf tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) import time import numpy as np import argparse from tqdm import tqdm from model.hlnet import HLNet from model.dfanet import DFANet from model.enet import ENet from model.lednet import LEDNet from model.mobilenet import MobileNet from model.fast_scnn import Fast_SCNN parser = argparse.ArgumentParser() parser.add_argument("--image_size", '-i', help="image size", type=int, default=256) parser.add_argument("--batch_size", '-b', help="batch size", type=int, default=3) parser.add_argument("--model_name", help="model's name", choices=['hlnet', 'fastscnn', 'lednet', 'dfanet', 'enet', 'mobilenet'], type=str, default='hlnet') parser.add_argument("--nums", help="number of output classes", type=int, default=1) args = parser.parse_args() IMG_SIZE = args.image_size CLS_NUM = args.nums def get_model(name): if name == 'hlnet': model = HLNet(input_shape=(IMG_SIZE, IMG_SIZE, 3), cls_num=CLS_NUM) elif name == 'fastscnn': model = Fast_SCNN(num_classes=CLS_NUM, input_shape=(IMG_SIZE, IMG_SIZE, 3)).model() elif name == 'lednet': model = LEDNet(groups=2, classes=CLS_NUM, input_shape=(IMG_SIZE, IMG_SIZE, 3)).model() elif name == 'dfanet': model = DFANet(input_shape=(IMG_SIZE, IMG_SIZE, 3), cls_num=CLS_NUM, size_factor=2) elif name == 'enet': model = ENet(input_shape=(IMG_SIZE, IMG_SIZE, 3), cls_num=CLS_NUM) elif name == 'mobilenet': model = MobileNet(input_shape=(IMG_SIZE, IMG_SIZE, 3), cls_num=CLS_NUM) else: raise NameError("No corresponding model...") return model def main(): """Benchmark a model on the local machine.""" model = get_model(args.model_name) inputs = np.random.randn(args.batch_size, args.image_size, args.image_size, 3) time_per_batch = [] for i in tqdm(range(500)): start = time.time() model.predict(inputs, batch_size=args.batch_size) elapsed = time.time() - start time_per_batch.append(elapsed) time_per_batch = np.array(time_per_batch) # Drop the first (warm-up) iteration before averaging print(time_per_batch[1:].mean()) if __name__ == '__main__': main() ================================================ FILE: data_loader.py ================================================ import numpy as np import cv2 import os import random import glob from keras.utils import Sequence from keras.applications.imagenet_utils import preprocess_input as pinput class HairGenerator(Sequence): def __init__(self, transformer, root_dir, mode='Training', nb_classes=3, batch_size=4, backbone=None, shuffle=False): # The backbone argument existed for segmentation_models support, which has been removed assert mode in ['Training', 'Testing'], "mode must be 'Training' or 'Testing'"
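# Expected dataset layout, inferred from the glob patterns below (exact file
# extensions depend on the dataset):
#   root_dir/Original/Training/*   raw images
#   root_dir/GT/Training/*         masks with pixel values 0 / 128 / 255
# plus a parallel Testing/ split; images and masks are paired by sorted order,
# so corresponding files must share the same name stem.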
self.image_path_list = sorted( glob.glob(os.path.join(root_dir, 'Original', mode, '*'))) self.mask_path_list = sorted( glob.glob(os.path.join(root_dir, 'GT', mode, '*'))) self.transformer = transformer self.batch_size = batch_size self.nb_classes = nb_classes self.shuffle = shuffle self.mode = mode self.backbone = backbone def __getitem__(self, idx): images, masks = [], [] for (image_path, mask_path) in zip(self.image_path_list[idx * self.batch_size: (idx + 1) * self.batch_size], self.mask_path_list[idx * self.batch_size: (idx + 1) * self.batch_size]): image = cv2.imread(image_path, 1) mask = cv2.imread(mask_path, 0) image = self._padding(image) mask = self._padding(mask) # apply augmentation augmentation = self.transformer(image=image, mask=mask) image = augmentation['image'] mask = self._get_result_map(augmentation['mask']) images.append(image) masks.append(mask) images = np.array(images) masks = np.array(masks) images = pinput(images) return images, masks def __len__(self): """Steps required per epoch""" return len(self.image_path_list) // self.batch_size def _padding(self, image): """Zero-pad an image (or mask) to a square of side max(h, w)""" shape = image.shape h, w = shape[:2] width = np.max([h, w]) padd_h = (width - h) // 2 padd_w = (width - w) // 2 if len(shape) == 3: padd_tuple = ((padd_h, width - h - padd_h), (padd_w, width - w - padd_w), (0, 0)) else: padd_tuple = ((padd_h, width - h - padd_h), (padd_w, width - w - padd_w)) image = np.pad(image, padd_tuple, 'constant') return image def on_epoch_end(self): """Shuffle image order""" if self.shuffle: c = list(zip(self.image_path_list, self.mask_path_list)) random.shuffle(c) self.image_path_list, self.mask_path_list = zip(*c) def _get_result_map(self, mask): """Convert a gray mask into a per-pixel one-hot map""" # result_map has shape (rows, columns, nb_classes); _padding has already made the mask square result_map = np.zeros((mask.shape[0], mask.shape[1], self.nb_classes)) # 0 (background pixel), 128 (face area pixel) or 255 (hair area pixel).
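# Example: with nb_classes=3, the 2x2 mask [[0, 128], [255, 128]] becomes
# channel 0 (background) = [[1, 0], [0, 0]], channel 1 (skin) = [[0, 1], [0, 1]]
# and channel 2 (hair) = [[0, 0], [1, 0]], i.e. a one-hot vector per pixel.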
skin = (mask == 128) hair = (mask == 255) if self.nb_classes == 2: # hair = (mask > 128) background = np.logical_not(hair) result_map[:, :, 0] = np.where(background, 1, 0) result_map[:, :, 1] = np.where(hair, 1, 0) elif self.nb_classes == 3: background = np.logical_not(hair + skin) result_map[:, :, 0] = np.where(background, 1, 0) result_map[:, :, 1] = np.where(skin, 1, 0) result_map[:, :, 2] = np.where(hair, 1, 0) else: raise ValueError("nb_classes must be 2 or 3") return result_map ================================================ FILE: experiments/cal_histogram.py ================================================ # References: # https://www.cnblogs.com/maybe2030/p/4585705.html # https://blog.csdn.net/zhu_hongji/article/details/80443585 # https://blog.csdn.net/wsp_1138886114/article/details/80660014 # https://blog.csdn.net/gfjjggg/article/details/87919658 # https://baike.baidu.com/item/%E9%A2%9C%E8%89%B2%E7%9F%A9/19426187?fr=aladdin # https://blog.csdn.net/langyuewu/article/details/4144139 from __future__ import print_function, division from sklearn import svm from imblearn.over_sampling import SMOTE from sklearn.metrics import classification_report, confusion_matrix import joblib # sklearn.externals.joblib was removed in newer scikit-learn from sklearn.neural_network import MLPClassifier from sklearn.ensemble import RandomForestClassifier from sklearn.model_selection import KFold, cross_val_score, train_test_split import cv2 as cv import numpy as np import matplotlib.pyplot as plt import os import sys import time from imutils import paths import logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') from utils import * class RGBHistogram(Histogram): '''RGB Histogram''' def __init__(self, bins): super().__init__(bins) def describe(self, image, mask): hist_b = cv.calcHist([image], [0], mask, self.bins, [0, 256]) hist_g = cv.calcHist([image], [1], mask, self.bins, [0, 256]) hist_r = cv.calcHist([image], [2], mask, self.bins, [0, 256]) hist_b = hist_b / np.sum(hist_b) hist_g = hist_g / np.sum(hist_g) hist_r = hist_r / np.sum(hist_r) # 8 bins per channel -> 24 dimensions return np.concatenate([hist_b, hist_g, hist_r], axis=0).reshape(-1) class HSVHistogram(Histogram): '''HSV Histogram''' def __init__(self, bins): super().__init__(bins) def describe(self, image, mask): image = cv.cvtColor(image, cv.COLOR_BGR2HSV) hist_h = cv.calcHist([image], [0], mask, self.bins, [0, 180]) hist_s = cv.calcHist([image], [1], mask, self.bins, [0, 256]) hist_v = cv.calcHist([image], [2], mask, self.bins, [0, 256]) hist_h = hist_h / np.sum(hist_h) hist_s = hist_s / np.sum(hist_s) hist_v = hist_v / np.sum(hist_v) # 8 bins per channel -> 24 dimensions return np.concatenate([hist_h, hist_s, hist_v], axis=0).reshape(-1) class YCrCbHistogram(Histogram): '''YCrCb Histogram''' def __init__(self, bins): super().__init__(bins) def describe(self, image, mask): image = cv.cvtColor(image, cv.COLOR_BGR2YCrCb) hist_y = cv.calcHist([image], [0], mask, self.bins, [0, 256]) hist_cr = cv.calcHist([image], [1], mask, self.bins, [0, 256]) hist_cb = cv.calcHist([image], [2], mask, self.bins, [0, 256]) hist_y = hist_y / np.sum(hist_y) hist_cr = hist_cr / np.sum(hist_cr) hist_cb = hist_cb / np.sum(hist_cb) # 8 bins per channel -> 24 dimensions return np.concatenate([hist_y, hist_cr, hist_cb], axis=0).reshape(-1) if __name__ == "__main__": logger = logging.getLogger(__name__) CLASSES = 5 images_list = [] masks_list = [] features_list = [] classes_list = [] hist = YCrCbHistogram([8]) s1 = time.time() for i in range(0, CLASSES): for imgpath in sorted(paths.list_images(str(i))): if os.path.splitext(imgpath)[-1]
== '.jpg': images_list.append(imgpath) classes_list.append(i) elif os.path.splitext(imgpath)[-1] == '.png': masks_list.append(imgpath) else: raise ValueError("unexpected file type...") s2 = time.time() logger.info(f"Time use: {s2 - s1} s") for image_path, mask_path in zip(images_list, masks_list): # print(image_path, mask_path) image = cv.imread(image_path) mask = cv.imread(mask_path, 0) features = hist.describe(image, mask) # print(features) features_list.append(features) logger.info(f"Time use: {time.time() - s2} s") logger.info("Data process ready...") # Resampling sm = SMOTE(sampling_strategy='all', random_state=2019) features_list, classes_list = sm.fit_resample(features_list, classes_list) # Machine learning algorithm # clf = MLPClassifier(solver='lbfgs', alpha=1e-5, # hidden_layer_sizes=(8, ), random_state=2019) clf = RandomForestClassifier(n_estimators=180, random_state=2019) # kf = KFold(n_splits=CLASSES, random_state=2019, shuffle=True).\ # get_n_splits(features_list) # scores = cross_val_score(clf, features_list, classes_list, # scoring='accuracy', cv=kf) # score = scores.mean() # logger.info(f"KFold score: {score}") # Split train and test dataset X_train, X_test, y_train, y_test = train_test_split( features_list, classes_list, test_size=0.2, random_state=2019) y_pred = clf.fit(X_train, y_train).predict(X_test) classify_report = classification_report(y_test, y_pred) logger.info('\n' + classify_report) np.set_printoptions(precision=2) plot_confusion_matrix(y_test, y_pred, classes=['0', '1', '2', '3', '4'], title='Confusion matrix') plt.show() # Save model # https://blog.csdn.net/qiang12qiang12/article/details/81001839 # How to load model: # 1. clf = joblib.load('models/histogram.pkl') # 2. clf.predict(X_test) # joblib.dump(clf, 'models/histogram.pkl') ================================================ FILE: experiments/cal_moments.py ================================================ # https://www.cnblogs.com/klchang/p/6512310.html from __future__ import print_function, division import cv2 as cv import numpy as np import tqdm import time import os import sys import logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') import matplotlib.pyplot as plt import joblib # sklearn.externals.joblib was removed in newer scikit-learn from sklearn.metrics import classification_report from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestClassifier from imblearn.over_sampling import SMOTE from utils import * from imutils import paths def color_moments(image, mask, color_space): """ function: Color Moment Features image: raw image mask: image mask color_space: 'rgb', 'lab', 'ycrcb' or 'hsv' """ assert image.shape[:2] == mask.shape assert color_space.lower() in ['lab', 'rgb', 'ycrcb', 'hsv'] if color_space.lower() == 'rgb': image = cv.cvtColor(image, cv.COLOR_BGR2RGB) elif color_space.lower() == 'hsv': image = cv.cvtColor(image, cv.COLOR_BGR2HSV) elif color_space.lower() == 'lab': image = cv.cvtColor(image, cv.COLOR_BGR2LAB) elif color_space.lower() == 'ycrcb': image = cv.cvtColor(image, cv.COLOR_BGR2YCrCb) else: raise ValueError("Color space error...") # Split image into channels c1, c2, c3 = cv.split(image) color_feature = [] # Keep only the pixels inside the mask c1 = c1[np.where(mask != 0)] c2 = c2[np.where(mask != 0)] c3 = c3[np.where(mask != 0)] # Extract mean (first moment) mean_1 = np.mean(c1) mean_2 = np.mean(c2) mean_3 = np.mean(c3) # Extract standard deviation (second moment) variance_1 = np.std(c1) variance_2 = np.std(c2) variance_3 = np.std(c3) # Extract skewness
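# Skewness here is the cube root of the third absolute central moment of each
# masked channel, s_k = (E[|c_k - mu_k|^3])^(1/3), which keeps all nine
# features on the same scale as the raw pixel values.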
skewness_1 = np.mean(np.abs(c1 - mean_1) ** 3) ** (1. / 3) skewness_2 = np.mean(np.abs(c2 - mean_2) ** 3) ** (1. / 3) skewness_3 = np.mean(np.abs(c3 - mean_3) ** 3) ** (1. / 3) color_feature.extend( [mean_1, mean_2, mean_3, variance_1, variance_2, variance_3, skewness_1, skewness_2, skewness_3]) return color_feature if __name__ == "__main__": logger = logging.getLogger(__name__) CLASSES = 5 images_list = [] masks_list = [] features_list = [] classes_list = [] s1 = time.time() for i in range(0, CLASSES): for imgpath in sorted(paths.list_images(str(i))): if os.path.splitext(imgpath)[-1] == '.jpg': images_list.append(imgpath) classes_list.append(int(i)) elif os.path.splitext(imgpath)[-1] == '.png': masks_list.append(imgpath) else: raise ValueError("unexpected file type...") s2 = time.time() logger.info(f"Time use: {s2 - s1} s") for image_path, mask_path in tqdm.tqdm(zip(images_list, masks_list)): image = cv.imread(image_path) mask = cv.imread(mask_path, 0) features = color_moments(image, mask, color_space='ycrcb') features_list.append(features) logger.info(f"Time use: {time.time() - s2} s") logger.info("Data process ready...") # Resampling sm = SMOTE(sampling_strategy='all', random_state=2019) features_list, classes_list = sm.fit_resample(features_list, classes_list) X_train, X_test, y_train, y_test = train_test_split( features_list, classes_list, test_size=0.2, random_state=2019) clf = RandomForestClassifier(n_estimators=180, random_state=2019) y_pred = clf.fit(X_train, y_train).predict(X_test) joblib.dump(clf, 'skinColor.pkl') classify_report = classification_report(y_test, y_pred) logger.info('\n' + classify_report) np.set_printoptions(precision=2) plot_confusion_matrix(y_test, y_pred, classes=['0', '1', '2', '3', '4'], title='Confusion matrix') plt.show() ================================================ FILE: experiments/cal_pca.py ================================================ # color-auto-correlogram # https://blog.csdn.net/u013066730/article/details/53609859 from __future__ import print_function, division import numpy as np import cv2 as cv import sys import os import tqdm import time import csv import pandas as pd from sklearn import svm from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.model_selection import train_test_split from sklearn.metrics import classification_report from sklearn.ensemble import RandomForestClassifier import matplotlib.pyplot as plt import logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') from utils import * from imutils import paths class RGBHistogram(Histogram): '''RGB Histogram''' def __init__(self, bins): super().__init__(bins) def describe(self, image, mask): image = cv.cvtColor(image, cv.COLOR_BGR2RGB) hist = cv.calcHist([image], [0, 1, 2], mask, self.bins, [0, 256, 0, 256, 0, 256]) hist = hist / np.sum(hist) # 512 dimensions (8 x 8 x 8 joint histogram) return hist.flatten() if __name__ == "__main__": logger = logging.getLogger(__name__) CLASSES = 5 K_ClUSTER = 15 images_list = [] masks_list = [] features_list = [] classes_list = [] s1 = time.time() for i in range(0, CLASSES): for imgpath in sorted(paths.list_images(str(i))): if os.path.splitext(imgpath)[-1] == '.jpg': images_list.append(imgpath) classes_list.append(int(i)) elif os.path.splitext(imgpath)[-1] == '.png': masks_list.append(imgpath) else: raise ValueError("unexpected file type...") s2 = time.time() logger.info(f"Time use: {s2 - s1} s") hist = RGBHistogram([8, 8, 8]) for image_path, mask_path in tqdm.tqdm(zip(images_list, masks_list)): image
= cv.imread(image_path) mask = cv.imread(mask_path, 0) features = hist.describe(image, mask) features_list.append(features) logger.info(f"Time use: {time.time() - s2} s") logger.info("Data process ready...") assert len(features_list) == len(classes_list) # PCA Dimensionality Reduction pca = PCA(n_components=K_ClUSTER, random_state=2019) # pca.fit(features_list) # logger.info(pca.explained_variance_ratio_) newX = pca.fit_transform(features_list) X_train, X_test, y_train, y_test = train_test_split( newX, classes_list, test_size=0.2, random_state=2019) clf = RandomForestClassifier(n_estimators=180, random_state=2019) y_pred = clf.fit(X_train, y_train).predict(X_test) classify_report = classification_report(y_test, y_pred) logger.info('\n' + classify_report) np.set_printoptions(precision=2) plot_confusion_matrix(y_test, y_pred, classes=['0', '1', '2', '3', '4'], title='Confusion matrix') plt.show() ================================================ FILE: experiments/utils.py ================================================ import numpy as np import matplotlib.pyplot as plt from sklearn.metrics import confusion_matrix def plot_confusion_matrix(y_true, y_pred, classes, normalize=False, title=None, cmap=plt.cm.Blues): """ This function prints and plots the confusion matrix. Normalization can be applied by setting `normalize=True`. """ if not title: if normalize: title = 'Normalized confusion matrix' else: title = 'Confusion matrix, without normalization' # Compute confusion matrix cm = confusion_matrix(y_true, y_pred) if normalize: cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] print("Normalized confusion matrix") else: print('Confusion matrix, without normalization') fig, ax = plt.subplots() im = ax.imshow(cm, interpolation='nearest', cmap=cmap) ax.figure.colorbar(im, ax=ax) # We want to show all ticks... ax.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), # ... and label them with the respective list entries xticklabels=classes, yticklabels=classes, title=title, ylabel='True label', xlabel='Predicted label') # Rotate the tick labels and set their alignment. plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor") # Loop over data dimensions and create text annotations. fmt = '.2f' if normalize else 'd' thresh = cm.max() / 2. 
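# Cells whose count exceeds half of the maximum are annotated in white so the
# labels stay readable on the dark end of the colormap.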
for i in range(cm.shape[0]): for j in range(cm.shape[1]): ax.text(j, i, format(cm[i, j], fmt), ha="center", va="center", color="white" if cm[i, j] > thresh else "black") fig.tight_layout() return ax class Histogram: '''Histogram base class''' def __init__(self, bins): self.bins = bins def describe(self, image, mask): raise NotImplementedError ================================================ FILE: metric.py ================================================ import keras.backend as K import tensorflow as tf from keras.utils.generic_utils import get_custom_objects CLS_NUM = 2 # should be modified according to class number SMOOTH = K.epsilon() # https://blog.csdn.net/majinlei121/article/details/78965435 def mean_iou(y_true, y_pred, cls_num=CLS_NUM): result = 0 nc = tf.cast(tf.shape(y_true)[-1], tf.float32) for i in range(cls_num): # nii = number of pixels of class i predicted to belong to class i nii = tf.reduce_sum(tf.round(tf.multiply( y_true[:, :, :, i], y_pred[:, :, :, i]))) ti = tf.reduce_sum(y_true[:, :, :, i]) # number of pixels of class i loc_sum = 0 for j in range(cls_num): # number of pixels of class j predicted to belong to class i nji = tf.reduce_sum(tf.round(tf.multiply( y_true[:, :, :, j], y_pred[:, :, :, i]))) loc_sum += nji # SMOOTH keeps the denominator non-zero when class i is absent result += nii / (ti - nii + loc_sum + SMOOTH) return (1 / nc) * result def mean_accuracy(y_true, y_pred, cls_num=CLS_NUM): result = 0 nc = tf.cast(tf.shape(y_true)[-1], tf.float32) for i in range(cls_num): nii = tf.reduce_sum(tf.round(tf.multiply( y_true[:, :, :, i], y_pred[:, :, :, i]))) ti = tf.reduce_sum(y_true[:, :, :, i]) # a Python `if ti != 0` cannot test a symbolic tensor, so smooth the denominator instead result += nii / (ti + SMOOTH) return (1 / nc) * result def frequency_weighted_iou(y_true, y_pred, cls_num=CLS_NUM): result = 0 for i in range(cls_num): nii = tf.reduce_sum(tf.round(tf.multiply( y_true[:, :, :, i], y_pred[:, :, :, i]))) ti = tf.reduce_sum(y_true[:, :, :, i]) loc_sum = 0 for j in range(cls_num): nji = tf.reduce_sum(tf.round(tf.multiply( y_true[:, :, :, j], y_pred[:, :, :, i]))) loc_sum += nji result += (loc_sum * nii) / (ti - nii + loc_sum + SMOOTH) sum_ti = tf.reduce_sum(y_true[:, :, :, :]) return (1 / sum_ti) * result def pixel_accuracy(y_true, y_pred): # nii = number of pixels of class i predicted to belong to class i sum_nii = tf.reduce_sum(tf.round(tf.multiply( y_true[:, :, :, :], y_pred[:, :, :, :]))) # ti = number of pixels of class i sum_ti = tf.reduce_sum(y_true[:, :, :, :]) return sum_nii / sum_ti get_custom_objects().update({ 'pixel_accuracy': pixel_accuracy, 'frequency_weighted_iou': frequency_weighted_iou, 'mean_accuracy': mean_accuracy, 'mean_iou': mean_iou }) ================================================ FILE: model/__init__.py ================================================ #!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2020/3/27 20:11 # @Author : JackyLUO # @E-mail : lingluo@stumail.neu.edu.cn # @Site : # @File : __init__.py # @Software: PyCharm ================================================ FILE: model/dfanet.py ================================================ #!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2020/3/27 19:56 # @Author : JackyLUO # @E-mail : lingluo@stumail.neu.edu.cn # @Site : # @File : dfanet.py # @Software: PyCharm from keras.layers import * from keras.models import Model import keras.backend as K def ConvBlock(inputs, n_filters, kernel_size=3, strides=1): """ Basic conv block for Encoder-Decoder Applies Convolution, BatchNormalization and ReLU nonlinearity in sequence """ net = Conv2D(n_filters, kernel_size, strides=strides, padding='same', kernel_initializer='he_normal',
use_bias=False)(inputs) net = BatchNormalization()(net) net = Activation('relu')(net) return net def separable_res_block_deep(inputs, nb_filters, filter_size=3, strides=1, dilation=1, ix=0): inputs = Activation('relu')(inputs) # , name=prefix + '_sepconv1_act' ip_nb_filter = K.get_variable_shape(inputs)[-1] if ip_nb_filter != nb_filters or strides != 1: residual = Conv2D(nb_filters, 1, strides=strides, use_bias=False)(inputs) residual = BatchNormalization()(residual) else: residual = inputs x = SeparableConv2D(nb_filters // 4, filter_size, dilation_rate=dilation, padding='same', use_bias=False, kernel_initializer='he_normal', )(inputs) x = BatchNormalization()(x) # name=prefix + '_sepconv1_bn' x = Activation('relu')(x) # , name=prefix + '_sepconv2_act' x = SeparableConv2D(nb_filters // 4, filter_size, dilation_rate=dilation, padding='same', use_bias=False, kernel_initializer='he_normal', )(x) x = BatchNormalization()(x) # name=prefix + '_sepconv2_bn' x = Activation('relu')(x) # , name=prefix + '_sepconv3_act' # if strides != 1: x = SeparableConv2D(nb_filters, filter_size, strides=strides, dilation_rate=dilation, padding='same', use_bias=False, )(x) x = BatchNormalization()(x) # name=prefix + '_sepconv3_bn' x = add([x, residual]) return x def encoder(inputs, nb_filters, stage): rep_nums = 0 if stage == 2 or stage == 4: rep_nums = 4 elif stage == 3: rep_nums = 6 x = separable_res_block_deep(inputs, nb_filters, strides=2) # , ix = rand_nb + stage * 10 for i in range(rep_nums - 1): x = separable_res_block_deep(x, nb_filters, strides=1) # , ix = rand_nb + stage * 10 + i return x def AttentionRefinementModule(inputs): # Global average pooling nb_channels = K.get_variable_shape(inputs)[-1] net = GlobalAveragePooling2D()(inputs) net = Reshape((1, nb_channels))(net) net = Conv1D(nb_channels, kernel_size=1, kernel_initializer='he_normal', )(net) net = BatchNormalization()(net) net = Activation('relu')(net) net = Conv1D(nb_channels, kernel_size=1, kernel_initializer='he_normal', )(net) net = BatchNormalization()(net) net = Activation('sigmoid')(net) # tf.sigmoid(net) net = Multiply()([inputs, net]) return net def xception_backbone(inputs, size_factor=2): x = Conv2D(8, kernel_size=3, strides=2, padding='same', use_bias=False)(inputs) x = BatchNormalization()(x) x = Activation('relu')(x) x = encoder(x, int(16 * size_factor), 2) x = encoder(x, int(32 * size_factor), 3) x = encoder(x, int(64 * size_factor), 4) x = AttentionRefinementModule(x) return x def DFANet(input_shape, cls_num=3, size_factor=2): img_input = Input(input_shape) x = Conv2D(8, kernel_size=5, strides=2, padding='same', use_bias=False)(img_input) x = BatchNormalization()(x) levela_input = Activation('relu')(x) enc2_a = encoder(levela_input, int(16 * size_factor), 2) enc3_a = encoder(enc2_a, int(32 * size_factor), 3) enc4_a = encoder(enc3_a, int(64 * size_factor), 4) enc_attend_a = AttentionRefinementModule(enc4_a) enc_upsample_a = UpSampling2D(size=4, interpolation='bilinear')(enc_attend_a) levelb_input = Concatenate()([enc2_a, enc_upsample_a]) enc2_b = encoder(levelb_input, int(16 * size_factor), 2) enc2_b_combine = Concatenate()([enc3_a, enc2_b]) enc3_b = encoder(enc2_b_combine, int(32 * size_factor), 3) enc3_b_combine = Concatenate()([enc4_a, enc3_b]) enc4_b = encoder(enc3_b_combine, int(64 * size_factor), 4) enc_attend_b = AttentionRefinementModule(enc4_b) enc_upsample_b = UpSampling2D(size=4, interpolation='bilinear')(enc_attend_b) levelc_input = Concatenate()([enc2_b, enc_upsample_b]) enc2_c = encoder(levelc_input, int(16 * 
size_factor), 2) enc2_c_combine = Concatenate()([enc3_b, enc2_c]) enc3_c = encoder(enc2_c_combine, int(32 * size_factor), 3) enc3_c_combine = Concatenate()([enc4_b, enc3_c]) enc4_c = encoder(enc3_c_combine, int(64 * size_factor), 4) enc_attend_c = AttentionRefinementModule(enc4_c) enc2_a_decoder = ConvBlock(enc2_a, 32, kernel_size=1) enc2_b_decoder = ConvBlock(enc2_b, 32, kernel_size=1) enc2_b_decoder = UpSampling2D(size=2, interpolation='bilinear')(enc2_b_decoder) enc2_c_decoder = ConvBlock(enc2_c, 32, kernel_size=1) enc2_c_decoder = UpSampling2D(size=4, interpolation='bilinear')(enc2_c_decoder) decoder_front = Add()([enc2_a_decoder, enc2_b_decoder, enc2_c_decoder]) decoder_front = ConvBlock(decoder_front, 32, kernel_size=1) att_a_decoder = ConvBlock(enc_attend_a, 32, kernel_size=1) att_a_decoder = UpSampling2D(size=4, interpolation='bilinear')(att_a_decoder) att_b_decoder = ConvBlock(enc_attend_b, 32, kernel_size=1) att_b_decoder = UpSampling2D(size=8, interpolation='bilinear')(att_b_decoder) att_c_decoder = ConvBlock(enc_attend_c, 32, kernel_size=1) att_c_decoder = UpSampling2D(size=16, interpolation='bilinear')(att_c_decoder) decoder_combine = Add()([decoder_front, att_a_decoder, att_b_decoder, att_c_decoder]) decoder_combine = ConvBlock(decoder_combine, cls_num * 2, kernel_size=1) decoder_final = UpSampling2D(size=4, interpolation='bilinear')(decoder_combine) output = Conv2D(cls_num, (1, 1), activation='softmax')(decoder_final) return Model(img_input, output, name='DFAnet') if __name__ == '__main__': from flops import get_flops model = DFANet(input_shape=(256, 256, 3), cls_num=3, size_factor=2) model.summary() get_flops(model) ================================================ FILE: model/enet.py ================================================ # !/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2020/3/27 17:41 # @Author : JackyLUO # @E-mail : lingluo@stumail.neu.edu.cn # @Site : # @File : enet.py # @Software: PyCharm # from keras.layers import * from keras.models import Model class Conv2DTransposeCustom(object): """Fixed output shape bug...""" def __init__(self, filters, kernel_size, strides=(1, 1), padding='same'): self.filters = filters self.kernel_size = kernel_size self.strides = strides self.padding = padding def __call__(self, layer): out = Conv2DTranspose(self.filters, self.kernel_size, strides=self.strides, padding=self.padding)(layer) if not isinstance(self.strides, tuple): self.strides = (self.strides, self.strides) out.set_shape((out.shape[0], layer.shape[1] * self.strides[0], layer.shape[2] * self.strides[1], out.shape[3])) return out def initial_block(inp, nb_filter=13, nb_row=3, nb_col=3, strides=(2, 2)): conv = Conv2D(nb_filter, (nb_row, nb_col), padding='same', strides=strides)(inp) max_pool = MaxPooling2D()(inp) merged = concatenate([conv, max_pool], axis=3) return merged def bottleneck(inp, output, internal_scale=4, asymmetric=0, dilated=0, downsample=False, dropout_rate=0.1): # main branch internal = output // internal_scale encoder = inp # 1x1 input_stride = 2 if downsample else 1 # the 1st 1x1 projection is replaced with a 2x2 convolution when downsampling encoder = Conv2D(internal, (input_stride, input_stride), # padding='same', strides=(input_stride, input_stride), use_bias=False)(encoder) # Batch normalization + PReLU encoder = BatchNormalization(momentum=0.1)(encoder) # enet uses momentum of 0.1, keras default is 0.99 encoder = PReLU(shared_axes=[1, 2])(encoder) # conv if not asymmetric and not dilated: encoder = Conv2D(internal, (3, 3), 
padding='same')(encoder) elif asymmetric: encoder = Conv2D(internal, (1, asymmetric), padding='same', use_bias=False)(encoder) encoder = Conv2D(internal, (asymmetric, 1), padding='same')(encoder) elif dilated: encoder = Conv2D(internal, (3, 3), dilation_rate=(dilated, dilated), padding='same')(encoder) else: raise (Exception('You shouldn\'t be here')) encoder = BatchNormalization(momentum=0.1)(encoder) # enet uses momentum of 0.1, keras default is 0.99 encoder = PReLU(shared_axes=[1, 2])(encoder) # 1x1 encoder = Conv2D(output, (1, 1), use_bias=False)(encoder) encoder = BatchNormalization(momentum=0.1)(encoder) # enet uses momentum of 0.1, keras default is 0.99 encoder = SpatialDropout2D(dropout_rate)(encoder) other = inp # other branch if downsample: other = MaxPooling2D()(other) other = Permute((1, 3, 2))(other) pad_feature_maps = output - inp.get_shape().as_list()[3] tb_pad = (0, 0) lr_pad = (0, pad_feature_maps) other = ZeroPadding2D(padding=(tb_pad, lr_pad))(other) other = Permute((1, 3, 2))(other) encoder = add([encoder, other]) encoder = PReLU(shared_axes=[1, 2])(encoder) return encoder def en_build(inp, dropout_rate=0.01): enet = initial_block(inp) enet = BatchNormalization(momentum=0.1)(enet) # enet_unpooling uses momentum of 0.1, keras default is 0.99 enet = PReLU(shared_axes=[1, 2])(enet) enet = bottleneck(enet, 64, downsample=True, dropout_rate=dropout_rate) # bottleneck 1.0 for _ in range(4): enet = bottleneck(enet, 64, dropout_rate=dropout_rate) # bottleneck 1.i enet = bottleneck(enet, 128, downsample=True) # bottleneck 2.0 # bottleneck 2.x and 3.x for _ in range(2): enet = bottleneck(enet, 128) # bottleneck 2.1 enet = bottleneck(enet, 128, dilated=2) # bottleneck 2.2 enet = bottleneck(enet, 128, asymmetric=5) # bottleneck 2.3 enet = bottleneck(enet, 128, dilated=4) # bottleneck 2.4 enet = bottleneck(enet, 128) # bottleneck 2.5 enet = bottleneck(enet, 128, dilated=8) # bottleneck 2.6 enet = bottleneck(enet, 128, asymmetric=5) # bottleneck 2.7 enet = bottleneck(enet, 128, dilated=16) # bottleneck 2.8 return enet # decoder def de_bottleneck(encoder, output, upsample=False, reverse_module=False): internal = output // 4 x = Conv2D(internal, (1, 1), use_bias=False)(encoder) x = BatchNormalization(momentum=0.1)(x) x = Activation('relu')(x) if not upsample: x = Conv2D(internal, (3, 3), padding='same', use_bias=True)(x) else: x = Conv2DTransposeCustom(filters=internal, kernel_size=(3, 3), strides=(2, 2), padding='same')(x) x = BatchNormalization(momentum=0.1)(x) x = Activation('relu')(x) x = Conv2D(output, (1, 1), padding='same', use_bias=False)(x) other = encoder if encoder.get_shape()[-1] != output or upsample: other = Conv2D(output, (1, 1), padding='same', use_bias=False)(other) other = BatchNormalization(momentum=0.1)(other) if upsample and reverse_module is not False: other = UpSampling2D(size=(2, 2))(other) if upsample and reverse_module is False: decoder = x else: x = BatchNormalization(momentum=0.1)(x) decoder = add([x, other]) decoder = Activation('relu')(decoder) return decoder def de_build(encoder, nc): enet = de_bottleneck(encoder, 64, upsample=True, reverse_module=True) # bottleneck 4.0 enet = de_bottleneck(enet, 64) # bottleneck 4.1 enet = de_bottleneck(enet, 64) # bottleneck 4.2 enet = de_bottleneck(enet, 16, upsample=True, reverse_module=True) # bottleneck 5.0 enet = de_bottleneck(enet, 16) # bottleneck 5.1 enet = Conv2DTransposeCustom(filters=nc, kernel_size=(2, 2), strides=(2, 2), padding='same')(enet) return enet def ENet(input_shape, cls_num=3): # Make sure the 
dimensions are multiples of 32 assert input_shape[0] % 32 == 0 assert input_shape[1] % 32 == 0 img_input = Input(input_shape) enet = en_build(img_input) enet = de_build(enet, cls_num) enet = Activation('softmax')(enet) return Model(img_input, enet) if __name__ == '__main__': from flops import get_flops model = ENet(input_shape=(256, 256, 3), cls_num=3) # model.summary() get_flops(model, True) ================================================ FILE: model/fast_scnn.py ================================================ import keras import tensorflow as tf def resize_image(image): return tf.image.resize_images(image, (256, 256)) class Fast_SCNN: def __init__(self, num_classes=3, input_shape=(256, 256, 3)): self.classes = num_classes self.input_shape = input_shape self.height = input_shape[0] self.width = input_shape[1] def conv_block(self, inputs, conv_type, kernel, kernel_size, strides, padding='same', relu=True): if conv_type == 'ds': x = keras.layers.SeparableConv2D(kernel, kernel_size, padding=padding, strides=strides)(inputs) else: x = keras.layers.Conv2D(kernel, kernel_size, padding=padding, strides=strides)(inputs) x = keras.layers.BatchNormalization()(x) if relu: x = keras.layers.ReLU()(x) return x def learning_to_downsample(self): # Input Layer self.input_layer = keras.layers.Input(shape=self.input_shape, name='input_layer') self.lds_layer = self.conv_block(self.input_layer, 'conv', 32, (3, 3), strides=(2, 2)) self.lds_layer = self.conv_block(self.lds_layer, 'ds', 48, (3, 3), strides=(2, 2)) self.lds_layer = self.conv_block(self.lds_layer, 'ds', 64, (3, 3), strides=(2, 2)) def global_feature_extractor(self): self.gfe_layer = self.bottleneck_block(self.lds_layer, 64, (3, 3), t=6, strides=2, n=3) self.gfe_layer = self.bottleneck_block(self.gfe_layer, 96, (3, 3), t=6, strides=2, n=3) self.gfe_layer = self.bottleneck_block(self.gfe_layer, 128, (3, 3), t=6, strides=1, n=3) # self.gfe_layer = self.pyramid_pooling_block(self.gfe_layer, [2, 4, 6, 8]) self.gfe_layer = self.pyramid_pooling_block(self.gfe_layer, [1, 2, 4]) def _res_bottleneck(self, inputs, filters, kernel, t, s, r=False): tchannel = keras.backend.int_shape(inputs)[-1] * t x = self.conv_block(inputs, 'conv', tchannel, (1, 1), strides=(1, 1)) x = keras.layers.DepthwiseConv2D(kernel, strides=(s, s), depth_multiplier=1, padding='same')(x) x = keras.layers.BatchNormalization()(x) x = keras.layers.ReLU()(x) x = self.conv_block(x, 'conv', filters, (1, 1), strides=(1, 1), padding='same', relu=False) if r: x = keras.layers.add([x, inputs]) return x def bottleneck_block(self, inputs, filters, kernel, t, strides, n): x = self._res_bottleneck(inputs, filters, kernel, t, strides) for i in range(1, n): x = self._res_bottleneck(x, filters, kernel, t, 1, True) return x def pyramid_pooling_block(self, input_tensor, bin_sizes): concat_list = [input_tensor] w = self.width // 32 h = self.height // 32 for bin_size in bin_sizes: x = keras.layers.AveragePooling2D(pool_size=(bin_size, bin_size), strides=(bin_size, bin_size))(input_tensor) x = keras.layers.Conv2D(128, (3, 3), strides=2, padding='same')(x) x = keras.layers.BatchNormalization()(x) x = keras.layers.ReLU()(x) x = keras.layers.UpSampling2D(size=(bin_size * 2, bin_size * 2))(x) concat_list.append(x) return keras.layers.concatenate(concat_list) def feature_fusion(self): ff_layer1 = self.conv_block(self.lds_layer, 'conv', 128, (1, 1), padding='same', strides=(1, 1), relu=False) ff_layer2 = keras.layers.UpSampling2D((4, 4))(self.gfe_layer) ff_layer2 = keras.layers.DepthwiseConv2D((3, 3), 
strides=(1, 1), depth_multiplier=1, padding='same')(ff_layer2) ff_layer2 = keras.layers.BatchNormalization()(ff_layer2) ff_layer2 = keras.layers.ReLU()(ff_layer2) ff_layer2 = keras.layers.Conv2D(128, (1, 1), strides=1, padding='same', activation=None)(ff_layer2) self.ff_final = keras.layers.add([ff_layer1, ff_layer2]) self.ff_final = keras.layers.BatchNormalization()(self.ff_final) self.ff_final = keras.layers.ReLU()(self.ff_final) def classifier(self): self.classifier = keras.layers.SeparableConv2D(128, (3, 3), padding='same', strides=(1, 1), name='DSConv1_classifier')(self.ff_final) self.classifier = keras.layers.BatchNormalization()(self.classifier) self.classifier = keras.layers.ReLU()(self.classifier) self.classifier = keras.layers.SeparableConv2D(128, (3, 3), padding='same', strides=(1, 1), name='DSConv2_classifier')(self.classifier) self.classifier = keras.layers.BatchNormalization()(self.classifier) self.classifier = keras.layers.ReLU()(self.classifier) self.classifier = self.conv_block(self.classifier, 'conv', self.classes, (1, 1), strides=(1, 1), padding='same', relu=False) self.classifier = keras.layers.Lambda(lambda image: resize_image(image), name='Resize')(self.classifier) self.classifier = keras.layers.Dropout(0.3)(self.classifier) def activation(self, activation='softmax'): x = keras.layers.Activation(activation, name=activation)(self.classifier) return x def model(self, activation='softmax'): self.learning_to_downsample() self.global_feature_extractor() self.feature_fusion() self.classifier() self.output_layer = self.activation(activation) model = keras.Model(inputs=self.input_layer, outputs=self.output_layer, name='Fast_SCNN') return model if __name__ == '__main__': from flops import get_flops model = Fast_SCNN(num_classes=3, input_shape=(256, 256, 3)).model() model.summary() get_flops(model) ================================================ FILE: model/flops.py ================================================ #!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2020/3/27 17:49 # @Author : JackyLUO # @E-mail : lingluo@stumail.neu.edu.cn # @Site : # @File : flops.py # @Software: PyCharm # https://github.com/ckyrkou/Keras_FLOP_Estimator import keras.backend as K def get_flops(model, table=False): if table: print('%25s | %16s | %16s | %16s | %16s | %6s | %6s' % ( 'Layer Name', 'Input Shape', 'Output Shape', 'Kernel Size', 'Filters', 'Strides', 'FLOPS')) print('-' * 170) t_flops = 0 t_macc = 0 for l in model.layers: o_shape, i_shape, strides, ks, filters = ['', '', ''], ['', '', ''], [1, 1], [0, 0], [0, 0] flops = 0 macc = 0 name = l.name factor = 1e9 if 'InputLayer' in str(l): i_shape = l.input.get_shape()[1:4].as_list() o_shape = i_shape if 'Reshape' in str(l): i_shape = l.input.get_shape()[1:4].as_list() o_shape = l.output.get_shape()[1:4].as_list() if 'Add' in str(l) or 'Maximum' in str(l) or 'Concatenate' in str(l): i_shape = l.input[0].get_shape()[1:4].as_list() + [len(l.input)] o_shape = l.output.get_shape()[1:4].as_list() flops = (len(l.input) - 1) * i_shape[0] * i_shape[1] * i_shape[2] if 'Average' in str(l) and 'pool' not in str(l): i_shape = l.input[0].get_shape()[1:4].as_list() + [len(l.input)] o_shape = l.output.get_shape()[1:4].as_list() flops = len(l.input) * i_shape[0] * i_shape[1] * i_shape[2] if 'BatchNormalization' in str(l): i_shape = l.input.get_shape()[1:4].as_list() o_shape = l.output.get_shape()[1:4].as_list() bflops = 1 for i in range(len(i_shape)): bflops *= i_shape[i] flops /= factor if 'Activation' in str(l) or 'activation' in str(l): i_shape = 
l.input.get_shape()[1:4].as_list() o_shape = l.output.get_shape()[1:4].as_list() bflops = 1 for i in range(len(i_shape)): bflops *= i_shape[i] flops /= factor if 'pool' in str(l) and ('Global' not in str(l)): i_shape = l.input.get_shape()[1:4].as_list() strides = l.strides ks = l.pool_size flops = ((i_shape[0] / strides[0]) * (i_shape[1] / strides[1]) * (ks[0] * ks[1] * i_shape[2])) if 'Flatten' in str(l): i_shape = l.input.shape[1:4].as_list() flops = 1 out_vec = 1 for i in range(len(i_shape)): flops *= i_shape[i] out_vec *= i_shape[i] o_shape = flops flops = 0 if 'Dense' in str(l): print(l.input) i_shape = l.input.shape[1:4].as_list()[0] if i_shape is None: i_shape = out_vec o_shape = l.output.shape[1:4].as_list() flops = 2 * (o_shape[0] * i_shape) macc = flops / 2 if 'Padding' in str(l): flops = 0 if 'Global' in str(l): i_shape = l.input.get_shape()[1:4].as_list() flops = ((i_shape[0]) * (i_shape[1]) * (i_shape[2])) o_shape = [l.output.get_shape()[1:4].as_list(), 1, 1] out_vec = o_shape if 'Conv2D' in str(l) and 'DepthwiseConv2D' not in str(l) and 'SeparableConv2D' not in str(l): strides = l.strides ks = l.kernel_size filters = l.filters # if 'Conv2DTranspose' in str(l): # i_shape = list(K.int_shape(l.input)[1:4]) # o_shape = list(K.int_shape(l.output)[1:4]) # else: i_shape = l.input.get_shape()[1:4].as_list() o_shape = l.output.get_shape()[1:4].as_list() if filters is None: filters = i_shape[2] flops = 2 * ((filters * ks[0] * ks[1] * i_shape[2]) * ( (i_shape[0] / strides[0]) * (i_shape[1] / strides[1]))) macc = flops / 2 if 'Conv2D' in str(l) and 'DepthwiseConv2D' in str(l) and 'SeparableConv2D' not in str(l): strides = l.strides ks = l.kernel_size filters = l.filters i_shape = l.input.get_shape()[1:4].as_list() o_shape = l.output.get_shape()[1:4].as_list() if filters is None: filters = i_shape[2] flops = 2 * ((ks[0] * ks[1] * i_shape[2]) * ((i_shape[0] / strides[0]) * ( i_shape[1] / strides[1]))) / factor macc = flops / 2 t_macc += macc t_flops += flops if table: print('%25s | %16s | %16s | %16s | %16s | %6s | %5.4f' % ( name, str(i_shape), str(o_shape), str(ks), str(filters), str(strides), flops)) t_flops = t_flops / factor print('Total FLOPS (x 10^-9): %10.8f G' % (t_flops)) print('Total MACCs: %10.8f\n' % (t_macc)) return ================================================ FILE: model/hlnet.py ================================================ # Fast-SCNN # HRNet # MobileNetv2-v3 # ASPP from keras.layers import * from keras.models import Model from keras.utils import plot_model import keras.backend as K def _conv_block(inputs, filters, kernel, strides=1, padding='same', use_activation=False): """Convolution Block This function defines a 2D convolution operation with BN and relu. # Arguments inputs: Tensor, input tensor of conv layer. filters: Integer, the dimensionality of the output space. kernel: An integer or tuple/list of 2 integers, specifying the width and height of the 2D convolution window. strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the width and height. Can be a single integer to specify the same value for all spatial dimensions. # Returns Output tensor. 
""" channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 x = Conv2D(filters, kernel, padding=padding, strides=strides, use_bias=False)(inputs) x = BatchNormalization(axis=channel_axis)(x) if use_activation: x = Activation('relu')(x) return x def _bottleneck(inputs, filters, kernel, t, s, r=False): """Bottleneck This function defines a basic bottleneck structure. # Arguments inputs: Tensor, input tensor of conv layer. filters: Integer, the dimensionality of the output space. kernel: An integer or tuple/list of 2 integers, specifying the width and height of the 2D convolution window. t: Integer, expansion factor. t is always applied to the input size. s: An integer or tuple/list of 2 integers,specifying the strides of the convolution along the width and height.Can be a single integer to specify the same value for all spatial dimensions. r: Boolean, Whether to use the residuals. # Returns Output tensor. """ channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 tchannel = K.int_shape(inputs)[channel_axis] * t x = _conv_block(inputs, tchannel, (1, 1)) x = DepthwiseConv2D(kernel, strides=( s, s), depth_multiplier=1, padding='same')(x) x = BatchNormalization(axis=channel_axis)(x) # relu6 x = ReLU(max_value=6)(x) x = Conv2D(filters, (1, 1), strides=(1, 1), padding='same')(x) x = BatchNormalization(axis=channel_axis)(x) if r: x = add([x, inputs]) return x def _inverted_residual_block(inputs, filters, kernel, t, strides, n): """Inverted Residual Block This function defines a sequence of 1 or more identical layers. # Arguments inputs: Tensor, input tensor of conv layer. filters: Integer, the dimensionality of the output space. kernel: An integer or tuple/list of 2 integers, specifying the width and height of the 2D convolution window. t: Integer, expansion factor. t is always applied to the input size. s: An integer or tuple/list of 2 integers,specifying the strides of the convolution along the width and height.Can be a single integer to specify the same value for all spatial dimensions. n: Integer, layer repeat times. # Returns Output tensor. 
""" x = _bottleneck(inputs, filters, kernel, t, strides) for i in range(1, n): x = _bottleneck(x, filters, kernel, t, 1, True) return x def _depthwise_separable_block(inputs, kernel, strides, padding='same', depth_multiplier=1): '''Depth separable point convolution module''' channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 x = DepthwiseConv2D(kernel_size=kernel, strides=strides, padding=padding, depth_multiplier=depth_multiplier)(inputs) x = BatchNormalization(axis=channel_axis)(x) return Activation('relu')(x) def HLNet(input_shape, cls_num=3): """Higt-Low Resolution Information fusion Network""" # input_shape: input image shape # cls_num: output class number inputs = Input(input_shape) # Step 1: Feature dimension drops to 1/4 x = _conv_block(inputs, 32, (3, 3), strides=2, use_activation=True) x = _depthwise_separable_block(x, (3, 3), strides=2, depth_multiplier=2) x = _depthwise_separable_block(x, (3, 3), strides=2) # step 2: x21 = _inverted_residual_block( x, 64, kernel=(3, 3), t=6, strides=1, n=3 ) x22 = _inverted_residual_block( x, 96, kernel=(3, 3), t=6, strides=2, n=3 ) x23 = _inverted_residual_block( x, 128, kernel=(3, 3), t=6, strides=4, n=3 ) # step 3: x31_t1 = x21 x31_t2 = UpSampling2D(interpolation='bilinear')( _conv_block(x22, 64, (1, 1), use_activation=True)) x31_t3 = UpSampling2D(size=(4, 4), interpolation='bilinear')( _conv_block(x23, 64, (1, 1), use_activation=True)) x31 = Add()([x31_t1, x31_t2, x31_t3]) x32_t1 = _conv_block(x21, 96, (1, 1), strides=2, use_activation=True) x32_t2 = _conv_block(x22, 96, (1, 1), use_activation=True) x32_t3 = UpSampling2D(interpolation='bilinear')( _conv_block(x23, 96, (1, 1), use_activation=True)) x32 = Add()([x32_t1, x32_t2, x32_t3]) x33_t1 = _conv_block(x21, 128, (1, 1), strides=4, use_activation=True) x33_t2 = _conv_block(x22, 128, (1, 1), strides=2, use_activation=True) x33_t3 = _conv_block(x23, 128, (1, 1), use_activation=True) x33 = Add()([x33_t1, x33_t2, x33_t3]) # step 4: x41 = _conv_block(x33, 96, (1, 1)) x42 = UpSampling2D(interpolation='bilinear')(x41) x43 = Concatenate()([x42, x32]) x44 = _conv_block(x43, 64, (1, 1)) x45 = UpSampling2D(interpolation='bilinear')(x44) x46 = Concatenate()([x45, x31]) # step 5: FFM module in BiSeNet x50 = _conv_block(x46, 64, (3, 3)) x51 = AveragePooling2D(pool_size=(1, 1))(x50) x52 = Conv2D(64, (1, 1), use_bias=False, activation='relu')(x51) x53 = Conv2D(64, (1, 1), use_bias=False, activation='sigmoid')(x52) x54 = Multiply()([x53, x50]) x55 = Add()([x50, x54]) # step6: x61 = Conv2D(32, (3, 3), padding='same', dilation_rate=2)(x55) x62 = Conv2D(32, (3, 3), padding='same', dilation_rate=4)(x55) x63 = Conv2D(32, (3, 3), padding='same', dilation_rate=8)(x55) x64 = Add()([x61, x62, x63]) # x61 = _conv_block(x62, cls_num, (1, 1), use_activation=False) x65 = UpSampling2D(size=(8, 8), interpolation='bilinear')(x64) x66 = _conv_block(x65, cls_num, (1, 1), use_activation=False) out = Activation('softmax')(x66) return Model(inputs, out) if __name__ == "__main__": from flops import get_flops # Testing network design model = HLNet(input_shape=(256, 256, 3), cls_num=3) model.summary() get_flops(model) ================================================ FILE: model/lednet.py ================================================ #!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2020/3/27 17:42 # @Author : JackyLUO # @E-mail : lingluo@stumail.neu.edu.cn # @Site : # @File : lednet.py # @Software: PyCharm from keras import layers, models import tensorflow as tf class LEDNet: def __init__(self, 

================================================
FILE: model/lednet.py
================================================
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2020/3/27 17:42
# @Author  : JackyLUO
# @E-mail  : lingluo@stumail.neu.edu.cn
# @Site    :
# @File    : lednet.py
# @Software: PyCharm

from keras import layers, models
import tensorflow as tf


class LEDNet:
    def __init__(self, groups, classes, input_shape):
        self.groups = groups
        self.classes = classes
        self.input_shape = input_shape

    def ss_bt(self, x, dilation, strides=(1, 1), padding='same'):
        x1, x2 = self.channel_split(x)
        filters = int(x.shape[-1]) // self.groups
        x1 = layers.Conv2D(filters, kernel_size=(3, 1), strides=strides, padding=padding)(x1)
        x1 = layers.Activation('relu')(x1)
        x1 = layers.Conv2D(filters, kernel_size=(1, 3), strides=strides, padding=padding)(x1)
        x1 = layers.BatchNormalization()(x1)
        x1 = layers.Activation('relu')(x1)
        x1 = layers.Conv2D(filters, kernel_size=(3, 1), strides=strides, padding=padding,
                           dilation_rate=(dilation, 1))(x1)
        x1 = layers.Activation('relu')(x1)
        x1 = layers.Conv2D(filters, kernel_size=(1, 3), strides=strides, padding=padding,
                           dilation_rate=(1, dilation))(x1)
        x1 = layers.BatchNormalization()(x1)
        x1 = layers.Activation('relu')(x1)
        x2 = layers.Conv2D(filters, kernel_size=(1, 3), strides=strides, padding=padding)(x2)
        x2 = layers.Activation('relu')(x2)
        x2 = layers.Conv2D(filters, kernel_size=(3, 1), strides=strides, padding=padding)(x2)
        x2 = layers.BatchNormalization()(x2)
        x2 = layers.Activation('relu')(x2)
        x2 = layers.Conv2D(filters, kernel_size=(1, 3), strides=strides, padding=padding,
                           dilation_rate=(1, dilation))(x2)
        x2 = layers.Activation('relu')(x2)
        x2 = layers.Conv2D(filters, kernel_size=(3, 1), strides=strides, padding=padding,
                           dilation_rate=(dilation, 1))(x2)
        x2 = layers.BatchNormalization()(x2)
        x2 = layers.Activation('relu')(x2)
        x_concat = layers.concatenate([x1, x2], axis=-1)
        x_add = layers.add([x, x_concat])
        output = self.channel_shuffle(x_add)
        return output

    def channel_shuffle(self, x):
        n, h, w, c = x.shape.as_list()
        x_reshaped = layers.Reshape([h, w, self.groups, int(c // self.groups)])(x)
        x_transposed = layers.Permute((1, 2, 4, 3))(x_reshaped)
        output = layers.Reshape([h, w, c])(x_transposed)
        return output
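    # Worked example of the shuffle with groups=2 and c=4: the Reshape views
    # channels [0, 1, 2, 3] as two groups [[0, 1], [2, 3]]; the Permute swaps
    # the group axis with the within-group axis, giving [[0, 2], [1, 3]]; the
    # final Reshape flattens back to channel order [0, 2, 1, 3], so the two
    # split branches exchange information in the next ss_bt block.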
    def channel_split(self, x):
        def splitter(y):
            # keras Lambda saving bug!!!
            # x_left = layers.Lambda(lambda y: y[:, :, :, :int(int(y.shape[-1]) // self.groups)])(x)
            # x_right = layers.Lambda(lambda y: y[:, :, :, int(int(y.shape[-1]) // self.groups):])(x)
            # return x_left, x_right
            return tf.split(y, num_or_size_splits=self.groups, axis=-1)

        return layers.Lambda(lambda y: splitter(y))(x)

    def down_sample(self, x, filters):
        x_filters = int(x.shape[-1])
        x_conv = layers.Conv2D(filters - x_filters, kernel_size=3,
                               strides=(2, 2), padding='same')(x)
        x_pool = layers.MaxPool2D()(x)
        x = layers.concatenate([x_conv, x_pool], axis=-1)
        x = layers.BatchNormalization()(x)
        x = layers.Activation('relu')(x)
        return x

    def apn_module(self, x):
        def right(x):
            x = layers.AveragePooling2D()(x)
            x = layers.Conv2D(self.classes, kernel_size=1, padding='same')(x)
            x = layers.BatchNormalization()(x)
            x = layers.Activation('relu')(x)
            x = layers.UpSampling2D(interpolation='bilinear')(x)
            return x

        def conv(x, filters, kernel_size, stride):
            x = layers.Conv2D(filters, kernel_size=kernel_size,
                              strides=(stride, stride), padding='same')(x)
            x = layers.BatchNormalization()(x)
            x = layers.Activation('relu')(x)
            return x

        x_7 = conv(x, int(x.shape[-1]), 7, stride=2)
        x_5 = conv(x_7, int(x.shape[-1]), 5, stride=2)
        x_3 = conv(x_5, int(x.shape[-1]), 3, stride=2)

        x_3_1 = conv(x_3, self.classes, 3, stride=1)
        x_3_1_up = layers.UpSampling2D(interpolation='bilinear')(x_3_1)
        x_5_1 = conv(x_5, self.classes, 5, stride=1)
        x_3_5 = layers.add([x_5_1, x_3_1_up])
        x_3_5_up = layers.UpSampling2D(interpolation='bilinear')(x_3_5)
        x_7_1 = conv(x_7, self.classes, 3, stride=1)
        x_3_5_7 = layers.add([x_7_1, x_3_5_up])
        x_3_5_7_up = layers.UpSampling2D(interpolation='bilinear')(x_3_5_7)

        x_middle = conv(x, self.classes, 1, stride=1)
        x_middle = layers.multiply([x_3_5_7_up, x_middle])

        x_right = right(x)
        x_middle = layers.add([x_middle, x_right])
        return x_middle

    def encoder(self, x):
        x = self.down_sample(x, filters=32)
        for _ in range(3):
            x = self.ss_bt(x, dilation=1)
        x = self.down_sample(x, filters=64)
        for _ in range(2):
            x = self.ss_bt(x, dilation=1)
        x = self.down_sample(x, filters=128)
        dilation_rate = [1, 2, 5, 9, 2, 5, 9, 17]
        for dilation in dilation_rate:
            x = self.ss_bt(x, dilation=dilation)
        return x

    def decoder(self, x):
        x = self.apn_module(x)
        x = layers.UpSampling2D(size=8, interpolation='bilinear')(x)
        x = layers.Conv2D(self.classes, kernel_size=3, padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation('softmax')(x)
        return x

    def model(self):
        inputs = layers.Input(shape=self.input_shape)
        encoder_out = self.encoder(inputs)
        output = self.decoder(encoder_out)
        return models.Model(inputs, output)


if __name__ == '__main__':
    from flops import get_flops

    model = LEDNet(2, 3, (256, 256, 3)).model()
    model.summary()
    get_flops(model)
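The LEDNet encoder halves the resolution three times (a factor of 8), and the decoder's `UpSampling2D(size=8)` restores it, so the output mask should match the input size; a quick sanity check, with the expected shape noted as a comment:

    from model.lednet import LEDNet

    net = LEDNet(groups=2, classes=3, input_shape=(256, 256, 3)).model()
    print(net.output_shape)  # expected: (None, 256, 256, 3)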

================================================
FILE: model/mobilenet.py
================================================
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2020/3/27 17:43
# @Author  : JackyLUO
# @E-mail  : lingluo@stumail.neu.edu.cn
# @Site    :
# @File    : mobilenet.py
# @Software: PyCharm

from keras.models import *
from keras.layers import *


def conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
    filters = int(filters * alpha)
    x = ZeroPadding2D(padding=(1, 1), name='conv1_pad')(inputs)
    x = Conv2D(filters, kernel, padding='valid', use_bias=False,
               strides=strides, name='conv1')(x)
    x = BatchNormalization(axis=3, name='conv1_bn')(x)
    return ReLU(6, name='conv1_relu')(x)


def depthwise_conv_block(inputs, pointwise_conv_filters, alpha,
                         depth_multiplier=1, strides=(1, 1), block_id=1):
    pointwise_conv_filters = int(pointwise_conv_filters * alpha)
    x = ZeroPadding2D((1, 1), name='conv_pad_%d' % block_id)(inputs)
    x = DepthwiseConv2D((3, 3), padding='valid',
                        depth_multiplier=depth_multiplier,
                        strides=strides, use_bias=False,
                        name='conv_dw_%d' % block_id)(x)
    x = BatchNormalization(axis=3, name='conv_dw_%d_bn' % block_id)(x)
    x = ReLU(6, name='conv_dw_%d_relu' % block_id)(x)
    x = Conv2D(pointwise_conv_filters, (1, 1), padding='same',
               use_bias=False, strides=(1, 1),
               name='conv_pw_%d' % block_id)(x)
    x = BatchNormalization(axis=3, name='conv_pw_%d_bn' % block_id)(x)
    return ReLU(6, name='conv_pw_%d_relu' % block_id)(x)


def MobileNet(input_shape, cls_num, alpha=0.5):
    inputs = Input(input_shape)

    x = conv_block(inputs, 16, alpha, strides=(2, 2))
    x = depthwise_conv_block(x, 16, alpha, 6, block_id=1)
    f1 = x
    x = depthwise_conv_block(x, 32, alpha, 6, strides=(2, 2), block_id=2)
    x = depthwise_conv_block(x, 32, alpha, 6, block_id=3)
    f2 = x
    x = depthwise_conv_block(x, 64, alpha, 6, strides=(2, 2), block_id=4)
    x = depthwise_conv_block(x, 64, alpha, 6, block_id=5)
    f3 = x
    x = depthwise_conv_block(x, 128, alpha, 6, strides=(2, 2), block_id=6)
    x = depthwise_conv_block(x, 128, alpha, 6, block_id=7)
    x = depthwise_conv_block(x, 128, alpha, 6, block_id=8)
    x = depthwise_conv_block(x, 128, alpha, 6, block_id=9)
    x = depthwise_conv_block(x, 128, alpha, 6, block_id=10)
    x = depthwise_conv_block(x, 128, alpha, 6, block_id=11)

    o = x
    o = Conv2D(128, (3, 3), activation='relu', padding='same')(o)
    o = BatchNormalization()(o)

    # decode
    o = UpSampling2D((2, 2))(o)
    o = concatenate([o, f3], axis=-1)
    o = Conv2D(64, (3, 3), padding='same')(o)
    o = BatchNormalization()(o)
    o = UpSampling2D((2, 2))(o)
    o = concatenate([o, f2], axis=-1)
    o = Conv2D(32, (3, 3), padding='same')(o)
    o = BatchNormalization()(o)
    o = UpSampling2D((2, 2))(o)
    o = concatenate([o, f1], axis=-1)
    o = Conv2D(16, (3, 3), padding='same')(o)
    o = BatchNormalization()(o)

    o = Conv2D(cls_num, (3, 3), padding='same')(o)
    o = UpSampling2D((2, 2))(o)
    o = Activation('softmax')(o)

    return Model(inputs, o)


if __name__ == '__main__':
    from flops import get_flops

    model = MobileNet(input_shape=(256, 256, 3), cls_num=3)
    model.summary()
    get_flops(model, True)
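Every filter count in this MobileNet passes through `int(filters * alpha)`, so the default `alpha=0.5` halves the width of each stage; a one-line check of the effective widths:

    print([int(f * 0.5) for f in (16, 32, 64, 128)])  # -> [8, 16, 32, 64]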

================================================
FILE: pipline_test.py
================================================
from __future__ import print_function, division

from keras.models import load_model
import numpy as np
import time
import cv2 as cv
import os
import sys
import argparse

from sklearn.externals import joblib
import matplotlib.pyplot as plt

from keras.applications.imagenet_utils import preprocess_input as pinput
from keras import backend as K
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.ERROR)

from segmentation_models.backbones import get_preprocessing
from model.hlnet import HLNet
# from model.hrnet import HRNet  # model/hrnet.py is not present in this repository
from segmentation_models import PSPNet, Unet, FPN, Linknet
from mtcnn.mtcnn import MTCNN
from metric import *
from imutils import paths

IMG_SIZE = None


def color_moments(image, mask, color_space):
    """Color moment features (mean, standard deviation and skewness per channel).

    image: raw BGR image
    mask: image mask (only pixels where mask != 0 are used)
    color_space: 'rgb', 'lab', 'ycrcb' or 'hsv'
    """
    assert image.shape[:2] == mask.shape
    assert color_space.lower() in ['lab', 'rgb', 'ycrcb', 'hsv']

    if color_space.lower() == 'rgb':
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    elif color_space.lower() == 'hsv':
        image = cv.cvtColor(image, cv.COLOR_BGR2HSV)
    elif color_space.lower() == 'lab':
        image = cv.cvtColor(image, cv.COLOR_BGR2LAB)
    elif color_space.lower() == 'ycrcb':
        image = cv.cvtColor(image, cv.COLOR_BGR2YCrCb)
    else:
        raise ValueError("Color space error...")

    # Split image channels
    c1, c2, c3 = cv.split(image)
    color_feature = []

    # Only process the mask != 0 region
    c1 = c1[np.where(mask != 0)]
    c2 = c2[np.where(mask != 0)]
    c3 = c3[np.where(mask != 0)]

    # Extract mean
    mean_1 = np.mean(c1)
    mean_2 = np.mean(c2)
    mean_3 = np.mean(c3)

    # Extract standard deviation
    variance_1 = np.std(c1)
    variance_2 = np.std(c2)
    variance_3 = np.std(c3)

    # Extract skewness (each channel against its own mean)
    skewness_1 = np.mean(np.abs(c1 - mean_1) ** 3) ** (1. / 3)
    skewness_2 = np.mean(np.abs(c2 - mean_2) ** 3) ** (1. / 3)
    skewness_3 = np.mean(np.abs(c3 - mean_3) ** 3) ** (1. / 3)

    color_feature.extend(
        [mean_1, mean_2, mean_3,
         variance_1, variance_2, variance_3,
         skewness_1, skewness_2, skewness_3])

    return color_feature


def _result_map_toimg(result_map):
    '''Render the argmax of the network output as an RGB mask image.'''
    img = np.zeros((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)
    argmax_id = np.argmax(result_map, axis=-1)

    background = (argmax_id == 0)
    skin = (argmax_id == 1)
    hair = (argmax_id == 2)

    img[:, :, 0] = np.where(background, 255, 0)
    img[:, :, 1] = np.where(skin, 255, 0)
    img[:, :, 2] = np.where(hair, 255, 0)

    return img


def imcrop(img, x1, y1, x2, y2):
    if x1 < 0 or y1 < 0 or x2 > img.shape[1] or y2 > img.shape[0]:
        img, x1, x2, y1, y2 = pad_img_to_fit_bbox(img, x1, x2, y1, y2)
    return img[y1:y2, x1:x2, :]


def pad_img_to_fit_bbox(img, x1, x2, y1, y2):
    img = cv.copyMakeBorder(img, -min(0, y1), max(y2 - img.shape[0], 0),
                            -min(0, x1), max(x2 - img.shape[1], 0),
                            cv.BORDER_REPLICATE)
    y2 += -min(0, y1)
    y1 += -min(0, y1)
    x2 += -min(0, x1)
    x1 += -min(0, x1)
    return img, x1, x2, y1, y2
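# Worked example for imcrop: a margin-expanded face box may extend past the
# image border. pad_img_to_fit_bbox replicate-pads the image first and shifts
# the coordinates accordingly, so for a 100x100 input,
#     imcrop(img, -10, -10, 110, 110)
# pads 10 px on every side and returns a (120, 120, 3) patch.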
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--image_size", '-is', help="size of image",
                        type=int, default=224)
    parser.add_argument("--backbone", '-bb', help="backbone of image",
                        type=str, default='seresnet18')
    parser.add_argument("--model_path", '-mp', help="the path of model",
                        type=str, default='./checkpoints/CelebA/HLNet/model-222-0.159.h5')
    parser.add_argument("--margin", help="margin of image",
                        type=float, default=0.3)
    parser.add_argument('--use_design', action='store_false')
    args = parser.parse_args()

    IMG_SIZE = args.image_size
    MODEL_PATH = args.model_path
    BACKBONE = args.backbone
    USE_DESIGN = args.use_design

    detector = MTCNN()
    clf = joblib.load('./experiments/skinGrade/skinColor.pkl')
    model = load_model(MODEL_PATH,
                       custom_objects={'mean_accuracy': mean_accuracy,
                                       'mean_iou': mean_iou,
                                       'frequency_weighted_iou': frequency_weighted_iou,
                                       'pixel_accuracy': pixel_accuracy})
    colorHue = ['Ivory white', 'Porcelain white', 'Natural color', 'Yellowish', 'Black']

    for img_path in paths.list_images("./data/Testing"):
        t = time.time()
        org_img = cv.imread(img_path)
        if org_img is None:
            raise ValueError("Reading image error...")

        org_img_rgb = org_img[:, :, ::-1]  # BGR -> RGB
        detected = detector.detect_faces(org_img_rgb)
        if len(detected) != 1:
            print("[INFO] multi faces or no face...")
            continue

        d = detected[0]['box']
        x1, y1, x2, y2, w, h = d[0], d[1], d[0] + d[2], d[1] + d[3], d[2], d[3]
        xw1 = int(x1 - args.margin * w)
        yw1 = int(y1 - args.margin * h)
        xw2 = int(x2 + args.margin * w)
        yw2 = int(y2 + args.margin * h)

        cropped_img = imcrop(org_img, xw1, yw1, xw2, yw2)
        o_h, o_w, _ = cropped_img.shape
        cropped_img_resize = cv.resize(cropped_img, (IMG_SIZE, IMG_SIZE))

        img = cropped_img_resize[np.newaxis, :]
        # only subtract the mean value
        img = pinput(img)

        result_map = model.predict(img)[0]
        mask = _result_map_toimg(result_map)
        mask = cv.resize(mask, (o_w, o_h))

        # Face (skin) channel
        mask_face = mask[:, :, 1]
        features = color_moments(cropped_img, mask_face, color_space='ycrcb')
        features = np.array(features, np.float32)[np.newaxis, :]
        skinHue = colorHue[clf.predict(features)[0]]

        cv.rectangle(org_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv.putText(org_img, 'Color: {}'.format(skinHue), (x1, y1 + 30),
                   cv.FONT_HERSHEY_PLAIN, 1, (0, 255, 0), 1)
        print(time.time() - t)  # testing time

        cv.imshow("image", org_img)
        cv.waitKey(-1)


================================================
FILE: test.py
================================================
from __future__ import print_function, division

from keras.models import load_model
from keras.applications.imagenet_utils import preprocess_input as pinput
import cv2 as cv
import numpy as np
import os
import argparse

from metric import *
import glob
from model.fast_scnn import resize_image
from segmentation_models.losses import *

import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

IMG_SIZE = None


def vis_parsing_maps(im, parsing_anno, data_name):
    part_colors = [[255, 255, 255], [0, 255, 0], [255, 0, 0]]
    if data_name == 'figaro1k':
        part_colors = [[255, 255, 255], [255, 0, 0]]

    im = np.array(im)
    vis_im = im.copy().astype(np.uint8)
    vis_parsing_anno_color = np.zeros(
        (parsing_anno.shape[0], parsing_anno.shape[1], 3))

    for pi in range(len(part_colors)):
        index = np.where(parsing_anno == pi)
        vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi]

    vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8)
    # Guided filter
    # vis_parsing_anno_color = cv.ximgproc.guidedFilter(
    #     guide=vis_im, src=vis_parsing_anno_color, radius=4, eps=50, dDepth=-1)
    vis_im = cv.addWeighted(vis_im, 0.7, vis_parsing_anno_color, 0.3, 0)

    return vis_im


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--image_size", help="size of image",
                        type=int, default=256)
    parser.add_argument("--model_path", help="the path of model", type=str,
                        default='./weights/celebhair/exper/fastscnn/model.h5')
    args = parser.parse_args()

    IMG_SIZE = args.image_size
    MODEL_PATH = args.model_path

    if MODEL_PATH.split('/')[-2] == 'lednet':
        from model.lednet import LEDNet

        model = LEDNet(2, 3, (256, 256, 3)).model()
        model.load_weights(MODEL_PATH)
    else:
        model = load_model(MODEL_PATH,
                           custom_objects={'mean_accuracy': mean_accuracy,
                                           'mean_iou': mean_iou,
                                           'frequency_weighted_iou': frequency_weighted_iou,
                                           'pixel_accuracy': pixel_accuracy,
                                           'categorical_crossentropy_plus_dice_loss': cce_dice_loss,
                                           'resize_image': resize_image})

    data_name = MODEL_PATH.split('/')[2]
    for img_path in glob.glob(os.path.join("./demo", data_name, "*.jpg")):
        img_basename = os.path.basename(img_path)
        name = os.path.splitext(img_basename)[0]

        org_img = cv.imread(img_path)
        if org_img is None:
            raise IOError("Reading image error...")
        h, w, _ = org_img.shape

        img_resize = cv.resize(org_img, (IMG_SIZE, IMG_SIZE))
        img = img_resize[np.newaxis, :]
        # pre-processing
        img = pinput(img)

        result_map = np.argmax(model.predict(img)[0], axis=-1)
        out = vis_parsing_maps(img_resize, result_map, data_name)
        out = cv.resize(out, (w, h), interpolation=cv.INTER_NEAREST)
        cv.imwrite(os.path.join("./demo", data_name, "{}.png").format(name), out)
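test.py derives both the dataset name and the model type from the checkpoint path, so the directory layout is significant; with the default path the two lookups resolve as follows:

    path = './weights/celebhair/exper/fastscnn/model.h5'
    print(path.split('/')[2])   # 'celebhair' -> demo images are read from ./demo/celebhair
    print(path.split('/')[-2])  # 'fastscnn'  -> the LEDNet branch is taken only when this is 'lednet'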

================================================
FILE: train.py
================================================
import argparse

from data_loader import HairGenerator
from keras.callbacks import ModelCheckpoint, CSVLogger, TensorBoard, LearningRateScheduler
import os
import warnings
from keras import optimizers
from keras import backend as K  # needed for K.clear_session() at the end of main()
from keras.regularizers import l2
from metric import *
from segmentation_models.losses import *
import numpy as np
from albumentations import *

from model.hlnet import HLNet
from model.dfanet import DFANet
from model.enet import ENet
from model.lednet import LEDNet
from model.mobilenet import MobileNet
from model.fast_scnn import Fast_SCNN

warnings.filterwarnings("ignore")
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

parser = argparse.ArgumentParser()
parser.add_argument("--batch_size", '-b', help="batch size",
                    type=int, default=64)
parser.add_argument("--image_size", '-i', help="image size",
                    type=int, default=256)
parser.add_argument("--backbone", '-bb', help="backbone of the network",
                    type=str, default=None)
parser.add_argument("--epoches", '-e', help="epoch size",
                    type=int, default=150)
parser.add_argument("--model_name", help="model's name",
                    choices=['hlnet', 'fastscnn', 'lednet', 'dfanet', 'enet', 'mobilenet'],
                    type=str, default='hlnet')
parser.add_argument("--learning_rate", help="learning rate",
                    type=float, default=2.5e-3)
parser.add_argument("--checkpoints", help="where is the checkpoint",
                    type=str, default='./weights')
parser.add_argument("--class_number", help="number of output",
                    type=int, default=3)
parser.add_argument("--data_dir", help="path of dataset",
                    type=str, default='./data/CelebA')
args = parser.parse_args()


def get_model(name):
    if name == 'hlnet':
        model = HLNet(input_shape=(IMG_SIZE, IMG_SIZE, 3), cls_num=CLS_NUM)
    elif name == 'fastscnn':
        model = Fast_SCNN(num_classes=CLS_NUM,
                          input_shape=(IMG_SIZE, IMG_SIZE, 3)).model()
    elif name == 'lednet':
        model = LEDNet(groups=2, classes=CLS_NUM,
                       input_shape=(IMG_SIZE, IMG_SIZE, 3)).model()
    elif name == 'dfanet':
        model = DFANet(input_shape=(IMG_SIZE, IMG_SIZE, 3),
                       cls_num=CLS_NUM, size_factor=2)
    elif name == 'enet':
        model = ENet(input_shape=(IMG_SIZE, IMG_SIZE, 3), cls_num=CLS_NUM)
    elif name == 'mobilenet':
        model = MobileNet(input_shape=(IMG_SIZE, IMG_SIZE, 3), cls_num=CLS_NUM)
    else:
        raise NameError("No corresponding model...")

    return model


class PolyDecay:
    '''Polynomial learning-rate decay schedule.'''

    def __init__(self, initial_lr, power, n_epochs):
        self.initial_lr = initial_lr
        self.power = power
        self.n_epochs = n_epochs

    def scheduler(self, epoch):
        return self.initial_lr * np.power(1.0 - 1.0 * epoch / self.n_epochs, self.power)


def set_regularization(model,
                       kernel_regularizer=None,
                       bias_regularizer=None,
                       activity_regularizer=None):
    '''Apply parameter regularization to reduce model overfitting.

    Note: assigning a regularizer to a layer that is already built does not
    add the corresponding loss until the model is rebuilt (e.g. serialized
    to a config and reloaded).
    '''
    for layer in model.layers:
        if kernel_regularizer is not None and hasattr(layer, 'kernel_regularizer'):
            layer.kernel_regularizer = kernel_regularizer
        if bias_regularizer is not None and hasattr(layer, 'bias_regularizer'):
            layer.bias_regularizer = bias_regularizer
        if activity_regularizer is not None and hasattr(layer, 'activity_regularizer'):
            layer.activity_regularizer = activity_regularizer
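# The schedule with the parser defaults (initial_lr=2.5e-3, power=0.9,
# n_epochs=150), values rounded:
#     PolyDecay(2.5e-3, 0.9, 150).scheduler(0)    -> 2.50e-03
#     PolyDecay(2.5e-3, 0.9, 150).scheduler(75)   -> 1.34e-03
#     PolyDecay(2.5e-3, 0.9, 150).scheduler(149)  -> 2.75e-05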
def main():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)

    global IMG_SIZE
    global CLS_NUM

    ROOT_DIR = args.data_dir
    BACKBONE = args.backbone
    BATCH_SIZE = args.batch_size
    IMG_SIZE = args.image_size
    EPOCHS = args.epoches
    LR = args.learning_rate
    CHECKPOINT = args.checkpoints
    CLS_NUM = args.class_number
    MODEL_NAME = args.model_name

    train_transformer = Compose([
        # GaussNoise(p=0.2),
        ShiftScaleRotate(
            shift_limit=0.1, scale_limit=0.2, rotate_limit=20, p=0.5),
        HorizontalFlip(p=0.5),
        # HueSaturationValue(p=0.5),
        # RandomBrightnessContrast(0.5),
        # GridDistortion(distort_limit=0.2, p=0.5),
        Resize(height=IMG_SIZE, width=IMG_SIZE, always_apply=True),
    ])
    val_transformer = Compose(
        [Resize(height=IMG_SIZE, width=IMG_SIZE, always_apply=True)])

    train_generator = HairGenerator(
        train_transformer, ROOT_DIR, mode='Training',
        batch_size=BATCH_SIZE, nb_classes=CLS_NUM,
        backbone=BACKBONE, shuffle=True)
    val_generator = HairGenerator(
        val_transformer, ROOT_DIR, mode='Testing',
        batch_size=BATCH_SIZE, nb_classes=CLS_NUM, backbone=BACKBONE)

    # Loading models
    model = get_model(MODEL_NAME)
    set_regularization(model, kernel_regularizer=l2(2e-5))
    model.compile(optimizer=optimizers.SGD(lr=LR, momentum=0.98),
                  loss=cce_dice_loss,
                  metrics=[mean_iou, frequency_weighted_iou,
                           mean_accuracy, pixel_accuracy])

    CHECKPOINT = CHECKPOINT + '/' + MODEL_NAME
    if not os.path.exists(CHECKPOINT):
        os.makedirs(CHECKPOINT)

    checkpoint = ModelCheckpoint(filepath=os.path.join(CHECKPOINT, 'model-{epoch:03d}.h5'),
                                 monitor='val_loss', save_best_only=True, verbose=1)
    tensorboard = TensorBoard(log_dir=os.path.join(CHECKPOINT, 'logs'))
    csvlogger = CSVLogger(os.path.join(CHECKPOINT, "result.csv"))
    lr_decay = LearningRateScheduler(PolyDecay(LR, 0.9, EPOCHS).scheduler)

    model.fit_generator(
        train_generator,
        len(train_generator),
        validation_data=val_generator,
        validation_steps=len(val_generator),
        epochs=EPOCHS,
        verbose=1,
        callbacks=[checkpoint, tensorboard, csvlogger, lr_decay]
    )

    K.clear_session()


if __name__ == '__main__':
    main()
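A representative training invocation using the defaults defined above (every flag is optional; the data directory is assumed to contain the Training and Testing splits consumed by HairGenerator, and the flag spelling `--epoches` follows the parser definition):

    python train.py --model_name hlnet --batch_size 64 --image_size 256 \
        --epoches 150 --learning_rate 2.5e-3 --data_dir ./data/CelebA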