Repository: hellochick/Indoor-segmentation Branch: master Commit: 8f204708e8e7 Files: 7 Total size: 44.9 KB Directory structure: gitextract_ubdnr53j/ ├── README.md ├── color150.mat ├── download_models.sh ├── inference.py ├── model.py ├── network.py └── requirements.txt ================================================ FILE CONTENTS ================================================ ================================================ FILE: README.md ================================================ # Indoor-segmentation ## Introduction This is an implementation of TensorFlow-based (TF1) DeepLab-ResNet for Indoor-scene segmentation. The provided model is trained on the [ade20k](http://sceneparsing.csail.mit.edu/) dataset. The code is inherited from [tensorflow-deeplab-resnet](https://github.com/DrSleep/tensorflow-deeplab-resnet) by [Drsleep](https://drsleep.github.io/). Since this model is for `robot navigating`, we `re-label 150 classes into 27 classes` in order to easily classify obstacles and road. ### Re-label list: ``` 1 (wall) <- 9(window), 15(door), 33(fence), 43(pillar), 44(sign board), 145(bullertin board) 4 (floor) <- 7(road), 14(ground, 30(field), 53(path), 55(runway) 5 (tree) <- 18(plant) 8 (furniture) <- 8(bed), 11(cabinet), 14(sofa), 16(table), 19(curtain), 20(chair), 25(shelf), 34(desk) 7 (stairs) <- 54(stairs) 26(others) <- class number larger than 26 ``` ## Quick Start ### Install dependency The codes are test on `Python 3.7`. Please run the following script to install the packages. ```bash pip install -r requirements.txt ``` ### Download pretrained model Run the following script to download the provided pretrained model from Google Drive. ```bash ./download_models.sh ``` Or directly get the pretrained model from [Google Drive](https://drive.google.com/file/d/1o7QrlNxH6BX6uYatlR06-A_cutWD9sNg/view?usp=sharing). ### Demo Run the following sample command for inference ``` python inference.py --img_path input/IMG_0416_640x480.png --restore_from=pretrained_models/ResNet101/ ``` ## Result ### Video [![Demo video](https://img.youtube.com/vi/4OqW3M-eqaQ/0.jpg)](https://youtu.be/4OqW3M-eqaQ) ### Image Input image | Output image :-------------------------:|:-------------------------: ![](https://github.com/hellochick/Indoor-segmentation/blob/master/input/IMG_0416_640x480.png) | ![](https://github.com/hellochick/Indoor-segmentation/blob/master/output/IMG_0416_640x480.png) ![](https://github.com/hellochick/Indoor-segmentation/blob/master/input/IMG_0417_640x480.png) | ![](https://github.com/hellochick/Indoor-segmentation/blob/master/output/IMG_0417_640x480.png) ![](https://github.com/hellochick/Indoor-segmentation/blob/master/input/IMG_0418_640x480.png) | ![](https://github.com/hellochick/Indoor-segmentation/blob/master/output/IMG_0418_640x480.png) ================================================ FILE: download_models.sh ================================================ gdown --fuzzy 1o7QrlNxH6BX6uYatlR06-A_cutWD9sNg unzip pretrained_models.zip ================================================ FILE: inference.py ================================================ from __future__ import print_function import argparse import os import sys import time import scipy.io as sio from PIL import Image import tensorflow as tf import numpy as np from model import DeepLabResNetModel IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) NUM_CLASSES = 27 SAVE_DIR = './output/' RESTORE_PATH = './restore_weights/' matfn = 'color150.mat' def get_arguments(): parser = argparse.ArgumentParser(description="Indoor segmentation parser.") parser.add_argument("--img_path", type=str, default='', help="Path to the RGB image file.") parser.add_argument("--restore_from", type=str, default=RESTORE_PATH, help="checkpoint location") return parser.parse_args() def read_labelcolours(matfn): mat = sio.loadmat(matfn) color_table = mat['colors'] shape = color_table.shape color_list = [tuple(color_table[i]) for i in range(shape[0])] return color_list def decode_labels(mask, num_images=1, num_classes=150): label_colours = read_labelcolours(matfn) n, h, w, c = mask.shape assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' % (n, num_images) outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8) for i in range(num_images): img = Image.new('RGB', (len(mask[i, 0]), len(mask[i]))) pixels = img.load() for j_, j in enumerate(mask[i, :, :, 0]): for k_, k in enumerate(j): if k < num_classes: pixels[k_,j_] = label_colours[k] outputs[i] = np.array(img) return outputs def load(saver, sess, ckpt_path): saver.restore(sess, ckpt_path) print("Restored model parameters from {}".format(ckpt_path)) def main(): args = get_arguments() filename = args.img_path.split('/')[-1] file_type = filename.split('.')[-1] if os.path.isfile(args.img_path): print('successful load img: {0}'.format(args.img_path)) else: print('not found file: {0}'.format(args.img_path)) sys.exit(0) # Prepare image. if file_type.lower() == 'png': img = tf.image.decode_png(tf.read_file(args.img_path), channels=3) elif file_type.lower() == 'jpg': img = tf.image.decode_jpeg(tf.read_file(args.img_path), channels=3) else: print('cannot process {0} file.'.format(file_type)) # Convert RGB to BGR. img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img) img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]), dtype=tf.float32) # Extract mean. img -= IMG_MEAN # Create network. net = DeepLabResNetModel({'data': tf.expand_dims(img, dim=0)}, is_training=False, num_classes=NUM_CLASSES) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc_out'] raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(img)[0:2,]) raw_output_up = tf.argmax(raw_output_up, dimension=3) pred = tf.expand_dims(raw_output_up, dim=3) # Set up TF session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) # Load weights. ckpt = tf.train.get_checkpoint_state(args.restore_from) if ckpt and ckpt.model_checkpoint_path: loader = tf.train.Saver(var_list=restore_var) load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1]) load(loader, sess, ckpt.model_checkpoint_path) else: print('No checkpoint file found.') load_step = 0 # Perform inference. preds = sess.run(pred) msk = decode_labels(preds, num_classes=NUM_CLASSES) im = Image.fromarray(msk[0]) if not os.path.exists(SAVE_DIR): os.makedirs(SAVE_DIR) im.save(SAVE_DIR + filename) print('The output file has been saved to {0}'.format(SAVE_DIR + filename)) if __name__ == '__main__': main() ================================================ FILE: model.py ================================================ # Converted to TensorFlow .caffemodel # with the DeepLab-ResNet configuration. # The batch normalisation layer is provided by # the slim library (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim). from network import Network import tensorflow as tf class DeepLabResNetModel(Network): def setup(self, is_training, num_classes): '''Network definition. Args: is_training: whether to update the running mean and variance of the batch normalisation layer. If the batch size is small, it is better to keep the running mean and variance of the-pretrained model frozen. num_classes: number of classes to predict (including background). ''' (self.feed('data') .conv(7, 7, 64, 2, 2, biased=False, relu=False, name='conv1') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn_conv1') .max_pool(3, 3, 2, 2, name='pool1') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch1') .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch1')) (self.feed('pool1') .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2a_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2a_branch2a') .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2a_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2a_branch2b') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch2c')) (self.feed('bn2a_branch1', 'bn2a_branch2c') .add(name='res2a') .relu(name='res2a_relu') .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2b_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2b_branch2a') .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2b_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2b_branch2b') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2b_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn2b_branch2c')) (self.feed('res2a_relu', 'bn2b_branch2c') .add(name='res2b') .relu(name='res2b_relu') .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2c_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2c_branch2a') .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2c_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2c_branch2b') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2c_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn2c_branch2c')) (self.feed('res2b_relu', 'bn2c_branch2c') .add(name='res2c') .relu(name='res2c_relu') .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res3a_branch1') .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch1')) (self.feed('res2c_relu') .conv(1, 1, 128, 2, 2, biased=False, relu=False, name='res3a_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3a_branch2a') .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3a_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3a_branch2b') .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3a_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch2c')) (self.feed('bn3a_branch1', 'bn3a_branch2c') .add(name='res3a') .relu(name='res3a_relu') .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b1_branch2a') .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b1_branch2b') .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b1_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b1_branch2c')) (self.feed('res3a_relu', 'bn3b1_branch2c') .add(name='res3b1') .relu(name='res3b1_relu') .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b2_branch2a') .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b2_branch2b') .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b2_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b2_branch2c')) (self.feed('res3b1_relu', 'bn3b2_branch2c') .add(name='res3b2') .relu(name='res3b2_relu') .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b3_branch2a') .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b3_branch2b') .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b3_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b3_branch2c')) (self.feed('res3b2_relu', 'bn3b3_branch2c') .add(name='res3b3') .relu(name='res3b3_relu') .conv(1, 1, 1024, 2, 2, biased=False, relu=False, name='res4a_branch1') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch1')) ### block 4 (self.feed('res3b3_relu') .conv(1, 1, 256, 2, 2, biased=False, relu=False, name='res4a_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4a_branch2a') .atrous_conv(3, 3, 256, 1, padding='SAME', biased=False, relu=False, name='res4a_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4a_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch2c')) (self.feed('bn4a_branch1', 'bn4a_branch2c') .add(name='res4a') .relu(name='res4a_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b1_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b1_branch2a') .atrous_conv(3, 3, 256, 4, padding='SAME', biased=False, relu=False, name='res4b1_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b1_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b1_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b1_branch2c')) (self.feed('res4a_relu', 'bn4b1_branch2c') .add(name='res4b1') .relu(name='res4b1_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b2_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b2_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b2_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b2_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b2_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b2_branch2c')) ### block 4 ### block 5 (self.feed('res4b1_relu', 'bn4b2_branch2c') .add(name='res4b2') .relu(name='res4b2_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b3_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b3_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b3_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b3_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b3_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b3_branch2c')) (self.feed('res4b2_relu', 'bn4b3_branch2c') .add(name='res4b3') .relu(name='res4b3_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b4_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b4_branch2a') .atrous_conv(3, 3, 256, 4, padding='SAME', biased=False, relu=False, name='res4b4_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b4_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b4_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b4_branch2c')) (self.feed('res4b3_relu', 'bn4b4_branch2c') .add(name='res4b4') .relu(name='res4b4_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b5_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b5_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b5_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b5_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b5_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b5_branch2c')) ### block 5 ### block 6 (self.feed('res4b4_relu', 'bn4b5_branch2c') .add(name='res4b5') .relu(name='res4b5_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b6_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b6_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b6_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b6_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b6_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b6_branch2c')) (self.feed('res4b5_relu', 'bn4b6_branch2c') .add(name='res4b6') .relu(name='res4b6_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b7_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b7_branch2a') .atrous_conv(3, 3, 256, 4, padding='SAME', biased=False, relu=False, name='res4b7_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b7_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b7_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b7_branch2c')) (self.feed('res4b6_relu', 'bn4b7_branch2c') .add(name='res4b7') .relu(name='res4b7_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b8_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b8_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b8_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b8_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b8_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b8_branch2c')) ### block 6 ### block 7 (self.feed('res4b7_relu', 'bn4b8_branch2c') .add(name='res4b8') .relu(name='res4b8_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b9_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b9_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b9_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b9_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b9_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b9_branch2c')) (self.feed('res4b8_relu', 'bn4b9_branch2c') .add(name='res4b9') .relu(name='res4b9_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b10_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b10_branch2a') .atrous_conv(3, 3, 256, 4, padding='SAME', biased=False, relu=False, name='res4b10_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b10_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b10_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b10_branch2c')) (self.feed('res4b9_relu', 'bn4b10_branch2c') .add(name='res4b10') .relu(name='res4b10_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b11_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b11_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b11_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b11_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b11_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b11_branch2c')) ### block 7 (self.feed('res4b10_relu', 'bn4b11_branch2c') .add(name='res4b11') .relu(name='res4b11_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b12_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b12_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b12_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b12_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b12_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b12_branch2c')) (self.feed('res4b11_relu', 'bn4b12_branch2c') .add(name='res4b12') .relu(name='res4b12_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b13_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b13_branch2a') .atrous_conv(3, 3, 256, 4, padding='SAME', biased=False, relu=False, name='res4b13_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b13_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b13_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b13_branch2c')) (self.feed('res4b12_relu', 'bn4b13_branch2c') .add(name='res4b13') .relu(name='res4b13_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b14_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b14_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b14_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b14_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b14_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b14_branch2c')) (self.feed('res4b13_relu', 'bn4b14_branch2c') .add(name='res4b14') .relu(name='res4b14_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b15_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b15_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b15_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b15_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b15_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b15_branch2c')) (self.feed('res4b14_relu', 'bn4b15_branch2c') .add(name='res4b15') .relu(name='res4b15_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b16_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b16_branch2a') .atrous_conv(3, 3, 256, 4, padding='SAME', biased=False, relu=False, name='res4b16_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b16_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b16_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b16_branch2c')) (self.feed('res4b15_relu', 'bn4b16_branch2c') .add(name='res4b16') .relu(name='res4b16_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b17_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b17_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b17_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b17_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b17_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b17_branch2c')) (self.feed('res4b16_relu', 'bn4b17_branch2c') .add(name='res4b17') .relu(name='res4b17_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b18_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b18_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b18_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b18_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b18_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b18_branch2c')) (self.feed('res4b17_relu', 'bn4b18_branch2c') .add(name='res4b18') .relu(name='res4b18_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b19_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b19_branch2a') .atrous_conv(3, 3, 256, 4, padding='SAME', biased=False, relu=False, name='res4b19_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b19_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b19_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b19_branch2c')) (self.feed('res4b18_relu', 'bn4b19_branch2c') .add(name='res4b19') .relu(name='res4b19_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b20_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b20_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b20_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b20_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b20_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b20_branch2c')) (self.feed('res4b19_relu', 'bn4b20_branch2c') .add(name='res4b20') .relu(name='res4b20_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b21_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b21_branch2a') .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b21_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b21_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b21_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b21_branch2c')) (self.feed('res4b20_relu', 'bn4b21_branch2c') .add(name='res4b21') .relu(name='res4b21_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b22_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b22_branch2a') .atrous_conv(3, 3, 256, 4, padding='SAME', biased=False, relu=False, name='res4b22_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b22_branch2b') .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b22_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b22_branch2c')) (self.feed('res4b21_relu', 'bn4b22_branch2c') .add(name='res4b22') .relu(name='res4b22_relu') .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch1') .batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch1')) (self.feed('res4b22_relu') .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5a_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5a_branch2a') .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5a_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5a_branch2b') .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch2c')) (self.feed('bn5a_branch1', 'bn5a_branch2c') .add(name='res5a') .relu(name='res5a_relu') .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5b_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5b_branch2a') .atrous_conv(3, 3, 512, 8, padding='SAME', biased=False, relu=False, name='res5b_branch2b') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5b_branch2b') .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5b_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn5b_branch2c')) (self.feed('res5a_relu', 'bn5b_branch2c') .add(name='res5b') .relu(name='res5b_relu') .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5c_branch2a') .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5c_branch2a') .atrous_conv(3, 3, 512, 16, padding='SAME', biased=False, relu=False, name='res5c_branch2b') .batch_normalization(activation_fn=tf.nn.relu, name='bn5c_branch2b', is_training=is_training) .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5c_branch2c') .batch_normalization(is_training=is_training, activation_fn=None, name='bn5c_branch2c')) (self.feed('res5b_relu', 'bn5c_branch2c') .add(name='res5c') .relu(name='res5c_relu') .atrous_conv(3, 3, 256, 6, padding='SAME', relu=False, name='fc1_voc12_c0') .batch_normalization(is_training=is_training, activation_fn=None, name='fc1_voc12_c0_bn')) (self.feed('res5c_relu') .atrous_conv(3, 3, 256, 12, padding='SAME', relu=False, name='fc1_voc12_c1') .batch_normalization(is_training=is_training, activation_fn=None, name='fc1_voc12_c1_bn')) (self.feed('res5c_relu') .atrous_conv(3, 3, 256, 18, padding='SAME', relu=False, name='fc1_voc12_c2') .batch_normalization(is_training=is_training, activation_fn=None, name='fc1_voc12_c2_bn')) (self.feed('res5c_relu') .atrous_conv(1, 1, 256, 1, padding='SAME', relu=False, name='fc1_voc12_c3') .batch_normalization(is_training=is_training, activation_fn=None, name='fc1_voc12_c3_bn')) (self.feed('fc1_voc12_c0_bn', 'fc1_voc12_c1_bn', 'fc1_voc12_c2_bn', 'fc1_voc12_c3_bn') .add(name='fc1_voc12') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='fc_oooo') .batch_normalization(is_training=is_training, activation_fn=None, name='fc_oooo_bn') .conv(1, 1, num_classes, 1, 1, biased=False, relu=False, name='fc_out')) ================================================ FILE: network.py ================================================ import numpy as np import tensorflow as tf slim = tf.contrib.slim DEFAULT_PADDING = 'SAME' def layer(op): '''Decorator for composable network layers.''' def layer_decorated(self, *args, **kwargs): # Automatically set a name if not provided. name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) # Figure out the layer inputs. if len(self.terminals) == 0: raise RuntimeError('No input variables found for layer %s.' % name) elif len(self.terminals) == 1: layer_input = self.terminals[0] else: layer_input = list(self.terminals) # Perform the operation and get the output. layer_output = op(self, layer_input, *args, **kwargs) # Add to layer LUT. self.layers[name] = layer_output # This output is now the input for the next layer. self.feed(layer_output) # Return self for chained calls. return self return layer_decorated class Network(object): def __init__(self, inputs, trainable=True, is_training=False, num_classes=21): # The input nodes for this network self.inputs = inputs # The current list of terminal nodes self.terminals = [] # Mapping from layer names to layers self.layers = dict(inputs) # If true, the resulting variables are set as trainable self.trainable = trainable # Switch variable for dropout self.use_dropout = tf.placeholder_with_default(tf.constant(1.0), shape=[], name='use_dropout') self.setup(is_training, num_classes) def setup(self, is_training): '''Construct the network. ''' raise NotImplementedError('Must be implemented by the subclass.') def load(self, data_path, session, ignore_missing=False): '''Load network weights. data_path: The path to the numpy-serialized network weights session: The current TensorFlow session ignore_missing: If true, serialized weights for missing layers are ignored. ''' data_dict = np.load(data_path).item() for op_name in data_dict: with tf.variable_scope(op_name, reuse=True): for param_name, data in data_dict[op_name].iteritems(): try: var = tf.get_variable(param_name) session.run(var.assign(data)) except ValueError: if not ignore_missing: raise def feed(self, *args): '''Set the input(s) for the next operation by replacing the terminal nodes. The arguments can be either layer names or the actual layers. ''' assert len(args) != 0 self.terminals = [] for fed_layer in args: if isinstance(fed_layer, str): try: fed_layer = self.layers[fed_layer] except KeyError: raise KeyError('Unknown layer name fed: %s' % fed_layer) self.terminals.append(fed_layer) return self def get_output(self): '''Returns the current network output.''' return self.terminals[-1] def get_unique_name(self, prefix): '''Returns an index-suffixed unique name for the given prefix. This is used for auto-generating layer names based on the type-prefix. ''' ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 return '%s_%d' % (prefix, ident) def make_var(self, name, shape): '''Creates a new TensorFlow variable.''' return tf.get_variable(name, shape, trainable=self.trainable) def validate_padding(self, padding): '''Verifies that the padding is one of the supported ones.''' assert padding in ('SAME', 'VALID') @layer def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding=DEFAULT_PADDING, group=1, biased=True): # Verify that the padding is acceptable self.validate_padding(padding) # Get the number of channels in the input c_i = input.get_shape()[-1] # Verify that the grouping parameter is valid assert c_i % group == 0 assert c_o % group == 0 # Convolution for a given input and kernel convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) with tf.variable_scope(name) as scope: kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) if group == 1: # This is the common-case. Convolve the input without any further complications. output = convolve(input, kernel) else: # Split the input into groups and then convolve each of them independently input_groups = tf.split(3, group, input) kernel_groups = tf.split(3, group, kernel) output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] # Concatenate the groups output = tf.concat(3, output_groups) # Add the biases if biased: biases = self.make_var('biases', [c_o]) output = tf.nn.bias_add(output, biases) if relu: # ReLU non-linearity output = tf.nn.relu(output, name=scope.name) return output @layer def atrous_conv(self, input, k_h, k_w, c_o, dilation, name, relu=True, padding=DEFAULT_PADDING, group=1, biased=True): # Verify that the padding is acceptable self.validate_padding(padding) # Get the number of channels in the input c_i = input.get_shape()[-1] # Verify that the grouping parameter is valid assert c_i % group == 0 assert c_o % group == 0 # Convolution for a given input and kernel convolve = lambda i, k: tf.nn.atrous_conv2d(i, k, dilation, padding=padding) with tf.variable_scope(name) as scope: kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) if group == 1: # This is the common-case. Convolve the input without any further complications. output = convolve(input, kernel) else: # Split the input into groups and then convolve each of them independently input_groups = tf.split(3, group, input) kernel_groups = tf.split(3, group, kernel) output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] # Concatenate the groups output = tf.concat(3, output_groups) # Add the biases if biased: biases = self.make_var('biases', [c_o]) output = tf.nn.bias_add(output, biases) if relu: # ReLU non-linearity output = tf.nn.relu(output, name=scope.name) return output @layer def relu(self, input, name): return tf.nn.relu(input, name=name) @layer def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): self.validate_padding(padding) return tf.nn.max_pool(input, ksize=[1, k_h, k_w, 1], strides=[1, s_h, s_w, 1], padding=padding, name=name) @layer def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): self.validate_padding(padding) return tf.nn.avg_pool(input, ksize=[1, k_h, k_w, 1], strides=[1, s_h, s_w, 1], padding=padding, name=name) @layer def lrn(self, input, radius, alpha, beta, name, bias=1.0): return tf.nn.local_response_normalization(input, depth_radius=radius, alpha=alpha, beta=beta, bias=bias, name=name) @layer def concat(self, inputs, axis, name): return tf.concat(axis=axis, values=inputs, name=name) @layer def add(self, inputs, name): return tf.add_n(inputs, name=name) @layer def fc(self, input, num_out, name, relu=True): with tf.variable_scope(name) as scope: input_shape = input.get_shape() if input_shape.ndims == 4: # The input is spatial. Vectorize it first. dim = 1 for d in input_shape[1:].as_list(): dim *= d feed_in = tf.reshape(input, [-1, dim]) else: feed_in, dim = (input, input_shape[-1].value) weights = self.make_var('weights', shape=[dim, num_out]) biases = self.make_var('biases', [num_out]) op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b fc = op(feed_in, weights, biases, name=scope.name) return fc @layer def softmax(self, input, name): input_shape = map(lambda v: v.value, input.get_shape()) if len(input_shape) > 2: # For certain models (like NiN), the singleton spatial dimensions # need to be explicitly squeezed, since they're not broadcast-able # in TensorFlow's NHWC ordering (unlike Caffe's NCHW). if input_shape[1] == 1 and input_shape[2] == 1: input = tf.squeeze(input, squeeze_dims=[1, 2]) else: raise ValueError('Rank 2 tensor input expected for softmax!') return tf.nn.softmax(input, name) @layer def batch_normalization(self, input, name, is_training, activation_fn=None, scale=True): with tf.variable_scope(name) as scope: output = slim.batch_norm( input, activation_fn=activation_fn, is_training=is_training, updates_collections=None, scale=scale, scope=scope) return output @layer def dropout(self, input, keep_prob, name): keep = 1 - self.use_dropout + (self.use_dropout * keep_prob) return tf.nn.dropout(input, keep, name=name) @layer def resize_bilinear(self, input, size, name): return tf.image.resize_bilinear(input, size=size, name=name) @layer def global_average_pooling(self, input, name): """ shape = input.get_shape().as_list() _mean = tf.reduce_mean(input, [1, 2], name='GAP', keep_dims=True) _conv = self.conv(_mean, 1, 1, 256, 1, 1, biased=False, relu=False, name='GAP_conv') _bn = self.batch_normalization(_conv, is_training=is_training, activation_fn=None, name='GAP_conv_bn') _resize = tf.image.resize_bilinear(_bn, size=shape, name=name) """ return tf.reduce_mean(input, [1, 2], name=name, keep_dims=True) ================================================ FILE: requirements.txt ================================================ tensorflow-gpu==1.15.0 scipy==1.7.3 pillow==9.1.0 gdown