Repository: xie9187/Monocular-Obstacle-Avoidance Branch: master Commit: ebb84b9e4f75 Files: 16 Total size: 163.3 KB Directory structure: gitextract_2nhjbmq_/ ├── D3QN/ │ ├── D3QN_testing.py │ ├── D3QN_training.py │ ├── GazeboWorld.py │ ├── RealWorld.py │ ├── models/ │ │ ├── __init__.py │ │ ├── fcrn.py │ │ └── network.py │ └── world/ │ └── SquareWorld.world ├── Depth/ │ ├── DataPreprocess.py │ └── FCRN/ │ ├── models/ │ │ ├── __init__.py │ │ ├── fcrn.py │ │ └── network.py │ ├── testing.py │ └── training.py ├── LICENSE └── README.md ================================================ FILE CONTENTS ================================================ ================================================ FILE: D3QN/D3QN_testing.py ================================================ from __future__ import print_function from RealWorld import RealWorld import tensorflow as tf import random import numpy as np import time import rospy import models import cv2 ACTIONS = 7 # number of valid actions SPEED = 2 # DoF of speed GAMMA = 0.99 # decay rate of past observations OBSERVE = 10. # timesteps to observe before training EXPLORE = 20000. # frames over which to anneal epsilon FINAL_EPSILON = 0.0001 # final value of epsilon INITIAL_EPSILON = 0.001 # starting value of epsilon REPLAY_MEMORY = 50000 # number of previous transitions to remember BATCH = 8 # size of minibatch MAX_EPISODE = 20000 DEPTH_IMAGE_WIDTH = 160 DEPTH_IMAGE_HEIGHT = 128 RGB_IMAGE_HEIGHT = 228 RGB_IMAGE_WIDTH = 304 CHANNEL = 3 TAU = 0.001 # Rate to update target network toward primary network H_SIZE = 8*10*64 IMAGE_HIST = 4 def variable_summaries(var): """Attach a lot of summaries to a Tensor (for TensorBoard visualization).""" with tf.name_scope('summaries'): mean = tf.reduce_mean(var) tf.summary.scalar('mean', mean) with tf.name_scope('stddev'): stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) tf.summary.scalar('stddev', stddev) tf.summary.scalar('max', tf.reduce_max(var)) tf.summary.scalar('min', tf.reduce_min(var)) tf.summary.histogram('histogram', var) def weight_variable(shape): initial = tf.truncated_normal(shape, stddev = 0.01) return tf.Variable(initial, name="weights") def bias_variable(shape): initial = tf.constant(0., shape = shape) return tf.Variable(initial, name="bias") def conv2d(x, W, stride_h, stride_w): return tf.nn.conv2d(x, W, strides = [1, stride_h, stride_w, 1], padding = "SAME") class QNetwork(object): """docstring for ClassName""" def __init__(self, sess, depth_predict): # network weights # input 128x160x1 with tf.name_scope("Conv1"): W_conv1 = weight_variable([10, 14, IMAGE_HIST, 32]) variable_summaries(W_conv1) b_conv1 = bias_variable([32]) # 16x20x32 with tf.name_scope("Conv2"): W_conv2 = weight_variable([4, 4, 32, 64]) variable_summaries(W_conv2) b_conv2 = bias_variable([64]) # 8x10x64 with tf.name_scope("Conv3"): W_conv3 = weight_variable([3, 3, 64, 64]) variable_summaries(W_conv3) b_conv3 = bias_variable([64]) # 8x10x64 with tf.name_scope("FCValue"): W_value = weight_variable([H_SIZE, 512]) variable_summaries(W_value) b_value = bias_variable([512]) # variable_summaries(b_ob_value) with tf.name_scope("FCAdv"): W_adv = weight_variable([H_SIZE, 512]) variable_summaries(W_adv) b_adv = bias_variable([512]) # variable_summaries(b_adv) with tf.name_scope("FCValueOut"): W_value_out = weight_variable([512, 1]) variable_summaries(W_value_out) b_value_out = bias_variable([1]) # variable_summaries(b_ob_value_out) with tf.name_scope("FCAdvOut"): W_adv_out = weight_variable([512, ACTIONS]) variable_summaries(W_adv_out) b_adv_out = bias_variable([ACTIONS]) # variable_summaries(b_ob_adv_out) # input layer self.state = tf.placeholder("float", [None, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, IMAGE_HIST]) # Conv1 layer h_conv1 = tf.nn.relu(conv2d(self.state, W_conv1, 8, 8) + b_conv1) # print('conv1:', h_conv1) # Conv2 layer h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2, 2, 2) + b_conv2) # print('conv2:', h_conv2) # Conv2 layer h_conv3 = tf.nn.relu(conv2d(h_conv2, W_conv3, 1, 1) + b_conv3) # print('conv3:', h_conv3) h_conv3_flat = tf.reshape(h_conv3, [-1, H_SIZE]) # FC ob value layer h_fc_value = tf.nn.relu(tf.matmul(h_conv3_flat, W_value) + b_value) value = tf.matmul(h_fc_value, W_value_out) + b_value_out # FC ob adv layer h_fc_adv = tf.nn.relu(tf.matmul(h_conv3_flat, W_adv) + b_adv) advantage = tf.matmul(h_fc_adv, W_adv_out) + b_adv_out # Q = value + (adv - advAvg) advAvg = tf.expand_dims(tf.reduce_mean(advantage, axis=1), axis=1) advIdentifiable = tf.subtract(advantage, advAvg) self.readout = tf.add(value, advIdentifiable) # define the ob cost function self.a = tf.placeholder("float", [None, ACTIONS]) self.y = tf.placeholder("float", [None]) self.readout_action = tf.reduce_sum(tf.multiply(self.readout, self.a), axis=1) self.td_error = tf.square(self.y - self.readout_action) self.cost = tf.reduce_mean(self.td_error) self.train_step = tf.train.AdamOptimizer(1e-4).minimize(self.cost) def updateTargetGraph(tfVars,tau): total_vars = len(tfVars) op_holder = [] for idx,var in enumerate(tfVars[0:total_vars/2]): op_holder.append(tfVars[idx+total_vars/2].assign((var.value()*tau) + ((1-tau)*tfVars[idx+total_vars/2].value()))) return op_holder def updateTarget(op_holder,sess): for op in op_holder: sess.run(op) def TestNetwork(): sess = tf.InteractiveSession() # define depth_net depth_state = tf.placeholder("float", [None, RGB_IMAGE_HEIGHT, RGB_IMAGE_WIDTH, CHANNEL]) depth_net = models.ResNet50UpProj({'data': depth_state}, 1, 1, True) depth_predict = tf.divide(depth_net.get_output(), 5.) depth_net_var = tf.trainable_variables() print(" [*] printing DepthNet variables") for idx, v in enumerate(depth_net_var): print(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name)) depth_net_saver = tf.train.Saver(depth_net_var, max_to_keep=1) # define q network with tf.device("/cpu:0"): with tf.name_scope("OnlineNetwork"): online_net = QNetwork(sess, depth_predict) with tf.name_scope("TargetNetwork"): target_net = QNetwork(sess, depth_predict) rospy.sleep(1.) reward_var = tf.Variable(0., trainable=False) reward_epi = tf.summary.scalar('reward', reward_var) # define summary merged_summary = tf.summary.merge_all() summary_writer = tf.summary.FileWriter('./logs', sess.graph) # Initialize the World and variables env = RealWorld() print('Environment initialized') # init_op = tf.global_variables_initializer() # sess.run(init_op) # get the first state rgb_img_t1 = env.GetRGBImageObservation() terminal = False # Loading depth network depth_checkpoint = tf.train.get_checkpoint_state('saved_networks/depth') if depth_checkpoint and depth_checkpoint.model_checkpoint_path: print('Loading from checkpoint:', depth_checkpoint) depth_net_saver.restore(sess, depth_checkpoint.model_checkpoint_path) print("Depth network model loaded:", depth_checkpoint.model_checkpoint_path) # saving and loading Q networks episode = 0 q_net_params = [] # Find variables of q network for variable in tf.trainable_variables(): variable_name = variable.name if variable_name.find('OnlineNetwork') >= 0: q_net_params.append(variable) for variable in tf.trainable_variables(): variable_name = variable.name if variable_name.find('TargetNetwork') >= 0: q_net_params.append(variable) print(" [*] printing QNetwork variables") for idx, v in enumerate(q_net_params): print(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name)) q_net_saver = tf.train.Saver(q_net_params, max_to_keep=1) checkpoint = tf.train.get_checkpoint_state("saved_networks/Q") print('checkpoint:', checkpoint) if checkpoint and checkpoint.model_checkpoint_path: q_net_saver.restore(sess, checkpoint.model_checkpoint_path) print("Q network successfully loaded:", checkpoint.model_checkpoint_path) else: print("Could not find old Q network weights") # start training epsilon = INITIAL_EPSILON r_epi = 0. r_cache = [] random_flag = False t_max = 0 t = 0 rate = rospy.Rate(5) loop_time = time.time() last_loop_time = loop_time while not rospy.is_shutdown(): episode += 1 env.ResetWorld() t = 0 terminal = False reset = False action_index = 0 # first observation rgb_img_t = env.GetRGBImageObservation() depth_img_t1 = sess.run(depth_predict, feed_dict={depth_state: [rgb_img_t]})[0] depth_img_t1 = np.reshape(depth_img_t1, (DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH)) depth_imgs_t1 = np.stack((depth_img_t1, depth_img_t1, depth_img_t1, depth_img_t1), axis=2) while not rospy.is_shutdown(): rgb_img_t1 = env.GetRGBImageObservation() reward_t, terminal, reset = env.GetRewardAndTerminate(t) depth_imgs_t = depth_imgs_t1 rgb_img_t = rgb_img_t1 depth_img_t1 = sess.run(depth_predict, feed_dict={depth_state: [rgb_img_t1]})[0] env.PublishDepthPrediction(depth_img_t1*5.) depth_img_t1 = np.reshape(depth_img_t1, (DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1)) depth_imgs_t1 = np.append(depth_img_t1, depth_imgs_t1[:, :, :(IMAGE_HIST - 1)], axis=2) # choose an action epsilon greedily a = sess.run(online_net.readout, feed_dict = {online_net.state : [depth_imgs_t1]}) readout_t = a[0] a_t = np.zeros([ACTIONS]) action_index = np.argmax(readout_t) a_t[action_index] = 1 # Control the agent env.Control(action_index) print('action:', action_index) t += 1 last_loop_time = loop_time loop_time = time.time() rate.sleep() def main(): TestNetwork() if __name__ == "__main__": main() ================================================ FILE: D3QN/D3QN_training.py ================================================ from __future__ import print_function from GazeboWorld import GazeboWorld import tensorflow as tf import random import numpy as np import time import rospy from collections import deque GAME = 'GazeboWorld' ACTIONS = 7 # number of valid actions SPEED = 2 # DoF of speed GAMMA = 0.99 # decay rate of past observations OBSERVE = 10. # timesteps to observe before training EXPLORE = 20000. # frames over which to anneal epsilon FINAL_EPSILON = 0.0001 # final value of epsilon INITIAL_EPSILON = 0.1 # starting value of epsilon REPLAY_MEMORY = 50000 # number of previous transitions to remember BATCH = 8 # size of minibatch MAX_EPISODE = 20000 MAX_T = 200 DEPTH_IMAGE_WIDTH = 160 DEPTH_IMAGE_HEIGHT = 128 RGB_IMAGE_HEIGHT = 228 RGB_IMAGE_WIDTH = 304 CHANNEL = 3 TAU = 0.001 # Rate to update target network toward primary network H_SIZE = 8*10*64 IMAGE_HIST = 4 def variable_summaries(var): """Attach a lot of summaries to a Tensor (for TensorBoard visualization).""" with tf.name_scope('summaries'): mean = tf.reduce_mean(var) tf.summary.scalar('mean', mean) with tf.name_scope('stddev'): stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) tf.summary.scalar('stddev', stddev) tf.summary.scalar('max', tf.reduce_max(var)) tf.summary.scalar('min', tf.reduce_min(var)) tf.summary.histogram('histogram', var) def weight_variable(shape): initial = tf.truncated_normal(shape, stddev = 0.01) return tf.Variable(initial, name="weights") def bias_variable(shape): initial = tf.constant(0., shape = shape) return tf.Variable(initial, name="bias") def conv2d(x, W, stride_h, stride_w): return tf.nn.conv2d(x, W, strides = [1, stride_h, stride_w, 1], padding = "SAME") class QNetwork(object): """docstring for ClassName""" def __init__(self, sess): # network weights # input 128x160x1 with tf.name_scope("Conv1"): W_conv1 = weight_variable([10, 14, IMAGE_HIST, 32]) variable_summaries(W_conv1) b_conv1 = bias_variable([32]) # 16x20x32 with tf.name_scope("Conv2"): W_conv2 = weight_variable([4, 4, 32, 64]) variable_summaries(W_conv2) b_conv2 = bias_variable([64]) # 8x10x64 with tf.name_scope("Conv3"): W_conv3 = weight_variable([3, 3, 64, 64]) variable_summaries(W_conv3) b_conv3 = bias_variable([64]) # 8x10x64 with tf.name_scope("FCValue"): W_value = weight_variable([H_SIZE, 512]) variable_summaries(W_value) b_value = bias_variable([512]) # variable_summaries(b_ob_value) with tf.name_scope("FCAdv"): W_adv = weight_variable([H_SIZE, 512]) variable_summaries(W_adv) b_adv = bias_variable([512]) # variable_summaries(b_adv) with tf.name_scope("FCValueOut"): W_value_out = weight_variable([512, 1]) variable_summaries(W_value_out) b_value_out = bias_variable([1]) # variable_summaries(b_ob_value_out) with tf.name_scope("FCAdvOut"): W_adv_out = weight_variable([512, ACTIONS]) variable_summaries(W_adv_out) b_adv_out = bias_variable([ACTIONS]) # variable_summaries(b_ob_adv_out) # input layer self.state = tf.placeholder("float", [None, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, IMAGE_HIST]) # Conv1 layer h_conv1 = tf.nn.relu(conv2d(self.state, W_conv1, 8, 8) + b_conv1) # Conv2 layer h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2, 2, 2) + b_conv2) # Conv2 layer h_conv3 = tf.nn.relu(conv2d(h_conv2, W_conv3, 1, 1) + b_conv3) h_conv3_flat = tf.reshape(h_conv3, [-1, H_SIZE]) # FC ob value layer h_fc_value = tf.nn.relu(tf.matmul(h_conv3_flat, W_value) + b_value) value = tf.matmul(h_fc_value, W_value_out) + b_value_out # FC ob adv layer h_fc_adv = tf.nn.relu(tf.matmul(h_conv3_flat, W_adv) + b_adv) advantage = tf.matmul(h_fc_adv, W_adv_out) + b_adv_out # Q = value + (adv - advAvg) advAvg = tf.expand_dims(tf.reduce_mean(advantage, axis=1), axis=1) advIdentifiable = tf.subtract(advantage, advAvg) self.readout = tf.add(value, advIdentifiable) # define the ob cost function self.a = tf.placeholder("float", [None, ACTIONS]) self.y = tf.placeholder("float", [None]) self.readout_action = tf.reduce_sum(tf.multiply(self.readout, self.a), axis=1) self.td_error = tf.square(self.y - self.readout_action) self.cost = tf.reduce_mean(self.td_error) self.train_step = tf.train.AdamOptimizer(1e-4).minimize(self.cost) def updateTargetGraph(tfVars,tau): total_vars = len(tfVars) op_holder = [] for idx,var in enumerate(tfVars[0:total_vars/2]): op_holder.append(tfVars[idx+total_vars/2].assign((var.value()*tau) + ((1-tau)*tfVars[idx+total_vars/2].value()))) return op_holder def updateTarget(op_holder,sess): for op in op_holder: sess.run(op) def trainNetwork(): sess = tf.InteractiveSession() with tf.name_scope("OnlineNetwork"): online_net = QNetwork(sess) with tf.name_scope("TargetNetwork"): target_net = QNetwork(sess) rospy.sleep(1.) reward_var = tf.Variable(0., trainable=False) reward_epi = tf.summary.scalar('reward', reward_var) # define summary merged_summary = tf.summary.merge_all() summary_writer = tf.summary.FileWriter('./logs', sess.graph) # Initialize the World env = GazeboWorld() print('Environment initialized') # Initialize the buffer D = deque() # get the first state depth_img_t1 = env.GetDepthImageObservation() depth_imgs_t1 = np.stack((depth_img_t1, depth_img_t1, depth_img_t1, depth_img_t1), axis=2) terminal = False # saving and loading networks trainables = tf.trainable_variables() trainable_saver = tf.train.Saver(trainables) sess.run(tf.global_variables_initializer()) checkpoint = tf.train.get_checkpoint_state("saved_networks/Q") print('checkpoint:', checkpoint) if checkpoint and checkpoint.model_checkpoint_path: trainable_saver.restore(sess, checkpoint.model_checkpoint_path) print("Successfully loaded:", checkpoint.model_checkpoint_path) else: print("Could not find old network weights") # start training episode = 0 epsilon = INITIAL_EPSILON r_epi = 0. t = 0 T = 0 rate = rospy.Rate(5) print('Number of trainable variables:', len(trainables)) targetOps = updateTargetGraph(trainables,TAU) loop_time = time.time() last_loop_time = loop_time while episode < MAX_EPISODE and not rospy.is_shutdown(): env.ResetWorld() t = 0 r_epi = 0. terminal = False reset = False loop_time_buf = [] action_index = 0 while not reset and not rospy.is_shutdown(): depth_img_t1 = env.GetDepthImageObservation() depth_img_t1 = np.reshape(depth_img_t1, (DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1)) depth_imgs_t1 = np.append(depth_img_t1, depth_imgs_t1[:, :, :(IMAGE_HIST - 1)], axis=2) reward_t, terminal, reset = env.GetRewardAndTerminate(t) if t > 0 : D.append((depth_imgs_t, a_t, reward_t, depth_imgs_t1, terminal)) if len(D) > REPLAY_MEMORY: D.popleft() depth_imgs_t = depth_imgs_t1 # choose an action epsilon greedily a = sess.run(online_net.readout, feed_dict = {online_net.state : [depth_imgs_t1]}) readout_t = a[0] a_t = np.zeros([ACTIONS]) if episode <= OBSERVE: action_index = random.randrange(ACTIONS) a_t[action_index] = 1 else: if random.random() <= epsilon: print("----------Random Action----------") action_index = random.randrange(ACTIONS) a_t[action_index] = 1 else: action_index = np.argmax(readout_t) a_t[action_index] = 1 # Control the agent env.Control(action_index) if episode > OBSERVE : # # sample a minibatch to train on minibatch = random.sample(D, BATCH) y_batch = [] # get the batch variables depth_imgs_t_batch = [d[0] for d in minibatch] a_batch = [d[1] for d in minibatch] r_batch = [d[2] for d in minibatch] depth_imgs_t1_batch = [d[3] for d in minibatch] Q1 = online_net.readout.eval(feed_dict = {online_net.state : depth_imgs_t1_batch}) Q2 = target_net.readout.eval(feed_dict = {target_net.state : depth_imgs_t1_batch}) for i in range(0, len(minibatch)): terminal_batch = minibatch[i][4] # if terminal, only equals reward if terminal_batch: y_batch.append(r_batch[i]) else: y_batch.append(r_batch[i] + GAMMA * Q2[i, np.argmax(Q1[i])]) #Update the network with our target values. online_net.train_step.run(feed_dict={online_net.y : y_batch, online_net.a : a_batch, online_net.state : depth_imgs_t_batch }) updateTarget(targetOps, sess) # Set the target network to be equal to the primary network. r_epi = r_epi + reward_t t += 1 T += 1 last_loop_time = loop_time loop_time = time.time() loop_time_buf.append(loop_time - last_loop_time) rate.sleep() # scale down epsilon if epsilon > FINAL_EPSILON and episode > OBSERVE: epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE # write summaries if episode > OBSERVE: summary_str = sess.run(merged_summary, feed_dict={reward_var: r_epi}) summary_writer.add_summary(summary_str, episode - OBSERVE) # save progress every 500 episodes if (episode+1) % 500 == 0 : trainable_saver.save(sess, 'saved_networks/' + GAME + '-dqn', global_step = episode) if len(loop_time_buf) == 0: print("EPISODE", episode, "/ REWARD", r_epi, "/ steps ", T) else: print("EPISODE", episode, "/ REWARD", r_epi, "/ steps ", T, "/ LoopTime:", np.mean(loop_time_buf)) episode = episode + 1 def main(): trainNetwork() if __name__ == "__main__": main() ================================================ FILE: D3QN/GazeboWorld.py ================================================ import rospy import math import time import numpy as np import cv2 import copy import tf import random from geometry_msgs.msg import Twist from geometry_msgs.msg import Quaternion from gazebo_msgs.msg import ModelStates from gazebo_msgs.msg import ModelState from sensor_msgs.msg import Image from cv_bridge import CvBridge, CvBridgeError from sensor_msgs.msg import LaserScan from nav_msgs.msg import Odometry from kobuki_msgs.msg import BumperEvent class GazeboWorld(): def __init__(self): # initiliaze rospy.init_node('GazeboWorld', anonymous=False) #-----------Default Robot State----------------------- self.set_self_state = ModelState() self.set_self_state.model_name = 'mobile_base' self.set_self_state.pose.position.x = 0.5 self.set_self_state.pose.position.y = 0. self.set_self_state.pose.position.z = 0. self.set_self_state.pose.orientation.x = 0.0 self.set_self_state.pose.orientation.y = 0.0 self.set_self_state.pose.orientation.z = 0.0 self.set_self_state.pose.orientation.w = 1.0 self.set_self_state.twist.linear.x = 0. self.set_self_state.twist.linear.y = 0. self.set_self_state.twist.linear.z = 0. self.set_self_state.twist.angular.x = 0. self.set_self_state.twist.angular.y = 0. self.set_self_state.twist.angular.z = 0. self.set_self_state.reference_frame = 'world' #------------Params-------------------- self.depth_image_size = [160, 128] self.rgb_image_size = [304, 228] self.bridge = CvBridge() self.object_state = [0, 0, 0, 0] self.object_name = [] # 0. | left 90/s | left 45/s | right 45/s | right 90/s | acc 1/s | slow down -1/s self.action_table = [0.4, 0.2, np.pi/6, np.pi/12, 0., -np.pi/12, -np.pi/6] self.self_speed = [.4, 0.0] self.default_states = None self.start_time = time.time() self.max_steps = 10000 self.depth_image = None self.bump = False #-----------Publisher and Subscriber------------- self.cmd_vel = rospy.Publisher('cmd_vel_mux/input/navi', Twist, queue_size = 10) self.set_state = rospy.Publisher('gazebo/set_model_state', ModelState, queue_size = 10) self.resized_depth_img = rospy.Publisher('camera/depth/image_resized',Image, queue_size = 10) self.resized_rgb_img = rospy.Publisher('camera/rgb/image_resized',Image, queue_size = 10) self.object_state_sub = rospy.Subscriber('gazebo/model_states', ModelStates, self.ModelStateCallBack) self.depth_image_sub = rospy.Subscriber('camera/rgb/image_raw', Image, self.RGBImageCallBack) self.rgb_image_sub = rospy.Subscriber('camera/depth/image_raw', Image, self.DepthImageCallBack) self.laser_sub = rospy.Subscriber('scan', LaserScan, self.LaserScanCallBack) self.odom_sub = rospy.Subscriber('odom', Odometry, self.OdometryCallBack) self.bumper_sub = rospy.Subscriber('mobile_base/events/bumper', BumperEvent, self.BumperCallBack) rospy.sleep(2.) # What function to call when you ctrl + c rospy.on_shutdown(self.shutdown) def ModelStateCallBack(self, data): # self state idx = data.name.index("mobile_base") quaternion = (data.pose[idx].orientation.x, data.pose[idx].orientation.y, data.pose[idx].orientation.z, data.pose[idx].orientation.w) euler = tf.transformations.euler_from_quaternion(quaternion) roll = euler[0] pitch = euler[1] yaw = euler[2] self.self_state = [data.pose[idx].position.x, data.pose[idx].position.y, yaw, data.twist[idx].linear.x, data.twist[idx].linear.y, data.twist[idx].angular.z] for lp in xrange(len(self.object_name)): idx = data.name.index(self.object_name[lp]) quaternion = (data.pose[idx].orientation.x, data.pose[idx].orientation.y, data.pose[idx].orientation.z, data.pose[idx].orientation.w) euler = tf.transformations.euler_from_quaternion(quaternion) roll = euler[0] pitch = euler[1] yaw = euler[2] self.object_state[lp] = [data.pose[idx].position.x, data.pose[idx].position.y, yaw] if self.default_states is None: self.default_states = copy.deepcopy(data) def DepthImageCallBack(self, img): self.depth_image = img def RGBImageCallBack(self, img): self.rgb_image = img def LaserScanCallBack(self, scan): self.scan_param = [scan.angle_min, scan.angle_max, scan.angle_increment, scan.time_increment, scan.scan_time, scan.range_min, scan. range_max] self.scan = np.array(scan.ranges) def OdometryCallBack(self, odometry): self.self_linear_x_speed = odometry.twist.twist.linear.x self.self_linear_y_speed = odometry.twist.twist.linear.y self.self_rotation_z_speed = odometry.twist.twist.angular.z def BumperCallBack(self, bumper_data): if bumper_data.state == BumperEvent.PRESSED: self.bump = True else: self.bump = False def GetDepthImageObservation(self): # ros image to cv2 image try: cv_img = self.bridge.imgmsg_to_cv2(self.depth_image, "32FC1") except Exception as e: raise e try: cv_rgb_img = self.bridge.imgmsg_to_cv2(self.rgb_image, "bgr8") except Exception as e: raise e cv_img = np.array(cv_img, dtype=np.float32) # resize dim = (self.depth_image_size[0], self.depth_image_size[1]) cv_img = cv2.resize(cv_img, dim, interpolation = cv2.INTER_AREA) cv_img[np.isnan(cv_img)] = 0. cv_img[cv_img < 0.4] = 0. cv_img/=(10./255.) # cv_img/=(10000./255.) # print 'max:', np.amax(cv_img), 'min:', np.amin(cv_img) # cv_img[cv_img > 5.] = -1. # # inpainting # mask = copy.deepcopy(cv_img) # mask[mask == 0.] = 1. # mask[mask != 1.] = 0. # mask = np.uint8(mask) # cv_img = cv2.inpaint(np.uint8(cv_img), mask, 3, cv2.INPAINT_TELEA) # # guassian noise # gauss = np.random.normal(0., 0.5, dim) # gauss = gauss.reshape(dim[1], dim[0]) # cv_img = np.array(cv_img, dtype=np.float32) # cv_img = cv_img + gauss # cv_img[cv_img<0.00001] = 0. # # smoothing # kernel = np.ones((4,4),np.float32)/16 # cv_img = cv2.filter2D(cv_img,-1,kernel) cv_img = np.array(cv_img, dtype=np.float32) cv_img*=(10./255.) # cv2 image to ros image and publish try: resized_img = self.bridge.cv2_to_imgmsg(cv_img, "passthrough") except Exception as e: raise e self.resized_depth_img.publish(resized_img) return(cv_img/5.) def GetRGBImageObservation(self): # ros image to cv2 image try: cv_img = self.bridge.imgmsg_to_cv2(self.rgb_image, "bgr8") except Exception as e: raise e # resize dim = (self.rgb_image_size[0], self.rgb_image_size[1]) cv_resized_img = cv2.resize(cv_img, dim, interpolation = cv2.INTER_AREA) # cv2 image to ros image and publish try: resized_img = self.bridge.cv2_to_imgmsg(cv_resized_img, "bgr8") except Exception as e: raise e self.resized_rgb_img.publish(resized_img) return(cv_resized_img) def PublishDepthPrediction(self, depth_img): # cv2 image to ros image and publish cv_img = np.array(depth_img, dtype=np.float32) try: resized_img = self.bridge.cv2_to_imgmsg(cv_img, "passthrough") except Exception as e: raise e self.resized_depth_img.publish(resized_img) def GetLaserObservation(self): scan = copy.deepcopy(self.scan) scan[np.isnan(scan)] = 30. return scan def GetSelfState(self): return self.self_state; def GetSelfLinearXSpeed(self): return self.self_linear_x_speed def GetSelfOdomeSpeed(self): v = np.sqrt(self.self_linear_x_speed**2 + self.self_linear_y_speed**2) return [v, self.self_rotation_z_speed] def GetTargetState(self, name): return self.object_state[self.TargetName.index(name)] def GetSelfSpeed(self): return np.array(self.self_speed) def GetBump(self): return self.bump def SetObjectPose(self, name='mobile_base', random_flag=False): quaternion = tf.transformations.quaternion_from_euler(0., 0., np.random.uniform(-np.pi, np.pi)) if name is 'mobile_base' : object_state = copy.deepcopy(self.set_self_state) object_state.pose.orientation.x = quaternion[0] object_state.pose.orientation.y = quaternion[1] object_state.pose.orientation.z = quaternion[2] object_state.pose.orientation.w = quaternion[3] else: object_state = self.States2State(self.default_states, name) self.set_state.publish(object_state) def States2State(self, states, name): to_state = ModelState() from_states = copy.deepcopy(states) idx = from_states.name.index(name) to_state.model_name = name to_state.pose = from_states.pose[idx] to_state.twist = from_states.twist[idx] to_state.reference_frame = 'world' return to_state def ResetWorld(self): self.SetObjectPose() # reset robot for x in xrange(len(self.object_name)): self.SetObjectPose(self.object_name[x]) # reset target self.self_speed = [.4, 0.0] self.step_target = [0., 0.] self.step_r_cnt = 0. self.start_time = time.time() rospy.sleep(0.5) def Control(self, action): if action <2: self.self_speed[0] = self.action_table[action] # self.self_speed[1] = 0. else: self.self_speed[1] = self.action_table[action] move_cmd = Twist() move_cmd.linear.x = self.self_speed[0] move_cmd.linear.y = 0. move_cmd.linear.z = 0. move_cmd.angular.x = 0. move_cmd.angular.y = 0. move_cmd.angular.z = self.self_speed[1] self.cmd_vel.publish(move_cmd) def shutdown(self): # stop turtlebot rospy.loginfo("Stop Moving") self.cmd_vel.publish(Twist()) rospy.sleep(1) def GetRewardAndTerminate(self, t): terminate = False reset = False [v, theta] = self.GetSelfOdomeSpeed() laser = self.GetLaserObservation() reward = v * np.cos(theta) * 0.2 - 0.01 if self.GetBump() or np.amin(laser) < 0.46 or np.amin(laser) == 30.: reward = -10. terminate = True reset = True if t > 500: reset = True return reward, terminate, reset ================================================ FILE: D3QN/RealWorld.py ================================================ import rospy import math import time import numpy as np import cv2 import copy import tf import random from geometry_msgs.msg import Twist from geometry_msgs.msg import Quaternion from gazebo_msgs.msg import ModelStates from gazebo_msgs.msg import ModelState from sensor_msgs.msg import Image from cv_bridge import CvBridge, CvBridgeError from sensor_msgs.msg import LaserScan from nav_msgs.msg import Odometry from kobuki_msgs.msg import BumperEvent class RealWorld(): def __init__(self): # initiliaze rospy.init_node('RealWorld', anonymous=False) #-----------Default Robot State----------------------- self.set_self_state = ModelState() self.set_self_state.model_name = 'mobile_base' self.set_self_state.pose.position.x = 0.5 self.set_self_state.pose.position.y = 0. self.set_self_state.pose.position.z = 0. self.set_self_state.pose.orientation.x = 0.0 self.set_self_state.pose.orientation.y = 0.0 self.set_self_state.pose.orientation.z = 0.0 self.set_self_state.pose.orientation.w = 1.0 self.set_self_state.twist.linear.x = 0. self.set_self_state.twist.linear.y = 0. self.set_self_state.twist.linear.z = 0. self.set_self_state.twist.angular.x = 0. self.set_self_state.twist.angular.y = 0. self.set_self_state.twist.angular.z = 0. self.set_self_state.reference_frame = 'world' #------------Params-------------------- self.depth_image_size = [160, 128] self.rgb_image_size = [304, 228] self.bridge = CvBridge() self.object_state = [0, 0, 0, 0] self.object_name = [] self.action_table = [0.4, 0.2, np.pi/6, np.pi/12, 0., -np.pi/12, -np.pi/6] self.self_speed = [.4, 0.0] self.default_states = None self.start_time = time.time() self.max_steps = 10000 self.depth_image = None self.laser_cb_num = 0 self.rot_counter = 0 self.now_phase = 1 self.next_phase = 4 self.step_target = [0., 0.] self.step_r_cnt = 0. self.bump = None self.action = 0 #-----------Publisher and Subscriber------------- self.cmd_vel = rospy.Publisher('cmd_vel_mux/input/navi', Twist, queue_size = 10) self.set_state = rospy.Publisher('gazebo/set_model_state', ModelState, queue_size = 10) self.resized_depth_img = rospy.Publisher('camera/depth/image_resized',Image, queue_size = 10) self.resized_rgb_img = rospy.Publisher('camera/rgb/image_resized',Image, queue_size = 10) self.object_state_sub = rospy.Subscriber('gazebo/model_states', ModelStates, self.ModelStateCallBack) self.depth_image_sub = rospy.Subscriber('camera/rgb/image_raw', Image, self.RGBImageCallBack) self.rgb_image_sub = rospy.Subscriber('camera/depth/image_raw', Image, self.DepthImageCallBack) self.laser_sub = rospy.Subscriber('scan', LaserScan, self.LaserScanCallBack) self.odom_sub = rospy.Subscriber('odom', Odometry, self.OdometryCallBack) self.bumper_sub = rospy.Subscriber('mobile_base/events/bumper', BumperEvent, self.BumperCallBack) # Wait until the first callback # while self.depth_image is None: # pass rospy.sleep(2.) # What function to call when you ctrl + c rospy.on_shutdown(self.shutdown) def ModelStateCallBack(self, data): # self state idx = data.name.index("mobile_base") quaternion = (data.pose[idx].orientation.x, data.pose[idx].orientation.y, data.pose[idx].orientation.z, data.pose[idx].orientation.w) euler = tf.transformations.euler_from_quaternion(quaternion) roll = euler[0] pitch = euler[1] yaw = euler[2] self.self_state = [data.pose[idx].position.x, data.pose[idx].position.y, yaw, data.twist[idx].linear.x, data.twist[idx].linear.y, data.twist[idx].angular.z] for lp in xrange(len(self.object_name)): idx = data.name.index(self.object_name[lp]) quaternion = (data.pose[idx].orientation.x, data.pose[idx].orientation.y, data.pose[idx].orientation.z, data.pose[idx].orientation.w) euler = tf.transformations.euler_from_quaternion(quaternion) roll = euler[0] pitch = euler[1] yaw = euler[2] self.object_state[lp] = [data.pose[idx].position.x, data.pose[idx].position.y, yaw] if self.default_states is None: self.default_states = copy.deepcopy(data) def DepthImageCallBack(self, img): self.depth_image = img def RGBImageCallBack(self, img): self.rgb_image = img def LaserScanCallBack(self, scan): self.scan_param = [scan.angle_min, scan.angle_max, scan.angle_increment, scan.time_increment, scan.scan_time, scan.range_min, scan. range_max] self.scan = np.array(scan.ranges) self.laser_cb_num += 1 def OdometryCallBack(self, odometry): self.self_linear_x_speed = odometry.twist.twist.linear.x self.self_linear_y_speed = odometry.twist.twist.linear.y self.self_rotation_z_speed = odometry.twist.twist.angular.z def BumperCallBack(self, bumper_data): if bumper_data.state == BumperEvent.PRESSED: self.bump = True else: self.bump = False def sim_noise(self, depthFile, rgbFile): img=depthFile imgcol=rgbFile edges = cv2.Canny(img,100,200,apertureSize = 3) edgescol = cv2.Canny(imgcol,100,200,apertureSize = 3) edges += edgescol mask=img.copy() mask.fill(0) minLineLength = 10 maxLineGap = 10 lines = cv2.HoughLinesP(edges,1,np.pi/180,20,100,10) if lines is not None: for line in lines: for x1,y1,x2,y2 in line: cv2.line(mask,(x1,y1),(x2,y2),255,1) for i in range(480): for j in range(640): if mask[i][j]>0: cv2.circle(img,(j,i),2, (0,0,0), -1) if random.random()>0.8: cv2.circle(img,(j,i), random.randint(2,6), (0,0,0), -1) return img def GetDepthImageObservation(self): # ros image to cv2 image try: cv_img = self.bridge.imgmsg_to_cv2(self.depth_image, "32FC1") except Exception as e: raise e # try: # cv_rgb_img = self.bridge.imgmsg_to_cv2(self.rgb_image, "bgr8") # except Exception as e: # raise e cv_img = np.array(cv_img, dtype=np.float32) cv_img[np.isnan(cv_img)] = 0. # cv_img/=(10./255.) cv_img/=(10000./255.) # print 'max:', np.amax(cv_img), 'min:', np.amin(cv_img) # cv_img[cv_img > 5.] = -1. # cv_img[cv_img < 0.4] = 0. # inpainting mask = copy.deepcopy(cv_img) mask[mask == 0.] = 1. mask[mask != 1.] = 0. # print 'mask sum:', np.sum(mask) mask = np.uint8(mask) cv_img = cv2.inpaint(np.uint8(cv_img), mask, 3, cv2.INPAINT_TELEA) cv_img = np.array(cv_img, dtype=np.float32) # cv_img*=(10./255.) cv_img*=(10./255.) # resize dim = (self.depth_image_size[0], self.depth_image_size[1]) cv_img = cv2.resize(cv_img, dim, interpolation = cv2.INTER_AREA) # cv2 image to ros image and publish try: resized_img = self.bridge.cv2_to_imgmsg(cv_img, "passthrough") except Exception as e: raise e self.resized_depth_img.publish(resized_img) return(cv_img/5.) def GetRGBImageObservation(self): # ros image to cv2 image try: cv_img = self.bridge.imgmsg_to_cv2(self.rgb_image, "bgr8") except Exception as e: raise e # resize dim = (self.rgb_image_size[0], self.rgb_image_size[1]) cv_resized_img = cv2.resize(cv_img, dim, interpolation = cv2.INTER_AREA) # cv2 image to ros image and publish try: resized_img = self.bridge.cv2_to_imgmsg(cv_resized_img, "bgr8") except Exception as e: raise e self.resized_rgb_img.publish(resized_img) return(cv_resized_img) def PublishDepthPrediction(self, depth_img): # cv2 image to ros image and publish cv_img = np.array(depth_img, dtype=np.float32) try: resized_img = self.bridge.cv2_to_imgmsg(cv_img, "passthrough") except Exception as e: raise e self.resized_depth_img.publish(resized_img) def GetLaserObservation(self): scan = copy.deepcopy(self.scan) scan[np.isnan(scan)] = 30. return scan def GetSelfState(self): return self.self_state; def GetSelfLinearXSpeed(self): return self.self_linear_x_speed def GetSelfOdomeSpeed(self): v = np.sqrt(self.self_linear_x_speed**2 + self.self_linear_y_speed**2) return [v, self.self_rotation_z_speed] def GetTargetState(self, name): return self.object_state[self.TargetName.index(name)] def GetSelfSpeed(self): return np.array(self.self_speed) def GetBump(self): return self.bump def SetObjectPose(self, name='mobile_base', random_flag=False): quaternion = tf.transformations.quaternion_from_euler(0., 0., np.random.uniform(-np.pi, np.pi)) if name is 'mobile_base' : object_state = copy.deepcopy(self.set_self_state) object_state.pose.orientation.x = quaternion[0] object_state.pose.orientation.y = quaternion[1] object_state.pose.orientation.z = quaternion[2] object_state.pose.orientation.w = quaternion[3] else: object_state = self.States2State(self.default_states, name) self.set_state.publish(object_state) def States2State(self, states, name): to_state = ModelState() from_states = copy.deepcopy(states) idx = from_states.name.index(name) to_state.model_name = name to_state.pose = from_states.pose[idx] to_state.twist = from_states.twist[idx] to_state.reference_frame = 'world' return to_state def ResetWorld(self): self.SetObjectPose() # reset robot for x in xrange(len(self.object_name)): self.SetObjectPose(self.object_name[x]) # reset target self.self_speed = [.4, 0.0] self.step_target = [0., 0.] self.step_r_cnt = 0. self.start_time = time.time() rospy.sleep(0.5) def Control(self, action): if action <2: self.self_speed[0] = self.action_table[0] else: self.self_speed[1] = self.action_table[action] move_cmd = Twist() move_cmd.linear.x = self.self_speed[0]/2 move_cmd.linear.y = 0. move_cmd.linear.z = 0. move_cmd.angular.x = 0. move_cmd.angular.y = 0. move_cmd.angular.z = self.self_speed[1]/2 self.cmd_vel.publish(move_cmd) def shutdown(self): # stop turtlebot rospy.loginfo("Stop Moving") self.cmd_vel.publish(Twist()) rospy.sleep(1) def GetRewardAndTerminate(self, t): terminate = False reset = False [v, theta] = self.GetSelfOdomeSpeed() # laser = self.GetLaserObservation() reward = v * np.cos(theta * 2) * 0.2 - 0.01 if self.GetBump() : reward = -10. terminate = True reset = True if t > 500: reset = True return reward, terminate, reset def GetTargetPoint(self): # r = random.uniform(8., 10.) # theta = random.uniform(-np.pi, np.py) # x = np.cos(theta) * r # y = np.sin(theta) * r x = random.uniform(-4.5, 4.5) if np.fabs(x) >= 3.5: y = random.uniform(-4.5, 4.5) else: y = random.uniform(3.5, 4.5) if random.uniform(-1., 1.) < 0.: y = -y self.target_point = [x, y] self.init_dist = np.sqrt(x**2 + y**2) self.last_distance = np.sqrt(x**2 + y**2) return self.target_point ================================================ FILE: D3QN/models/__init__.py ================================================ from .fcrn import ResNet50UpProj ================================================ FILE: D3QN/models/fcrn.py ================================================ from .network import Network class ResNet50UpProj(Network): def setup(self): # 1 trainable_flag = True (self.feed('data') .conv(7, 7, 64, 2, 2, relu=False, name='conv1', trainable=trainable_flag) .batch_normalization(relu=True, name='bn_conv1', trainable=trainable_flag) .max_pool(3, 3, 2, 2, name='pool1') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch1', trainable=trainable_flag) .batch_normalization(name='bn2a_branch1', trainable=trainable_flag)) # 2 trainable_flag = True (self.feed('pool1') .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2a_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn2a_branch2a', trainable=trainable_flag) .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2a_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn2a_branch2b', trainable=trainable_flag) .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch2c', trainable=trainable_flag) .batch_normalization(name='bn2a_branch2c', trainable=trainable_flag)) # 3 trainable_flag = True (self.feed('bn2a_branch1', 'bn2a_branch2c') .add(name='res2a') .relu(name='res2a_relu') .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2b_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn2b_branch2a', trainable=trainable_flag) .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2b_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn2b_branch2b', trainable=trainable_flag) .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2b_branch2c', trainable=trainable_flag) .batch_normalization(name='bn2b_branch2c', trainable=trainable_flag)) # 4 trainable_flag = True (self.feed('res2a_relu', 'bn2b_branch2c') .add(name='res2b') .relu(name='res2b_relu') .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2c_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn2c_branch2a', trainable=trainable_flag) .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2c_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn2c_branch2b', trainable=trainable_flag) .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2c_branch2c', trainable=trainable_flag) .batch_normalization(name='bn2c_branch2c', trainable=trainable_flag)) # 5 trainable_flag = True (self.feed('res2b_relu', 'bn2c_branch2c') .add(name='res2c') .relu(name='res2c_relu') .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res3a_branch1', trainable=trainable_flag) .batch_normalization(name='bn3a_branch1', trainable=trainable_flag)) # 6 trainable_flag = True (self.feed('res2c_relu') .conv(1, 1, 128, 2, 2, biased=False, relu=False, name='res3a_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3a_branch2a', trainable=trainable_flag) .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3a_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3a_branch2b', trainable=trainable_flag) .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3a_branch2c', trainable=trainable_flag) .batch_normalization(name='bn3a_branch2c', trainable=trainable_flag)) # 7 trainable_flag = True (self.feed('bn3a_branch1', 'bn3a_branch2c') .add(name='res3a') .relu(name='res3a_relu') .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3b_branch2a', trainable=trainable_flag) .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3b_branch2b', trainable=trainable_flag) .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b_branch2c', trainable=trainable_flag) .batch_normalization(name='bn3b_branch2c', trainable=trainable_flag)) # 8 trainable_flag = True (self.feed('res3a_relu', 'bn3b_branch2c') .add(name='res3b') .relu(name='res3b_relu') .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3c_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3c_branch2a', trainable=trainable_flag) .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3c_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3c_branch2b', trainable=trainable_flag) .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3c_branch2c', trainable=trainable_flag) .batch_normalization(name='bn3c_branch2c', trainable=trainable_flag)) # 9 trainable_flag = True (self.feed('res3b_relu', 'bn3c_branch2c') .add(name='res3c') .relu(name='res3c_relu') .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3d_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3d_branch2a', trainable=trainable_flag) .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3d_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3d_branch2b', trainable=trainable_flag) .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3d_branch2c', trainable=trainable_flag) .batch_normalization(name='bn3d_branch2c', trainable=trainable_flag)) # 10 trainable_flag = True (self.feed('res3c_relu', 'bn3d_branch2c') .add(name='res3d') .relu(name='res3d_relu') .conv(1, 1, 1024, 2, 2, biased=False, relu=False, name='res4a_branch1', trainable=trainable_flag) .batch_normalization(name='bn4a_branch1', trainable=trainable_flag)) # 11 trainable_flag = True (self.feed('res3d_relu') .conv(1, 1, 256, 2, 2, biased=False, relu=False, name='res4a_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4a_branch2a', trainable=trainable_flag) .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4a_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4a_branch2b', trainable=trainable_flag) .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch2c', trainable=trainable_flag) .batch_normalization(name='bn4a_branch2c', trainable=trainable_flag)) # 12 (self.feed('bn4a_branch1', 'bn4a_branch2c') .add(name='res4a') .relu(name='res4a_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4b_branch2a', trainable=trainable_flag) .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4b_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4b_branch2b', trainable=trainable_flag) .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b_branch2c', trainable=trainable_flag) .batch_normalization(name='bn4b_branch2c', trainable=trainable_flag)) # 13 trainable_flag = True (self.feed('res4a_relu', 'bn4b_branch2c') .add(name='res4b') .relu(name='res4b_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4c_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4c_branch2a', trainable=trainable_flag) .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4c_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4c_branch2b', trainable=trainable_flag) .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4c_branch2c', trainable=trainable_flag) .batch_normalization(name='bn4c_branch2c', trainable=trainable_flag)) # 14 (self.feed('res4b_relu', 'bn4c_branch2c') .add(name='res4c') .relu(name='res4c_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4d_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4d_branch2a', trainable=trainable_flag) .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4d_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4d_branch2b', trainable=trainable_flag) .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4d_branch2c', trainable=trainable_flag) .batch_normalization(name='bn4d_branch2c', trainable=trainable_flag)) # 15 trainable_flag = True (self.feed('res4c_relu', 'bn4d_branch2c') .add(name='res4d') .relu(name='res4d_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4e_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4e_branch2a', trainable=trainable_flag) .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4e_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4e_branch2b', trainable=trainable_flag) .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4e_branch2c', trainable=trainable_flag) .batch_normalization(name='bn4e_branch2c', trainable=trainable_flag)) # 16 (self.feed('res4d_relu', 'bn4e_branch2c') .add(name='res4e') .relu(name='res4e_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4f_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4f_branch2a', trainable=trainable_flag) .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4f_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4f_branch2b', trainable=trainable_flag) .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4f_branch2c', trainable=trainable_flag) .batch_normalization(name='bn4f_branch2c', trainable=trainable_flag)) # 17 trainable_flag = True (self.feed('res4e_relu', 'bn4f_branch2c') .add(name='res4f') .relu(name='res4f_relu') .conv(1, 1, 2048, 2, 2, biased=False, relu=False, name='res5a_branch1', trainable=trainable_flag) .batch_normalization(name='bn5a_branch1', trainable=trainable_flag)) # 18 (self.feed('res4f_relu') .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res5a_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn5a_branch2a', trainable=trainable_flag) .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5a_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn5a_branch2b', trainable=trainable_flag) .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch2c', trainable=trainable_flag) .batch_normalization(name='bn5a_branch2c', trainable=trainable_flag)) # 19 trainable_flag = True (self.feed('bn5a_branch1', 'bn5a_branch2c') .add(name='res5a') .relu(name='res5a_relu') .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5b_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn5b_branch2a', trainable=trainable_flag) .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5b_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn5b_branch2b', trainable=trainable_flag) .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5b_branch2c', trainable=trainable_flag) .batch_normalization(name='bn5b_branch2c', trainable=trainable_flag)) # 20 (self.feed('res5a_relu', 'bn5b_branch2c') .add(name='res5b') .relu(name='res5b_relu') .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5c_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn5c_branch2a', trainable=trainable_flag) .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5c_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn5c_branch2b', trainable=trainable_flag) .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5c_branch2c', trainable=trainable_flag) .batch_normalization(name='bn5c_branch2c', trainable=trainable_flag)) # 21 trainable_flag = True (self.feed('res5b_relu', 'bn5c_branch2c') .add(name='res5c') .relu(name='res5c_relu') .conv(1, 1, 1024, 1, 1, biased=True, relu=False, name='layer1', trainable=trainable_flag) .batch_normalization(relu=False, name='layer1_BN', trainable=trainable_flag) .up_project([3, 3, 1024, 512], id = '2x', stride = 1, BN=True, trainable=trainable_flag) .up_project([3, 3, 512, 256], id = '4x', stride = 1, BN=True, trainable=trainable_flag) .up_project([3, 3, 256, 128], id = '8x', stride = 1, BN=True, trainable=trainable_flag) .up_project([3, 3, 128, 64], id = '16x', stride = 1, BN=True, trainable=trainable_flag) .dropout(name = 'drop', keep_prob = 1.) .conv(3, 3, 1, 1, 1, name = 'ConvPred', trainable=trainable_flag)) ================================================ FILE: D3QN/models/network.py ================================================ import numpy as np import tensorflow as tf # ---------------------------------------------------------------------------------- # Commonly used layers and operations based on ethereon's implementation # https://github.com/ethereon/caffe-tensorflow # Slight modifications may apply. FCRN-specific operations have also been appended. # ---------------------------------------------------------------------------------- # Thanks to *Helisa Dhamo* for the model conversion and integration into TensorFlow. # ---------------------------------------------------------------------------------- DEFAULT_PADDING = 'SAME' def get_incoming_shape(incoming): """ Returns the incoming data shape """ if isinstance(incoming, tf.Tensor): return incoming.get_shape().as_list() elif type(incoming) in [np.array, list, tuple]: return np.shape(incoming) else: raise Exception("Invalid incoming layer.") def interleave(tensors, axis): old_shape = get_incoming_shape(tensors[0])[1:] new_shape = [-1] + old_shape new_shape[axis] *= len(tensors) return tf.reshape(tf.stack(tensors, axis + 1), new_shape) def layer(op): '''Decorator for composable network layers.''' def layer_decorated(self, *args, **kwargs): # Automatically set a name if not provided. name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) # Figure out the layer inputs. if len(self.terminals) == 0: raise RuntimeError('No input variables found for layer %s.' % name) elif len(self.terminals) == 1: layer_input = self.terminals[0] else: layer_input = list(self.terminals) # Perform the operation and get the output. layer_output = op(self, layer_input, *args, **kwargs) # Add to layer LUT. self.layers[name] = layer_output # This output is now the input for the next layer. self.feed(layer_output) # Return self for chained calls. return self return layer_decorated class Network(object): def __init__(self, inputs, batch, keep_prob, is_training, trainable=False): # The input nodes for this network self.inputs = inputs # The current list of terminal nodes self.terminals = [] # Mapping from layer names to layers self.layers = dict(inputs) # If true, the resulting variables are set as trainable self.trainable = trainable self.batch_size = batch self.keep_prob = keep_prob self.is_training = is_training self.setup() def setup(self): '''Construct the network. ''' raise NotImplementedError('Must be implemented by the subclass.') def load(self, data_path, session, ignore_missing=False): '''Load network weights. data_path: The path to the numpy-serialized network weights session: The current TensorFlow session ignore_missing: If true, serialized weights for missing layers are ignored. ''' data_dict = np.load(data_path, encoding='latin1').item() for op_name in data_dict: with tf.variable_scope(op_name, reuse=True): for param_name, data in iter(data_dict[op_name].items()): try: var = tf.get_variable(param_name) session.run(var.assign(data)) except ValueError: if not ignore_missing: raise def feed(self, *args): '''Set the input(s) for the next operation by replacing the terminal nodes. The arguments can be either layer names or the actual layers. ''' assert len(args) != 0 self.terminals = [] for fed_layer in args: if isinstance(fed_layer, str): try: fed_layer = self.layers[fed_layer] except KeyError: raise KeyError('Unknown layer name fed: %s' % fed_layer) self.terminals.append(fed_layer) return self def get_output(self): '''Returns the current network output.''' return self.terminals[-1] def get_layer_output(self, name): return self.layers[name] def get_unique_name(self, prefix): '''Returns an index-suffixed unique name for the given prefix. This is used for auto-generating layer names based on the type-prefix. ''' ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 return '%s_%d' % (prefix, ident) def make_var(self, name, shape, trainable=False): '''Creates a new TensorFlow variable.''' return tf.get_variable(name, shape, dtype = 'float32', trainable=trainable) def validate_padding(self, padding): '''Verifies that the padding is one of the supported ones.''' assert padding in ('SAME', 'VALID') @layer def conv(self, input_data, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding=DEFAULT_PADDING, group=1, biased=True, trainable=False): # Verify that the padding is acceptable self.validate_padding(padding) # Get the number of channels in the input c_i = input_data.get_shape()[-1] if (padding == 'SAME'): input_data = tf.pad(input_data, [[0, 0], [(k_h - 1)//2, (k_h - 1)//2], [(k_w - 1)//2, (k_w - 1)//2], [0, 0]], "CONSTANT") # Verify that the grouping parameter is valid assert c_i % group == 0 assert c_o % group == 0 # Convolution for a given input and kernel convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding='VALID') with tf.variable_scope(name) as scope: kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o], trainable=trainable) if group == 1: # This is the common-case. Convolve the input without any further complications. output = convolve(input_data, kernel) else: # Split the input into groups and then convolve each of them independently input_groups = tf.split(3, group, input_data) kernel_groups = tf.split(3, group, kernel) output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] # Concatenate the groups output = tf.concat(3, output_groups) # Add the biases if biased: biases = self.make_var('biases', [c_o], trainable=trainable) output = tf.nn.bias_add(output, biases) if relu: # ReLU non-linearity output = tf.nn.relu(output, name=scope.name) return output @layer def relu(self, input_data, name): return tf.nn.relu(input_data, name=name) @layer def max_pool(self, input_data, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): self.validate_padding(padding) return tf.nn.max_pool(input_data, ksize=[1, k_h, k_w, 1], strides=[1, s_h, s_w, 1], padding=padding, name=name) @layer def avg_pool(self, input_data, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): self.validate_padding(padding) return tf.nn.avg_pool(input_data, ksize=[1, k_h, k_w, 1], strides=[1, s_h, s_w, 1], padding=padding, name=name) @layer def lrn(self, input_data, radius, alpha, beta, name, bias=1.0): return tf.nn.local_response_normalization(input_data, depth_radius=radius, alpha=alpha, beta=beta, bias=bias, name=name) @layer def concat(self, inputs, axis, name): return tf.concat(concat_dim=axis, values=inputs, name=name) @layer def add(self, inputs, name): return tf.add_n(inputs, name=name) @layer def fc(self, input_data, num_out, name, relu=True): with tf.variable_scope(name) as scope: input_shape = input_data.get_shape() if input_shape.ndims == 4: # The input is spatial. Vectorize it first. dim = 1 for d in input_shape[1:].as_list(): dim *= d feed_in = tf.reshape(input_data, [-1, dim]) else: feed_in, dim = (input_data, input_shape[-1].value) weights = self.make_var('weights', shape=[dim, num_out]) biases = self.make_var('biases', [num_out]) op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b fc = op(feed_in, weights, biases, name=scope.name) return fc @layer def softmax(self, input_data, name): input_shape = map(lambda v: v.value, input_data.get_shape()) if len(input_shape) > 2: # For certain models (like NiN), the singleton spatial dimensions # need to be explicitly squeezed, since they're not broadcast-able # in TensorFlow's NHWC ordering (unlike Caffe's NCHW). if input_shape[1] == 1 and input_shape[2] == 1: input_data = tf.squeeze(input_data, squeeze_dims=[1, 2]) else: raise ValueError('Rank 2 tensor input expected for softmax!') return tf.nn.softmax(input_data, name) @layer def batch_normalization(self, input_data, name, scale_offset=True, relu=False, trainable=False): with tf.variable_scope(name) as scope: shape = [input_data.get_shape()[-1]] pop_mean = tf.get_variable("mean", shape, initializer = tf.constant_initializer(0.0), trainable=trainable) pop_var = tf.get_variable("variance", shape, initializer = tf.constant_initializer(1.0), trainable=trainable) epsilon = 1e-4 decay = 0.999 if scale_offset: scale = tf.get_variable("scale", shape, initializer = tf.constant_initializer(1.0), trainable=trainable) offset = tf.get_variable("offset", shape, initializer = tf.constant_initializer(0.0), trainable=trainable) else: scale, offset = (None, None) if self.is_training: batch_mean, batch_var = tf.nn.moments(input_data, [0, 1, 2]) train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay)) train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) with tf.control_dependencies([train_mean, train_var]): output = tf.nn.batch_normalization(input_data, batch_mean, batch_var, offset, scale, epsilon, name = name) else: output = tf.nn.batch_normalization(input_data, pop_mean, pop_var, offset, scale, epsilon, name = name) if relu: output = tf.nn.relu(output) return output @layer def dropout(self, input_data, keep_prob, name): return tf.nn.dropout(input_data, keep_prob, name=name) def unpool_as_conv(self, size, input_data, id, stride = 1, ReLU = False, BN = True, trainable=False): # Model upconvolutions (unpooling + convolution) as interleaving feature # maps of four convolutions (A,B,C,D). Building block for up-projections. # Convolution A (3x3) # -------------------------------------------------- layerName = "layer%s_ConvA" % (id) self.feed(input_data) self.conv( 3, 3, size[3], stride, stride, name = layerName, padding = 'SAME', relu = False, trainable=trainable) outputA = self.get_output() # Convolution B (2x3) # -------------------------------------------------- layerName = "layer%s_ConvB" % (id) padded_input_B = tf.pad(input_data, [[0, 0], [1, 0], [1, 1], [0, 0]], "CONSTANT") self.feed(padded_input_B) self.conv(2, 3, size[3], stride, stride, name = layerName, padding = 'VALID', relu = False, trainable=trainable) outputB = self.get_output() # Convolution C (3x2) # -------------------------------------------------- layerName = "layer%s_ConvC" % (id) padded_input_C = tf.pad(input_data, [[0, 0], [1, 1], [1, 0], [0, 0]], "CONSTANT") self.feed(padded_input_C) self.conv(3, 2, size[3], stride, stride, name = layerName, padding = 'VALID', relu = False, trainable=trainable) outputC = self.get_output() # Convolution D (2x2) # -------------------------------------------------- layerName = "layer%s_ConvD" % (id) padded_input_D = tf.pad(input_data, [[0, 0], [1, 0], [1, 0], [0, 0]], "CONSTANT") self.feed(padded_input_D) self.conv(2, 2, size[3], stride, stride, name = layerName, padding = 'VALID', relu = False, trainable=trainable) outputD = self.get_output() # Interleaving elements of the four feature maps # -------------------------------------------------- left = interleave([outputA, outputB], axis=1) # columns right = interleave([outputC, outputD], axis=1) # columns Y = interleave([left, right], axis=2) # rows if BN: layerName = "layer%s_BN" % (id) self.feed(Y) self.batch_normalization(name=layerName, scale_offset=True, relu=False, trainable=trainable) Y = self.get_output() if ReLU: Y = tf.nn.relu(Y, name = layerName) return Y def up_project(self, size, id, stride = 1, BN = True, trainable=False): # Create residual upsampling layer (UpProjection) input_data = self.get_output() # Branch 1 id_br1 = "%s_br1" % (id) # Interleaving Convs of 1st branch out = self.unpool_as_conv(size, input_data, id_br1, stride, ReLU=True, BN=True, trainable=trainable) # Convolution following the upProjection on the 1st branch layerName = "layer%s_Conv" % (id) self.feed(out) self.conv(size[0], size[1], size[3], stride, stride, name = layerName, relu = False, trainable=trainable) if BN: layerName = "layer%s_BN" % (id) self.batch_normalization(name=layerName, scale_offset=True, relu=False, trainable=trainable) # Output of 1st branch branch1_output = self.get_output() # Branch 2 id_br2 = "%s_br2" % (id) # Interleaving convolutions and output of 2nd branch branch2_output = self.unpool_as_conv(size, input_data, id_br2, stride, ReLU=False, trainable=trainable) # sum branches layerName = "layer%s_Sum" % (id) output = tf.add_n([branch1_output, branch2_output], name = layerName) # ReLU layerName = "layer%s_ReLU" % (id) output = tf.nn.relu(output, name=layerName) self.feed(output) return self ================================================ FILE: D3QN/world/SquareWorld.world ================================================ 1 0 0 10 0 -0 0 0.8 0.8 0.8 1 0.2 0.2 0.2 1 1000 0.9 0.01 0.001 0.5 0.1 -0.9 0.4 0.4 0.4 1 0.7 0.7 0.7 1 1 0.01 1 100 1 0 0 1 100 100 100 50 10 0 0 0 1 100 100 0 0 0 0 1 0.497681 0 0 0 -0 0 17585 380000000 593 732830907 1494623668 929573495 58808 -0.037988 0.034396 0 0 -0 0 1 1 1 -2.18999 1.8914 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 -2.19299 1.7984 0 0 0 -1.5708 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 2.68601 3.0634 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 3.25351 2.6559 0 0 0 -1.54994 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 2.71201 2.2484 0 0 -0 3.14159 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 2.14451 2.6559 0 0 -0 1.61372 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 -2.93375 -2.45648 0 0 -0 1.09459 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 -2.00876 -2.45648 0 0 0 -1.01325 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 -2.42752 -3.25087 0 0 -0 3.14159 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 2.35014 -2.44607 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 3.16763 -1.25358 0 0 -0 1.5708 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 -0.037988 5.4594 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 5.38701 0.034396 0 0 0 -1.5708 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 -0.037988 -5.3906 0 0 -0 3.14159 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 -5.46299 0.034396 0 0 -0 1.5708 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 4.76137 -3.62422 0 0 -0 0 1 1 1 4.76137 -3.62422 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 4.98173 5.02016 0 0 -0 0 1 1 1 4.98173 5.02016 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 -3.75684 3.84985 0 0 -0 0 1 1 1 -3.75684 3.84985 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 0.497681 0 0 0 -0 0 1 1 1 0.497681 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 -4.48069 -0.04466 0 0 -0 0 1 1 1 -4.48069 -0.04466 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 0 0 -0 0 0 0 10 0 -0 0 -0.19454 -5.8041 19.5357 0 1.3098 1.55175 orbit perspective 0 0 -9.8 6e-06 2.3e-05 -4.2e-05 EARTH_WGS84 0 0 0 0 -1.685 -2.396 0 0 -0 0 2.5 0.15 1 0 0 0.5 0 -0 0 10 0 0 0.5 0 -0 0 2.5 0.15 1 1 1 1 1 0 -2.152 1.857 0 0 -0 0 0 0 2.75 0.15 1 0 0 0.5 0 -0 0 10 0 0 0.5 0 -0 0 2.75 0.15 1 1 1 1 1 0 -2.155 1.764 0 0 0 -1.5708 0 0 1.2678 0.15 1 0 0 0.5 0 -0 0 10 0 0 0.5 0 -0 0 1.2678 0.15 1 1 1 1 1 0 2.724 3.029 0 0 -0 0 0 0 0.965005 0.15 1 0 0 0.5 0 -0 0 10 0 0 0.5 0 -0 0 0.965005 0.15 1 1 1 1 1 0 3.2915 2.6215 0 0 0 -1.54994 0 0 1.25 0.15 1 0 0 0.5 0 -0 0 10 0 0 0.5 0 -0 0 1.25 0.15 1 1 1 1 1 0 2.75 2.214 0 0 -0 3.14159 0 0 0.965005 0.15 1 0 0 0.5 0 -0 0 10 0 0 0.5 0 -0 0 0.965005 0.15 1 1 1 1 1 0 2.1825 2.6215 0 0 -0 1.61372 0 0 1.97704 0.15 1 0 0 0.5 0 -0 0 10 0 0 0.5 0 -0 0 1.97704 0.15 1 1 1 1 1 0 -2.89576 -2.49088 0 0 -0 1.09459 0 0 2.06356 0.15 1 0 0 0.5 0 -0 0 10 0 0 0.5 0 -0 0 2.06356 0.15 1 1 1 1 1 0 -1.97077 -2.49088 0 0 0 -1.01325 0 0 2 0.15 1 0 0 0.5 0 -0 0 10 0 0 0.5 0 -0 0 2 0.15 1 1 1 1 1 0 -2.38953 -3.28527 0 0 -0 3.14159 0 0 1.75 0.15 1 0 0 0.5 0 -0 0 10 0 0 0.5 0 -0 0 1.75 0.15 1 1 1 1 1 0 2.38813 -2.48047 0 0 -0 0 0 0 2.5 0.15 1 0 0 0.5 0 -0 0 10 0 0 0.5 0 -0 0 2.5 0.15 1 1 1 1 1 0 3.20562 -1.28798 0 0 -0 1.5708 0 0 11 0.15 2 0 0 1 0 -0 0 10 0 0 1 0 -0 0 11 0.15 2 1 1 1 1 0 0 5.425 0 0 -0 0 0 0 11 0.15 2 0 0 1 0 -0 0 10 0 0 1 0 -0 0 11 0.15 2 1 1 1 1 0 5.425 0 0 0 0 -1.5708 0 0 11 0.15 2 0 0 1 0 -0 0 10 0 0 1 0 -0 0 11 0.15 2 1 1 1 1 0 0 -5.425 0 0 -0 3.14159 0 0 11 0.15 2 0 0 1 0 -0 0 10 0 0 1 0 -0 0 11 0.15 2 1 1 1 1 0 -5.425 0 0 0 -0 1.5708 0 0 1 1 0 0 -1 0 -0 0 2.05 0 0 2.05 0 2.05 25 0 0 0.01 0 -0 0 0.45 0.45 0.02 10 0 0 0.01 0 -0 0 0.45 0.45 0.02 0.235 0 0.51 0 -0 0 0.02 0.45 1.02 10 0.235 0 0.51 0 -0 0 0.02 0.45 1.02 0 0.235 0.51 0 -0 0 0.45 0.02 1.02 10 0 0.235 0.51 0 -0 0 0.45 0.02 1.02 0 0 0.51 0 -0 0 0.45 0.45 0.02 10 0 0 0.51 0 -0 0 0.45 0.45 0.02 0 -0.235 0.51 0 -0 0 0.45 0.02 1.02 10 0 -0.235 0.51 0 -0 0 0.45 0.02 1.02 0 0 1.01 0 -0 0 0.45 0.45 0.02 10 0 0 1.01 0 -0 0 0.45 0.45 0.02 0 0 4.98173 5.02016 0 0 -0 0 1 1 0 0.005 0.6 0 -0 0 0.9 0.01 1.2 10 0 0.005 0.6 0 -0 0 0.9 0.01 1.2 0.45 -0.195 0.6 0 -0 0 0.02 0.4 1.2 10 0.45 -0.195 0.6 0 -0 0 0.02 0.4 1.2 -0.45 -0.195 0.6 0 -0 0 0.02 0.4 1.2 10 -0.45 -0.195 0.6 0 -0 0 0.02 0.4 1.2 0 -0.195 0.03 0 -0 0 0.88 0.4 0.06 10 0 -0.195 0.03 0 -0 0 0.88 0.4 0.06 0 -0.195 1.19 0 -0 0 0.88 0.4 0.02 10 0 -0.195 1.19 0 -0 0 0.88 0.4 0.02 0 -0.195 0.43 0 -0 0 0.88 0.4 0.02 10 0 -0.195 0.43 0 -0 0 0.88 0.4 0.02 0 -0.195 0.8 0 -0 0 0.88 0.4 0.02 10 0 -0.195 0.8 0 -0 0 0.88 0.4 0.02 0 0 4.51394 -3.63417 0 0 -0 0 1 0 0 1 0 -0 0 1.5 0.8 0.03 0.6 0.6 10 0 0 1 0 -0 0 1.5 0.8 0.03 0.68 0.38 0.5 0 -0 0 0.02 1 10 0.68 0.38 0.5 0 -0 0 0.02 1 0.68 -0.38 0.5 0 -0 0 0.02 1 10 0.68 -0.38 0.5 0 -0 0 0.02 1 -0.68 -0.38 0.5 0 -0 0 0.02 1 10 -0.68 -0.38 0.5 0 -0 0 0.02 1 -0.68 0.38 0.5 0 -0 0 0.02 1 10 -0.68 0.38 0.5 0 -0 0 0.02 1 0 0 -4.48069 -0.04466 0 0 -0 0 1 model://fire_hydrant/meshes/fire_hydrant.dae 1 1 1 10 model://fire_hydrant/meshes/fire_hydrant.dae 1 1 1 0 0 -3.75684 3.84985 0 0 -0 0 ================================================ FILE: Depth/DataPreprocess.py ================================================ import numpy as np import cv2 import rospy import random import copy import pickle from collections import deque from pathlib import Path def ismember(A, B): return [ np.sum(a == B) for a in A ] def rgb_data_color_aug(rgb_images): rgb_images = np.asarray(rgb_images).astype(float) noise = np.random.rand(4) * 0.4 + 0.8 print noise rgb_images = rgb_images * noise[0] for x in xrange(3): rgb_images[:, :, :, x] = rgb_images[:, :, :, x] * noise[x + 1] rgb_images[rgb_images>255.] = 255. rgb_images = np.uint8(rgb_images) return rgb_images def crop_img(img): size = np.shape(img) alpha = 0.1 height_start = int(size[0]*alpha/1.5) height_end = size[0] - int(size[0]*alpha/2.1) alpha = 0.15 width_start = int(size[1]*alpha/2.5) width_end = size[1] - int(size[1]*alpha/1.8) if len(size) == 2: cropped_img = img[height_start : height_end, width_start : width_end] elif len(size) == 3: cropped_img = img[height_start : height_end, width_start : width_end, :] return cropped_img file_num = 1000 D_training = deque() D_testing = deque() testing = 0.3 cnt = 0 path = './data/' inpaint_flag = False depth_dim = (160, 128) rgb_dim = (304, 228) for i in xrange(1, file_num + 1): print 'img', i depth_file = Path(path+'depth'+str(i)+'.png') rgb_file = Path(path+'rgb'+str(i)+'.jpg') if depth_file.is_file() and rgb_file.is_file(): cv_depth_img = cv2.imread(path+'depth'+str(i)+'.png', 0) cv_depth_img = crop_img(cv_depth_img) cv_depth_img = np.array(cv_depth_img, dtype=np.float32) cv_depth_img = cv2.resize(cv_depth_img, depth_dim, interpolation = cv2.INTER_NEAREST) cv_depth_img *= (10./255.) mask = copy.deepcopy(cv_depth_img) mask[mask == 0.] = 1. mask[mask != 1.] = 0. mask = 1 - mask # print np.shape(cv_depth_img) # print np.mean(cv_depth_img) # cv2.normalize(cv_depth_img, cv_depth_img, 0.0, 1.0, cv2.NORM_MINMAX, dtype=cv2.CV_32F) # cv_depth_img=np.uint8(cv_depth_img*255.0) # cv2.imshow('depth image',cv_depth_img) # cv2.waitKey(0) # mask=np.uint8(mask*255.0) # cv2.imshow('mask',mask) # cv2.waitKey(0) cv_rgb_img = cv2.imread(path+'rgb'+str(i)+'.jpg') cv_rgb_img = crop_img(cv_rgb_img) cv_resized_rgb_img = cv2.resize(cv_rgb_img, rgb_dim, interpolation = cv2.INTER_AREA) same_flag = False cnt += 1 if cnt == 1: last_rgb_img = cv_resized_rgb_img current_rgb_img = cv_resized_rgb_img last_depth_img = cv_depth_img current_depth_img = cv_depth_img else: last_rgb_img = current_rgb_img current_rgb_img = cv_resized_rgb_img if np.array_equal(current_rgb_img, last_rgb_img): print "The same rgb image" same_flag = True last_depth_img = current_depth_img current_depth_img = cv_depth_img if np.array_equal(current_depth_img, last_depth_img): print "The same depth image" same_flag = True save_rgb_img = current_rgb_img # cv2.imshow('rgb image',cv_resized_rgb_img) # cv2.waitKey(0) cv_depth_img = np.reshape(cv_depth_img, (depth_dim[1], depth_dim[0], 1)) if not same_flag: if random.uniform(0, 1.0) > .1: D_training.append((save_rgb_img, cv_depth_img, mask)) # D_training.append((np.flip(save_rgb_img, axis=1), np.flip(cv_depth_img, axis=1), np.flip(mask, axis=1))) else: D_testing.append((save_rgb_img, cv_depth_img, mask)) # D_testing.append((np.flip(save_rgb_img, axis=1), np.flip(cv_depth_img, axis=1), np.flip(mask, axis=1))) print 'rgb size:', cv_resized_rgb_img.shape print 'depth size:', cv_depth_img.shape pickle.dump(D_training, open("rgb_depth_images_training_real.p", "wb")) pickle.dump(D_testing, open("rgb_depth_images_testing_real.p", "wb")) print 'D_training:',len(D_training), '| D_testing:', len(D_testing) ================================================ FILE: Depth/FCRN/models/__init__.py ================================================ from .fcrn import ResNet50UpProj ================================================ FILE: Depth/FCRN/models/fcrn.py ================================================ from .network import Network class ResNet50UpProj(Network): def setup(self): # 1 trainable_flag = True (self.feed('data') .conv(7, 7, 64, 2, 2, relu=False, name='conv1', trainable=trainable_flag) .batch_normalization(relu=True, name='bn_conv1', trainable=trainable_flag) .max_pool(3, 3, 2, 2, name='pool1') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch1', trainable=trainable_flag) .batch_normalization(name='bn2a_branch1', trainable=trainable_flag)) # 2 trainable_flag = True (self.feed('pool1') .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2a_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn2a_branch2a', trainable=trainable_flag) .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2a_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn2a_branch2b', trainable=trainable_flag) .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch2c', trainable=trainable_flag) .batch_normalization(name='bn2a_branch2c', trainable=trainable_flag)) # 3 trainable_flag = True (self.feed('bn2a_branch1', 'bn2a_branch2c') .add(name='res2a') .relu(name='res2a_relu') .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2b_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn2b_branch2a', trainable=trainable_flag) .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2b_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn2b_branch2b', trainable=trainable_flag) .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2b_branch2c', trainable=trainable_flag) .batch_normalization(name='bn2b_branch2c', trainable=trainable_flag)) # 4 trainable_flag = True (self.feed('res2a_relu', 'bn2b_branch2c') .add(name='res2b') .relu(name='res2b_relu') .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2c_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn2c_branch2a', trainable=trainable_flag) .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2c_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn2c_branch2b', trainable=trainable_flag) .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2c_branch2c', trainable=trainable_flag) .batch_normalization(name='bn2c_branch2c', trainable=trainable_flag)) # 5 trainable_flag = True (self.feed('res2b_relu', 'bn2c_branch2c') .add(name='res2c') .relu(name='res2c_relu') .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res3a_branch1', trainable=trainable_flag) .batch_normalization(name='bn3a_branch1', trainable=trainable_flag)) # 6 trainable_flag = True (self.feed('res2c_relu') .conv(1, 1, 128, 2, 2, biased=False, relu=False, name='res3a_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3a_branch2a', trainable=trainable_flag) .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3a_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3a_branch2b', trainable=trainable_flag) .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3a_branch2c', trainable=trainable_flag) .batch_normalization(name='bn3a_branch2c', trainable=trainable_flag)) # 7 trainable_flag = True (self.feed('bn3a_branch1', 'bn3a_branch2c') .add(name='res3a') .relu(name='res3a_relu') .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3b_branch2a', trainable=trainable_flag) .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3b_branch2b', trainable=trainable_flag) .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b_branch2c', trainable=trainable_flag) .batch_normalization(name='bn3b_branch2c', trainable=trainable_flag)) # 8 trainable_flag = True (self.feed('res3a_relu', 'bn3b_branch2c') .add(name='res3b') .relu(name='res3b_relu') .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3c_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3c_branch2a', trainable=trainable_flag) .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3c_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3c_branch2b', trainable=trainable_flag) .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3c_branch2c', trainable=trainable_flag) .batch_normalization(name='bn3c_branch2c', trainable=trainable_flag)) # 9 trainable_flag = True (self.feed('res3b_relu', 'bn3c_branch2c') .add(name='res3c') .relu(name='res3c_relu') .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3d_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3d_branch2a', trainable=trainable_flag) .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3d_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn3d_branch2b', trainable=trainable_flag) .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3d_branch2c', trainable=trainable_flag) .batch_normalization(name='bn3d_branch2c', trainable=trainable_flag)) # 10 trainable_flag = True (self.feed('res3c_relu', 'bn3d_branch2c') .add(name='res3d') .relu(name='res3d_relu') .conv(1, 1, 1024, 2, 2, biased=False, relu=False, name='res4a_branch1', trainable=trainable_flag) .batch_normalization(name='bn4a_branch1', trainable=trainable_flag)) # 11 trainable_flag = True (self.feed('res3d_relu') .conv(1, 1, 256, 2, 2, biased=False, relu=False, name='res4a_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4a_branch2a', trainable=trainable_flag) .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4a_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4a_branch2b', trainable=trainable_flag) .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch2c', trainable=trainable_flag) .batch_normalization(name='bn4a_branch2c', trainable=trainable_flag)) # 12 (self.feed('bn4a_branch1', 'bn4a_branch2c') .add(name='res4a') .relu(name='res4a_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4b_branch2a', trainable=trainable_flag) .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4b_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4b_branch2b', trainable=trainable_flag) .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b_branch2c', trainable=trainable_flag) .batch_normalization(name='bn4b_branch2c', trainable=trainable_flag)) # 13 trainable_flag = True (self.feed('res4a_relu', 'bn4b_branch2c') .add(name='res4b') .relu(name='res4b_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4c_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4c_branch2a', trainable=trainable_flag) .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4c_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4c_branch2b', trainable=trainable_flag) .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4c_branch2c', trainable=trainable_flag) .batch_normalization(name='bn4c_branch2c', trainable=trainable_flag)) # 14 (self.feed('res4b_relu', 'bn4c_branch2c') .add(name='res4c') .relu(name='res4c_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4d_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4d_branch2a', trainable=trainable_flag) .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4d_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4d_branch2b', trainable=trainable_flag) .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4d_branch2c', trainable=trainable_flag) .batch_normalization(name='bn4d_branch2c', trainable=trainable_flag)) # 15 trainable_flag = True (self.feed('res4c_relu', 'bn4d_branch2c') .add(name='res4d') .relu(name='res4d_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4e_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4e_branch2a', trainable=trainable_flag) .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4e_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4e_branch2b', trainable=trainable_flag) .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4e_branch2c', trainable=trainable_flag) .batch_normalization(name='bn4e_branch2c', trainable=trainable_flag)) # 16 (self.feed('res4d_relu', 'bn4e_branch2c') .add(name='res4e') .relu(name='res4e_relu') .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4f_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4f_branch2a', trainable=trainable_flag) .conv(3, 3, 256, 1, 1, biased=False, relu=False, name='res4f_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn4f_branch2b', trainable=trainable_flag) .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4f_branch2c', trainable=trainable_flag) .batch_normalization(name='bn4f_branch2c', trainable=trainable_flag)) # 17 trainable_flag = True (self.feed('res4e_relu', 'bn4f_branch2c') .add(name='res4f') .relu(name='res4f_relu') .conv(1, 1, 2048, 2, 2, biased=False, relu=False, name='res5a_branch1', trainable=trainable_flag) .batch_normalization(name='bn5a_branch1', trainable=trainable_flag)) # 18 (self.feed('res4f_relu') .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res5a_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn5a_branch2a', trainable=trainable_flag) .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5a_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn5a_branch2b', trainable=trainable_flag) .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch2c', trainable=trainable_flag) .batch_normalization(name='bn5a_branch2c', trainable=trainable_flag)) # 19 trainable_flag = True (self.feed('bn5a_branch1', 'bn5a_branch2c') .add(name='res5a') .relu(name='res5a_relu') .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5b_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn5b_branch2a', trainable=trainable_flag) .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5b_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn5b_branch2b', trainable=trainable_flag) .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5b_branch2c', trainable=trainable_flag) .batch_normalization(name='bn5b_branch2c', trainable=trainable_flag)) # 20 (self.feed('res5a_relu', 'bn5b_branch2c') .add(name='res5b') .relu(name='res5b_relu') .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5c_branch2a', trainable=trainable_flag) .batch_normalization(relu=True, name='bn5c_branch2a', trainable=trainable_flag) .conv(3, 3, 512, 1, 1, biased=False, relu=False, name='res5c_branch2b', trainable=trainable_flag) .batch_normalization(relu=True, name='bn5c_branch2b', trainable=trainable_flag) .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5c_branch2c', trainable=trainable_flag) .batch_normalization(name='bn5c_branch2c', trainable=trainable_flag)) # 21 trainable_flag = True (self.feed('res5b_relu', 'bn5c_branch2c') .add(name='res5c') .relu(name='res5c_relu') .conv(1, 1, 1024, 1, 1, biased=True, relu=False, name='layer1', trainable=trainable_flag) .batch_normalization(relu=False, name='layer1_BN', trainable=trainable_flag) .up_project([3, 3, 1024, 512], id = '2x', stride = 1, BN=True, trainable=trainable_flag) .up_project([3, 3, 512, 256], id = '4x', stride = 1, BN=True, trainable=trainable_flag) .up_project([3, 3, 256, 128], id = '8x', stride = 1, BN=True, trainable=trainable_flag) .up_project([3, 3, 128, 64], id = '16x', stride = 1, BN=True, trainable=trainable_flag) .dropout(name = 'drop', keep_prob = 1.) .conv(3, 3, 1, 1, 1, name = 'ConvPred', trainable=trainable_flag)) ================================================ FILE: Depth/FCRN/models/network.py ================================================ import numpy as np import tensorflow as tf # ---------------------------------------------------------------------------------- # Commonly used layers and operations based on ethereon's implementation # https://github.com/ethereon/caffe-tensorflow # Slight modifications may apply. FCRN-specific operations have also been appended. # ---------------------------------------------------------------------------------- # Thanks to *Helisa Dhamo* for the model conversion and integration into TensorFlow. # ---------------------------------------------------------------------------------- DEFAULT_PADDING = 'SAME' def get_incoming_shape(incoming): """ Returns the incoming data shape """ if isinstance(incoming, tf.Tensor): return incoming.get_shape().as_list() elif type(incoming) in [np.array, list, tuple]: return np.shape(incoming) else: raise Exception("Invalid incoming layer.") def interleave(tensors, axis): old_shape = get_incoming_shape(tensors[0])[1:] new_shape = [-1] + old_shape new_shape[axis] *= len(tensors) return tf.reshape(tf.stack(tensors, axis + 1), new_shape) def layer(op): '''Decorator for composable network layers.''' def layer_decorated(self, *args, **kwargs): # Automatically set a name if not provided. name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) # Figure out the layer inputs. if len(self.terminals) == 0: raise RuntimeError('No input variables found for layer %s.' % name) elif len(self.terminals) == 1: layer_input = self.terminals[0] else: layer_input = list(self.terminals) # Perform the operation and get the output. layer_output = op(self, layer_input, *args, **kwargs) # Add to layer LUT. self.layers[name] = layer_output # This output is now the input for the next layer. self.feed(layer_output) # Return self for chained calls. return self return layer_decorated class Network(object): def __init__(self, inputs, batch, keep_prob, is_training, trainable=False): # The input nodes for this network self.inputs = inputs # The current list of terminal nodes self.terminals = [] # Mapping from layer names to layers self.layers = dict(inputs) # If true, the resulting variables are set as trainable self.trainable = trainable self.batch_size = batch self.keep_prob = keep_prob self.is_training = is_training self.setup() def setup(self): '''Construct the network. ''' raise NotImplementedError('Must be implemented by the subclass.') def load(self, data_path, session, ignore_missing=False): '''Load network weights. data_path: The path to the numpy-serialized network weights session: The current TensorFlow session ignore_missing: If true, serialized weights for missing layers are ignored. ''' data_dict = np.load(data_path, encoding='latin1').item() for op_name in data_dict: with tf.variable_scope(op_name, reuse=True): for param_name, data in iter(data_dict[op_name].items()): try: var = tf.get_variable(param_name) session.run(var.assign(data)) except ValueError: if not ignore_missing: raise def feed(self, *args): '''Set the input(s) for the next operation by replacing the terminal nodes. The arguments can be either layer names or the actual layers. ''' assert len(args) != 0 self.terminals = [] for fed_layer in args: if isinstance(fed_layer, str): try: fed_layer = self.layers[fed_layer] except KeyError: raise KeyError('Unknown layer name fed: %s' % fed_layer) self.terminals.append(fed_layer) return self def get_output(self): '''Returns the current network output.''' return self.terminals[-1] def get_layer_output(self, name): return self.layers[name] def get_unique_name(self, prefix): '''Returns an index-suffixed unique name for the given prefix. This is used for auto-generating layer names based on the type-prefix. ''' ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 return '%s_%d' % (prefix, ident) def make_var(self, name, shape, trainable=False): '''Creates a new TensorFlow variable.''' return tf.get_variable(name, shape, dtype = 'float32', trainable=trainable) def validate_padding(self, padding): '''Verifies that the padding is one of the supported ones.''' assert padding in ('SAME', 'VALID') @layer def conv(self, input_data, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding=DEFAULT_PADDING, group=1, biased=True, trainable=False): # Verify that the padding is acceptable self.validate_padding(padding) # Get the number of channels in the input c_i = input_data.get_shape()[-1] if (padding == 'SAME'): input_data = tf.pad(input_data, [[0, 0], [(k_h - 1)//2, (k_h - 1)//2], [(k_w - 1)//2, (k_w - 1)//2], [0, 0]], "CONSTANT") # Verify that the grouping parameter is valid assert c_i % group == 0 assert c_o % group == 0 # Convolution for a given input and kernel convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding='VALID') with tf.variable_scope(name) as scope: kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o], trainable=trainable) if group == 1: # This is the common-case. Convolve the input without any further complications. output = convolve(input_data, kernel) else: # Split the input into groups and then convolve each of them independently input_groups = tf.split(3, group, input_data) kernel_groups = tf.split(3, group, kernel) output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] # Concatenate the groups output = tf.concat(3, output_groups) # Add the biases if biased: biases = self.make_var('biases', [c_o], trainable=trainable) output = tf.nn.bias_add(output, biases) if relu: # ReLU non-linearity output = tf.nn.relu(output, name=scope.name) return output @layer def relu(self, input_data, name): return tf.nn.relu(input_data, name=name) @layer def max_pool(self, input_data, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): self.validate_padding(padding) return tf.nn.max_pool(input_data, ksize=[1, k_h, k_w, 1], strides=[1, s_h, s_w, 1], padding=padding, name=name) @layer def avg_pool(self, input_data, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): self.validate_padding(padding) return tf.nn.avg_pool(input_data, ksize=[1, k_h, k_w, 1], strides=[1, s_h, s_w, 1], padding=padding, name=name) @layer def lrn(self, input_data, radius, alpha, beta, name, bias=1.0): return tf.nn.local_response_normalization(input_data, depth_radius=radius, alpha=alpha, beta=beta, bias=bias, name=name) @layer def concat(self, inputs, axis, name): return tf.concat(concat_dim=axis, values=inputs, name=name) @layer def add(self, inputs, name): return tf.add_n(inputs, name=name) @layer def fc(self, input_data, num_out, name, relu=True): with tf.variable_scope(name) as scope: input_shape = input_data.get_shape() if input_shape.ndims == 4: # The input is spatial. Vectorize it first. dim = 1 for d in input_shape[1:].as_list(): dim *= d feed_in = tf.reshape(input_data, [-1, dim]) else: feed_in, dim = (input_data, input_shape[-1].value) weights = self.make_var('weights', shape=[dim, num_out]) biases = self.make_var('biases', [num_out]) op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b fc = op(feed_in, weights, biases, name=scope.name) return fc @layer def softmax(self, input_data, name): input_shape = map(lambda v: v.value, input_data.get_shape()) if len(input_shape) > 2: # For certain models (like NiN), the singleton spatial dimensions # need to be explicitly squeezed, since they're not broadcast-able # in TensorFlow's NHWC ordering (unlike Caffe's NCHW). if input_shape[1] == 1 and input_shape[2] == 1: input_data = tf.squeeze(input_data, squeeze_dims=[1, 2]) else: raise ValueError('Rank 2 tensor input expected for softmax!') return tf.nn.softmax(input_data, name) @layer def batch_normalization(self, input_data, name, scale_offset=True, relu=False, trainable=False): with tf.variable_scope(name) as scope: shape = [input_data.get_shape()[-1]] pop_mean = tf.get_variable("mean", shape, initializer = tf.constant_initializer(0.0), trainable=trainable) pop_var = tf.get_variable("variance", shape, initializer = tf.constant_initializer(1.0), trainable=trainable) epsilon = 1e-4 decay = 0.999 if scale_offset: scale = tf.get_variable("scale", shape, initializer = tf.constant_initializer(1.0), trainable=trainable) offset = tf.get_variable("offset", shape, initializer = tf.constant_initializer(0.0), trainable=trainable) else: scale, offset = (None, None) if self.is_training: batch_mean, batch_var = tf.nn.moments(input_data, [0, 1, 2]) train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay)) train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) with tf.control_dependencies([train_mean, train_var]): output = tf.nn.batch_normalization(input_data, batch_mean, batch_var, offset, scale, epsilon, name = name) else: output = tf.nn.batch_normalization(input_data, pop_mean, pop_var, offset, scale, epsilon, name = name) if relu: output = tf.nn.relu(output) return output @layer def dropout(self, input_data, keep_prob, name): return tf.nn.dropout(input_data, keep_prob, name=name) def unpool_as_conv(self, size, input_data, id, stride = 1, ReLU = False, BN = True, trainable=False): # Model upconvolutions (unpooling + convolution) as interleaving feature # maps of four convolutions (A,B,C,D). Building block for up-projections. # Convolution A (3x3) # -------------------------------------------------- layerName = "layer%s_ConvA" % (id) self.feed(input_data) self.conv( 3, 3, size[3], stride, stride, name = layerName, padding = 'SAME', relu = False, trainable=trainable) outputA = self.get_output() # Convolution B (2x3) # -------------------------------------------------- layerName = "layer%s_ConvB" % (id) padded_input_B = tf.pad(input_data, [[0, 0], [1, 0], [1, 1], [0, 0]], "CONSTANT") self.feed(padded_input_B) self.conv(2, 3, size[3], stride, stride, name = layerName, padding = 'VALID', relu = False, trainable=trainable) outputB = self.get_output() # Convolution C (3x2) # -------------------------------------------------- layerName = "layer%s_ConvC" % (id) padded_input_C = tf.pad(input_data, [[0, 0], [1, 1], [1, 0], [0, 0]], "CONSTANT") self.feed(padded_input_C) self.conv(3, 2, size[3], stride, stride, name = layerName, padding = 'VALID', relu = False, trainable=trainable) outputC = self.get_output() # Convolution D (2x2) # -------------------------------------------------- layerName = "layer%s_ConvD" % (id) padded_input_D = tf.pad(input_data, [[0, 0], [1, 0], [1, 0], [0, 0]], "CONSTANT") self.feed(padded_input_D) self.conv(2, 2, size[3], stride, stride, name = layerName, padding = 'VALID', relu = False, trainable=trainable) outputD = self.get_output() # Interleaving elements of the four feature maps # -------------------------------------------------- left = interleave([outputA, outputB], axis=1) # columns right = interleave([outputC, outputD], axis=1) # columns Y = interleave([left, right], axis=2) # rows if BN: layerName = "layer%s_BN" % (id) self.feed(Y) self.batch_normalization(name=layerName, scale_offset=True, relu=False, trainable=trainable) Y = self.get_output() if ReLU: Y = tf.nn.relu(Y, name = layerName) return Y def up_project(self, size, id, stride = 1, BN = True, trainable=False): # Create residual upsampling layer (UpProjection) input_data = self.get_output() # Branch 1 id_br1 = "%s_br1" % (id) # Interleaving Convs of 1st branch out = self.unpool_as_conv(size, input_data, id_br1, stride, ReLU=True, BN=True, trainable=trainable) # Convolution following the upProjection on the 1st branch layerName = "layer%s_Conv" % (id) self.feed(out) self.conv(size[0], size[1], size[3], stride, stride, name = layerName, relu = False, trainable=trainable) if BN: layerName = "layer%s_BN" % (id) self.batch_normalization(name=layerName, scale_offset=True, relu=False, trainable=trainable) # Output of 1st branch branch1_output = self.get_output() # Branch 2 id_br2 = "%s_br2" % (id) # Interleaving convolutions and output of 2nd branch branch2_output = self.unpool_as_conv(size, input_data, id_br2, stride, ReLU=False, trainable=trainable) # sum branches layerName = "layer%s_Sum" % (id) output = tf.add_n([branch1_output, branch2_output], name = layerName) # ReLU layerName = "layer%s_ReLU" % (id) output = tf.nn.relu(output, name=layerName) self.feed(output) return self ================================================ FILE: Depth/FCRN/testing.py ================================================ import tensorflow as tf import numpy as np import cv2 import pickle import random import models import copy import time import math from cv_bridge import CvBridge, CvBridgeError from PIL import Image import os os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = '0' CHANNEL = 3 DEPTH_IMAGE_WIDTH = 160 DEPTH_IMAGE_HEIGHT = 128 RGB_IMAGE_WIDTH = 304 RGB_IMAGE_HEIGHT = 228 MAX_STEP = 200 MAX_EPOCH = 150 BATCH = 1 def output_predict(predict, kinect, rgb, epoch, step): max_val = 10. kinect[kinect>max_val] = max_val if np.max(kinect) != 0: kinect_save = (kinect/max_val)*255.0 # print('kinect_max', np.amax(kinect)) else: kinect_save = kinect*255.0 kinect_save=np.uint8(kinect_save) name = "data/%04d" % epoch + "_%04d_kinect.png" % step cv2.imwrite(name,kinect_save) predict[predict>max_val] = max_val if np.max(predict) != 0: predict_save = (predict/max_val)*255.0 # print('predict_max', np.amax(predict)) else: predict_save = predict*255.0 predict_save=np.uint8(predict_save) name = "data/%04d" % epoch + "_%04d_predicted.png" % step cv2.imwrite(name,predict_save) name = "data/%04d" % epoch + "_%04d_rgb.png" % step cv2.imwrite(name,rgb) def SetDiff(first, second): second = set(second) return [item for item in first if item not in second] def normalize_rgb(rgb_images, value): rgb_images = np.asarray(rgb_images).astype(float) rgb_images /= 255. for x in xrange(3): rgb_images[:, :, :, x] -= value[x] return rgb_images def consecutive_sample(data, start, end): # return a list part = [] for x in xrange(start, end): part.append(data[x]) return part with tf.Session() as sess: # Construct network and define loss function state = tf.placeholder("float", [None, RGB_IMAGE_HEIGHT, RGB_IMAGE_WIDTH, CHANNEL]) net = models.ResNet50UpProj({'data': state}, BATCH, 1, True) depth_predict = net.get_output() depth_kinect = tf.placeholder("float", [None, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1]) img_mask = tf.placeholder("float", [None, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1]) print 'Loading initial network param' init_saver = tf.train.Saver() init_saver.restore(sess, '../init_network/NYU_FCRN.ckpt') # d_show = tf.subtract(tf.multiply(depth_predict, img_mask), tf.multiply(depth_kinect, img_mask)) # abs_d_show = tf.abs(d_show) # c = tf.divide(tf.reduce_max(abs_d_show), 5.) # berHu = tf.where(tf.less_equal(abs_d_show, c), abs_d_show, tf.square(d_show)) # loss = tf.reduce_mean(tf.reduce_mean(berHu, 1)) # train_step = tf.train.AdamOptimizer(5e-5).minimize(loss) print 'Initializing var' uninitialized_vars = [] start_time = time.time() for var in tf.global_variables(): try: sess.run(var) except tf.errors.FailedPreconditionError: uninitialized_vars.append(var) init_new_vars_op = tf.variables_initializer(uninitialized_vars) # print(" [*] printing unitialized variables") # for idx, v in enumerate(uninitialized_vars): # print(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name)) sess.run(init_new_vars_op) print 'Var initialized, time:', time.time() - start_time trainable_var = tf.trainable_variables() # print " [*] printing trainable variables" # for idx, v in enumerate(trainable_var): # print(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name)) depth_net_saver = tf.train.Saver(trainable_var, max_to_keep=1) checkpoint = tf.train.get_checkpoint_state('saved_network') if checkpoint and checkpoint.model_checkpoint_path: print 'Loading from checkpoint:', checkpoint depth_net_saver.restore(sess, checkpoint.model_checkpoint_path) print "Depth network model loaded:", checkpoint.model_checkpoint_path else: print 'No new model' print 'Loading data' training_data = pickle.load(open('../rgb_depth_images_training_real.p', "rb")) testing_data = pickle.load(open('../rgb_depth_images_testing_real.p', "rb")) print 'Data loaded' epoch = 0 for step in xrange(1,int(len(training_data)/(BATCH))+1): start_time = time.time() training_batch_real = consecutive_sample(training_data, (step-1)*BATCH, step*BATCH) rgb_img = [d[0] for d in training_batch_real] depth_img = [d[1] for d in training_batch_real] depth_img = np.reshape(depth_img, [-1, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1]) mask = [d[2] for d in training_batch_real] mask = np.reshape(mask, [-1, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1]) depth_predict_value = sess.run(depth_predict, feed_dict={ state : rgb_img}) output_predict(depth_predict_value[0], depth_img[0], np.array(rgb_img)[0], epoch, step) # print("epoch: {:2} | step: {:3} | testing loss: {:.4f}, time: {:.3f}"\ # .format(epoch, step, testing_loss, time.time()-start_time)) ================================================ FILE: Depth/FCRN/training.py ================================================ import tensorflow as tf import numpy as np import cv2 import pickle import random import models import copy import time import math import os from cv_bridge import CvBridge, CvBridgeError from PIL import Image os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = '0' CHANNEL = 3 DEPTH_IMAGE_WIDTH = 160 DEPTH_IMAGE_HEIGHT = 128 RGB_IMAGE_WIDTH = 304 RGB_IMAGE_HEIGHT = 228 MAX_STEP = 200 MAX_EPOCH = 150 BATCH = 16 def output_predict(predict, kinect, rgb, epoch, step): max_val = 10. kinect[kinect>max_val] = max_val if np.max(kinect) != 0: kinect_save = (kinect/max_val)*255.0 # print('kinect_max', np.amax(kinect)) else: kinect_save = kinect*255.0 kinect_save=np.uint8(kinect_save) name = "data/%04d" % epoch + "_%04d_kinect.png" % step cv2.imwrite(name,kinect_save) predict[predict>max_val] = max_val if np.max(predict) != 0: predict_save = (predict/max_val)*255.0 # print('predict_max', np.amax(predict)) else: predict_save = predict*255.0 predict_save=np.uint8(predict_save) name = "data/%04d" % epoch + "_%04d_predicted.png" % step cv2.imwrite(name,predict_save) name = "data/%04d" % epoch + "_%04d_rgb.png" % step cv2.imwrite(name,rgb) def SetDiff(first, second): second = set(second) return [item for item in first if item not in second] def normalize_rgb(rgb_images, value): rgb_images = np.asarray(rgb_images).astype(float) rgb_images /= 255. for x in xrange(3): rgb_images[:, :, :, x] -= value[x] return rgb_images def consecutive_sample(data, start, end): # return a list part = [] for x in xrange(start, end): part.append(data[x]) return part with tf.Session() as sess: # Construct network and define loss function state = tf.placeholder("float", [None, RGB_IMAGE_HEIGHT, RGB_IMAGE_WIDTH, CHANNEL]) net = models.ResNet50UpProj({'data': state}, BATCH, 1, True) depth_predict = net.get_output() depth_kinect = tf.placeholder("float", [None, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1]) img_mask = tf.placeholder("float", [None, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1]) print 'Loading initial network param' init_saver = tf.train.Saver() init_saver.restore(sess, '../init_network/NYU_FCRN.ckpt') d_show = tf.subtract(tf.multiply(depth_predict, img_mask), tf.multiply(depth_kinect, img_mask)) abs_d_show = tf.abs(d_show) c = tf.divide(tf.reduce_max(abs_d_show), 5.) berHu = tf.where(tf.less_equal(abs_d_show, c), abs_d_show, tf.square(d_show)) loss = tf.reduce_mean(tf.reduce_mean(berHu, 1)) train_step = tf.train.AdamOptimizer(5e-5).minimize(loss) train_loss_var = tf.Variable(0., trainable=False) train_loss_sum = tf.summary.scalar('training_loss', train_loss_var) test_loss_var = tf.Variable(0., trainable=False) test_loss_sum = tf.summary.scalar('testing_loss', test_loss_var) merged_summary = tf.summary.merge_all() summary_writer = tf.summary.FileWriter('./logs', sess.graph) print 'Initializing var' uninitialized_vars = [] start_time = time.time() for var in tf.global_variables(): try: sess.run(var) except tf.errors.FailedPreconditionError: uninitialized_vars.append(var) init_new_vars_op = tf.variables_initializer(uninitialized_vars) print(" [*] printing unitialized variables") for idx, v in enumerate(uninitialized_vars): print(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name)) sess.run(init_new_vars_op) print 'Var initialized, time:', time.time() - start_time trainable_var = tf.trainable_variables() print " [*] printing trainable variables" for idx, v in enumerate(trainable_var): print(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name)) depth_net_saver = tf.train.Saver(trainable_var, max_to_keep=1) checkpoint = tf.train.get_checkpoint_state('saved_network') if checkpoint and checkpoint.model_checkpoint_path: print 'Loading from checkpoint:', checkpoint depth_net_saver.restore(sess, checkpoint.model_checkpoint_path) print "Depth network model loaded:", checkpoint.model_checkpoint_path else: print 'No new model' print 'Loading data' training_data = pickle.load(open('../rgb_depth_images_training_real.p', "rb")) testing_data = pickle.load(open('../rgb_depth_images_testing_real.p', "rb")) print 'Data loaded' Step = 0 for epoch in xrange(1,MAX_EPOCH+1): np.random.shuffle(training_data) np.random.shuffle(testing_data) for step in xrange(1,int(len(training_data)/(BATCH))+1): start_time = time.time() training_batch_real = consecutive_sample(training_data, (step-1)*BATCH, step*BATCH) rgb_img = [d[0] for d in training_batch_real] depth_img = [d[1] for d in training_batch_real] depth_img = np.reshape(depth_img, [-1, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1]) mask = [d[2] for d in training_batch_real] mask = np.reshape(mask, [-1, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1]) # flip if np.random.rand() > 0.5: rgb_img = np.flip(rgb_img, axis=2) depth_img = np.flip(depth_img, axis=2) mask = np.flip(mask, axis=2) training_loss = 0. depth_predict_value, _, training_loss = sess.run([depth_predict, train_step, loss], feed_dict = { state : rgb_img, depth_kinect : depth_img, img_mask : mask}) if step % 10 == 0: testing_batch = random.sample(testing_data, BATCH) rgb_img_test = [d[0] for d in testing_batch] depth_img_test = [d[1] for d in testing_batch] depth_img_test = np.reshape(depth_img_test, [-1, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1]) mask_test = [d[2] for d in testing_batch] mask_test = np.reshape(mask_test, [-1, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1]) depth_predict_value, testing_loss, difference= sess.run([depth_predict, loss, abs_d_show], feed_dict = { state : rgb_img_test, depth_kinect : depth_img_test, img_mask : mask_test}) # if epoch % 10 == 0: output_predict(depth_predict_value[0], depth_img_test[0], np.asarray(rgb_img_test)[0], epoch, step) # print("epoch: {:2} | step: {:3} | traning loss: {:.4f} | testing loss: {:.4f}, time: {:.2f}"\ # .format(epoch, step, training_loss, testing_loss, (time.time()-start_time)/60.)) summary_str = sess.run(merged_summary, feed_dict={train_loss_var: training_loss, test_loss_var: testing_loss}) summary_writer.add_summary(summary_str, Step) Step = Step + 1 # full testing testing_loss_buff = [] for step in xrange(int(len(testing_data)/(BATCH))): testing_batch = consecutive_sample(testing_data, (step-1)*BATCH, step*BATCH) rgb_img_test = [d[0] for d in testing_batch] depth_img_test = [d[1] for d in testing_batch] mask_test = [d[2] for d in testing_batch] mask_test = np.reshape(mask_test, [-1, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1]) depth_predict_value, testing_loss, difference = sess.run([depth_predict, loss, abs_d_show], feed_dict = { state : rgb_img_test, depth_kinect : depth_img_test, img_mask : mask_test}) testing_loss_buff.append(testing_loss) print("epoch: {:3} | traning loss: {:.4f} | testing loss: {:.4f}, time: {:.2f}"\ .format(epoch, training_loss, np.mean(testing_loss_buff), (time.time()-start_time)/60.)) if epoch % 30 == 0: depth_net_saver.save(sess, 'saved_network/DepthNet', global_step = epoch) ================================================ FILE: LICENSE ================================================ Copyright (c) 2016, Iro Laina All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ # Towards Monocular Vision based Obstacle Avoidance through Deep Reinforcement Learning By [Linhai Xie](https://www.cs.ox.ac.uk/people/linhai.xie/), [Sen Wang](http://senwang.weebly.com/), Niki trigoni, Andrew Markham. The tensorflow implmentation for the paper: [Towards Monocular Vision based Obstacle Avoidance through Deep Reinforcement Learning](https://arxiv.org/abs/1706.09829) ## Contents 0. [Introduction](#introduction) 0. [Prerequisite](#Prerequisite) 0. [Instruction](#instruction) 0. [Citation](#citation) ## Introduction This repository contains: 1.Training code for [FCRN](https://arxiv.org/abs/1606.00373). We write our own training code but build the mode directly with the code provided [here](https://github.com/iro-cp/FCRN-DepthPrediction). (We retain Iro's license in the repository) 2.Data preprocessing code for training FCRN. 3.Training code for D3QN(Double Deep Q Network with Dueling architecture) with a turtlebot2 in Gazebo simulator. 4.Testing code for D3QN with a turtlebot2 in real world 5.The interface code between tensorflow and ros The network model for D3QN is slightly different from the paper as we find this version has a better performance. The video of our real world experiments is available at [Youtube](https://www.youtube.com/watch?v=qNIVgG4RUDM) ## Prerequisites Tensorflow > 1.1 ROS Kinetic cv2 ## Instruction **Retraining FCRN** We have a example dataset collected with a turtlebot in folder ```/Depth/data``` which contains 1k labeled rgb-depth images. And the full dataset we used with 10k images is [here.](https://drive.google.com/file/d/1OUH0Ioe1uT3MoEz9TwmzzBbtgJWtZVPh/view?usp=sharing) We recommend to collected more than 10k images by yourself to retrain a good model based on the initial one. After collecting enough data, use `DataPreprocess.py` to generate training and testing datasets. Download the [initial model of FCRN](http://campar.in.tum.de/files/rupprecht/depthpred/NYU_FCRN-checkpoint.zip) into the folder '''/Depth/init_network''' Then use `training.py` in ```/Depth/FCRN``` to retrain the model. Finally copy the generated checkpoint and model file from ```/Depth/saved_network``` to ```/D3QN/saved_networks/depth``` **Training D3QN in Gazebo** Launch the Gazebo world with our world file with command: ```roslaunch turtlebot_gazebo turtlebot_world.launch world_file:=/PATH TO/Monocular-Obstacle-Avoidance/D3QN/world/SquareWorld.world``` Start training with: ```python D3QN_training``` **Testing D3QN in real world** ```python D3QN_testing``` ## Citation If you use this method in your research, please cite: @inproceedings{xie2017towards, title = "Towards Monocular Vision based Obstacle Avoidance through Deep Reinforcement Learning", author = "Xie, Linhai and Wang, Sen and Markham, Andrew and Trigoni, Niki", year = "2017", booktitle = "RSS 2017 workshop on New Frontiers for Deep Learning in Robotics", }