Repository: hadipash/AI_GTA5 Branch: master Commit: 424e615419d9 Files: 24 Total size: 59.8 KB Directory structure: gitextract_4lc496_7/ ├── README.md ├── data_collection/ │ ├── data_balancing.py │ ├── data_collect.py │ ├── gamepad_cap.py │ ├── histogram.py │ ├── img_process.py │ ├── key_cap.py │ └── resources/ │ ├── arrows.npy │ ├── arrows_labels.npy │ ├── digits.npy │ └── digits_labels.npy ├── drivers.txt ├── driving/ │ ├── drive.py │ └── gamepad.py ├── game_plugins.txt ├── object_detection/ │ ├── direction.py │ ├── lane_detect.py │ └── object_detect.py ├── requirements.txt └── training/ ├── base_model.h5 ├── model.py ├── models/ │ └── original + radar/ │ └── base_model.h5 ├── train.py └── utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: README.md ================================================ # Self-Driving Car for GTA V ### Overview The aim of this project is to create a self-driving car using a virtual simulator (particularly GTA V). ### [Youtube Video](https://www.youtube.com/watch?v=BRK0wm7rrfQ)

================================================ FILE: data_collection/data_balancing.py ================================================ import h5py from data_collection.data_collect import path as source_path dest_path = "F:\Graduation_Project\\training_data_balanced.h5" destination = h5py.File(dest_path, 'w') destination.create_dataset('img', (0, 240, 320, 3), dtype='u1', maxshape=(None, 240, 320, 3), chunks=(30, 240, 320, 3)) destination.create_dataset('controls', (0, 2), dtype='i1', maxshape=(None, 2), chunks=(30, 2)) destination.create_dataset('metrics', (0, 2), dtype='u1', maxshape=(None, 2), chunks=(30, 2)) def save(data_img, controls, metrics): if data_img: # if the list is not empty destination["img"].resize((destination["img"].shape[0] + len(data_img)), axis=0) destination["img"][-len(data_img):] = data_img destination["controls"].resize((destination["controls"].shape[0] + len(controls)), axis=0) destination["controls"][-len(controls):] = controls destination["metrics"].resize((destination["metrics"].shape[0] + len(metrics)), axis=0) destination["metrics"][-len(metrics):] = metrics def main(): source = h5py.File(source_path, 'r') images = [] controls = [] metrics = [] tuples = 0 straights = 0 for i in range(source['img'].shape[0]): # if speed is not 0 and not arrived at the destination if source['metrics'][i][0] != 0 and source['metrics'][i][1] != 6: # save only each 5th straight drive frame if source['controls'][i][1] == 0: add = (straights % 5 == 0) straights += 1 # save all turns else: add = True if add: images.append(source['img'][i]) controls.append(source['controls'][i]) metrics.append(source['metrics'][i]) tuples += 1 if tuples % 10000 == 0: # every 2.5 GB print(tuples) save(images, controls, metrics) images = [] controls = [] metrics = [] save(images, controls, metrics) print("Copied: {:d} tuples from the source file".format(tuples)) source.close() destination.close() if __name__ == '__main__': main() 
================================================ FILE: data_collection/data_collect.py ================================================ """ Data collection module (saves data in H5 format). Saves screen captures and pressed keys into a file for further trainings of NN. """ import os import threading import time import winsound import h5py from data_collection.gamepad_cap import Gamepad from data_collection.img_process import img_process from data_collection.key_cap import key_check lock = threading.Lock() # open the data file path = "F:\Graduation_Project\\training_data.h5" data_file = None if os.path.isfile(path): data_file = h5py.File(path, 'a') else: data_file = h5py.File(path, 'w') # Write data in chunks for faster writing and reading by NN data_file.create_dataset('img', (0, 240, 320, 3), dtype='u1', maxshape=(None, 240, 320, 3), chunks=(30, 240, 320, 3)) data_file.create_dataset('controls', (0, 2), dtype='i1', maxshape=(None, 2), chunks=(30, 2)) data_file.create_dataset('metrics', (0, 2), dtype='u1', maxshape=(None, 2), chunks=(30, 2)) def save(data_img, controls, metrics): with lock: # make sure that data is consistent if data_img: # if the list is not empty # last_time = time.time() data_file["img"].resize((data_file["img"].shape[0] + len(data_img)), axis=0) data_file["img"][-len(data_img):] = data_img data_file["controls"].resize((data_file["controls"].shape[0] + len(controls)), axis=0) data_file["controls"][-len(controls):] = controls data_file["metrics"].resize((data_file["metrics"].shape[0] + len(metrics)), axis=0) data_file["metrics"][-len(metrics):] = metrics # print('Saving took {} seconds'.format(time.time() - last_time)) def delete(session): frames = session if session < 500 else 500 data_file["img"].resize((data_file["img"].shape[0] - frames), axis=0) data_file["controls"].resize((data_file["controls"].shape[0] - frames), axis=0) data_file["metrics"].resize((data_file["metrics"].shape[0] - frames), axis=0) def main(): # initialize gamepad gamepad 
= Gamepad() gamepad.open() # last_time = time.time() # to measure the number of frames alert_time = time.time() # to signal about exceeding speed limit close = False # to exit execution pause = True # to pause execution session = 0 # number of frames recorded in one session training_img = [] # lists for storing training data controls = [] metrics = [] print("Press RB on your gamepad to start recording") while not close: while not pause: # read throttle and steering values from the gamepad throttle, steering = gamepad.get_state() # get screen, speed and direction ignore, screen, speed, direction = img_process("Grand Theft Auto V") training_img.append(screen) controls.append([throttle, steering]) metrics.append([speed, direction]) session += 1 if speed > 60 and time.time() - alert_time > 1: winsound.PlaySound('.\\resources\\alert.wav', winsound.SND_ASYNC) alert_time = time.time() # save the data every 30 iterations if len(training_img) % 30 == 0: # print("-" * 30 + "Saving" + "-" * 30) threading.Thread(target=save, args=(training_img, controls, metrics)).start() training_img = [] controls = [] metrics = [] time.sleep(0.015) # in order to slow down fps # print('Main loop took {} seconds'.format(time.time() - last_time)) # last_time = time.time() if gamepad.get_RB(): pause = True print('Paused. 
Save the last 15 seconds?') keys = key_check() while ('Y' not in keys) and ('N' not in keys): keys = key_check() if 'N' in keys: delete(session) training_img = [] controls = [] metrics = [] print('Deleted.') else: print('Saved.') print('To exit the program press LB.') session = 0 time.sleep(0.5) if gamepad.get_RB(): pause = False print('Unpaused') time.sleep(1) elif gamepad.get_LB(): gamepad.close() close = True print('Saving data and closing the program.') save(training_img, controls, metrics) data_file.close() if __name__ == '__main__': main() ================================================ FILE: data_collection/gamepad_cap.py ================================================ """ Module for reading information from an Xbox gamepad """ import threading from inputs import get_gamepad # Gamepad part AXIS_MAX = 32767 AXIS_MIN = -32768 TRIGGER_MAX = 255 TRIGGER_MIN = -255 AXIS_MAX_NORM = 10 / AXIS_MAX AXIS_MIN_NORM = -10 / AXIS_MIN TRIGGER_MAX_NORM = 10 / TRIGGER_MAX TRIGGER_MIN_NORM = -10 / TRIGGER_MIN DEADZONE = 3 class Gamepad: def __init__(self): self.x_axis = 0 self.y_axisP = 0 self.y_axisN = 0 self.RB = 0 self.LB = 0 self.stop = False def open(self): self.stop = False threading.Thread(target=self.run).start() def run(self): while not self.stop: events = get_gamepad() for event in events: if event.code == "ABS_X": self.x_axis = event.state elif event.code == "ABS_RZ": self.y_axisP = event.state elif event.code == "ABS_Z": self.y_axisN = -event.state elif event.code == "BTN_TR": self.RB = event.state elif event.code == "BTN_TL": self.LB = event.state else: pass # we're not interested in the remain signals def get_state(self): xAxis = self.x_axis yAxis = self.y_axisP if self.y_axisP > 60 else self.y_axisN # normalize x axis if xAxis > 0: xAxis = int(round(xAxis * AXIS_MAX_NORM)) else: xAxis = int(round(xAxis * AXIS_MIN_NORM)) if -DEADZONE < xAxis < DEADZONE: xAxis = 0 # normalize y axis if yAxis > 0: yAxis = int(round(yAxis * TRIGGER_MAX_NORM)) else: yAxis = 
int(round(yAxis * TRIGGER_MIN_NORM)) if -DEADZONE < yAxis < DEADZONE: yAxis = 0 # return throttle and then steering return yAxis, xAxis def get_RB(self): return self.RB def get_LB(self): return self.LB def close(self): self.stop = True ================================================ FILE: data_collection/histogram.py ================================================ """ Histogram of turns (for future balancing of data) """ import h5py import matplotlib.pyplot as plt import numpy as np from data_collection.data_collect import path n_bins = [x - 0.5 for x in range(-10, 12)] data = h5py.File(path, 'r') fig, axs = plt.subplots() axs.hist([d[1] for d in data['controls'][:]], bins=n_bins) data.close() plt.xticks(np.arange(-10, 11, step=1)) plt.show() ================================================ FILE: data_collection/img_process.py ================================================ """ Module for preprocessing screen captures """ import win32gui import win32ui import cv2 import numpy as np import win32con def initKNN(data, labels, shape): knn = cv2.ml.KNearest_create() train = np.load(data).reshape(-1, shape).astype(np.float32) train_labels = np.load(labels) knn.train(train, cv2.ml.ROW_SAMPLE, train_labels) return knn knnDigits = initKNN('..\data_collection\\resources\digits.npy', '..\data_collection\\resources\digits_labels.npy', 40) knnArrows = initKNN('..\data_collection\\resources\\arrows.npy', '..\data_collection\\resources\\arrows_labels.npy', 90) # Done by Frannecklp def grab_screen(winName: str = "Grand Theft Auto V"): desktop = win32gui.GetDesktopWindow() # get area by a window name gtawin = win32gui.FindWindow(None, winName) # get the bounding box of the window left, top, x2, y2 = win32gui.GetWindowRect(gtawin) # cut window boarders top += 32 left += 3 y2 -= 4 x2 -= 4 width = x2 - left + 1 height = y2 - top + 1 # the device context(DC) for the entire window (title bar, menus, scroll bars, etc.) 
hwindc = win32gui.GetWindowDC(desktop) # Create a DC object from an integer handle srcdc = win32ui.CreateDCFromHandle(hwindc) # Create a memory device context that is compatible with the source DC memdc = srcdc.CreateCompatibleDC() # Create a bitmap object bmp = win32ui.CreateBitmap() # Create a bitmap compatible with the specified device context bmp.CreateCompatibleBitmap(srcdc, width, height) # Select an object into the device context. memdc.SelectObject(bmp) # Copy a bitmap from the source device context to this device context # parameters: destPos, size, dc, srcPos, rop(the raster operation)) memdc.BitBlt((0, 0), (width, height), srcdc, (left, top), win32con.SRCCOPY) # the bitmap bits signedIntsArray = bmp.GetBitmapBits(True) # form a 1-D array initialized from text data in a string. img = np.fromstring(signedIntsArray, dtype='uint8') img.shape = (height, width, 4) # Delete all resources associated with the device context srcdc.DeleteDC() memdc.DeleteDC() # Releases the device context win32gui.ReleaseDC(desktop, hwindc) # Delete the bitmap and freeing all system resources associated with the object. # After the object is deleted, the specified handle is no longer valid. 
win32gui.DeleteObject(bmp.GetHandle()) return cv2.cvtColor(img, cv2.COLOR_RGBA2RGB) def predict(img, knn): ret, result, neighbours, dist = knn.findNearest(img, k=1) return result def preprocess(img): gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) thr = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 7, -5) return thr def convert_speed(num1, num2, num3): hundreds = 1 tens = 1 speed = 0 if num3[0][0] != 10: hundreds = 10 tens = 10 speed += int(num3[0][0]) if num2[0][0] != 10: speed += tens * int(num2[0][0]) hundreds = tens * 10 if num1[0][0] != 10: speed += hundreds * int(num1[0][0]) return speed def img_process(winName: str = "Grand Theft Auto V"): screen = grab_screen(winName) # Ji Hyun's computer numbers = preprocess(screen[567:575, 683:702, :]) # Rustam's computer # numbers = preprocess(screen[573:581, 683:702, :]) # three fields for numbers num1 = predict(numbers[:, :5].reshape(-1, 40).astype(np.float32), knnDigits) num2 = predict(numbers[:, 7:12].reshape(-1, 40).astype(np.float32), knnDigits) num3 = predict(numbers[:, -5:].reshape(-1, 40).astype(np.float32), knnDigits) # one field for direction arrows # Ji Hyun's computer direct = preprocess(screen[561:570, 18:28, :]).reshape(-1, 90).astype(np.float32) # Rustam's computer # direct = preprocess(screen[567:576, 18:28, :]).reshape(-1, 90).astype(np.float32) direct = int(predict(direct, knnArrows)[0][0]) speed = convert_speed(num1, num2, num3) resized = cv2.resize(screen, (320, 240)) return screen, resized, speed, direct ================================================ FILE: data_collection/key_cap.py ================================================ # Citation: Box Of Hats (https://github.com/Box-Of-Hats) """ Module for reading keys from a keyboard """ import win32api as wapi keyList = ["\b"] for char in "ABCDEFGHIJKLMNOPQRSTUVWXYZ 0123456789,.'£$/\\": keyList.append(char) def key_check(): keys = [] for key in keyList: if wapi.GetAsyncKeyState(ord(key)): keys.append(key) return keys 
================================================ FILE: drivers.txt ================================================ # For testing AI an XBox controller emulator is needed # https://github.com/shauleiz/ScpVBus/releases ScpVBus # Installation: # In CMD (administrator): devcon.exe install ScpVBus.inf Root\ScpVBus # Removal: # In CMD (administrator): devcon.exe remove Root\ScpVBus ================================================ FILE: driving/drive.py ================================================ """ Car driving module. """ # reading and writing files import os import time import cv2 import numpy as np # load our saved model from keras.models import load_model # helper classes from data_collection.img_process import img_process from data_collection.key_cap import key_check # gamepad axes limits and gamepad module from driving.gamepad import AXIS_MIN, AXIS_MAX, TRIGGER_MAX, XInputDevice from object_detection.direction import Direct # YOLO algorithm from object_detection.object_detect import yolo_detection # lane detection algorithm from object_detection.lane_detect import detect_lane, draw_lane from training.utils import preprocess model_path = "..\\training" gamepad = None def set_gamepad(controls): # trigger value trigger = int(round(controls[0][1] * TRIGGER_MAX)) if trigger >= 0: # set left trigger to zero gamepad.SetTrigger('L', 0) gamepad.SetTrigger('R', trigger) else: # inverse value trigger = -trigger # set right trigger to zero gamepad.SetTrigger('L', trigger) gamepad.SetTrigger('R', 0) # axis value axis = 0 if controls[0][0] >= 0: axis = int(round(controls[0][0] * AXIS_MAX)) else: axis = int(round(controls[0][0] * (-AXIS_MIN))) gamepad.SetAxis('X', axis) def drive(model): global gamepad gamepad = XInputDevice(1) gamepad.PlugIn() # last_time = time.time() # to measure the number of frames close = False # to exit execution pause = True # to pause execution stop = False # to stop the car throttle = 0 left_line_max = 75 right_line_max = 670 print("Press T to 
start driving") while not close: yolo_screen, resized, speed, direct = img_process("Grand Theft Auto V") cv2.imshow("Driving-mode", yolo_screen) cv2.waitKey(1) while not pause: # apply the preprocessing screen, resized, speed, direct = img_process("Grand Theft Auto V") radar = cv2.cvtColor(resized[206:226, 25:45, :], cv2.COLOR_RGB2BGR)[:, :, 2:3] resized = preprocess(resized) left_line_color = [0, 255, 0] right_line_color = [0, 255, 0] # predict steering angle for the image # original + radar (small) + speed controls = model.predict([np.array([resized]), np.array([radar]), np.array([speed])], batch_size=1) # check that the car is following lane lane, stop_line = detect_lane(screen) # detect objects yolo_screen, color_detected, obj_distance = yolo_detection(screen, direct) if not stop: # adjusting speed if speed < 45: throttle = 0.4 elif speed > 50: throttle = 0.0 if 0 <= obj_distance <= 0.6: if speed < 5: throttle = 0 else: throttle = -0.7 if obj_distance <= 0.4 else -0.3 elif color_detected == "Red": if stop_line: if speed < 5: throttle = 0 elif 0 <= stop_line[0][1] <= 50: throttle = -0.5 elif 50 < stop_line[0][1] <= 120: throttle = -1 # else: # throttle = -0.5 elif speed > 5: throttle = -1 else: throttle = 0 cv2.destroyAllWindows() pause = True # adjusting steering angle if lane[0] and lane[0][0] > left_line_max: if abs(controls[0][0]) < 0.27: controls[0][0] = 0.27 left_line_color = [0, 0, 255] elif lane[1] and lane[1][0] < right_line_max: if abs(controls[0][0]) < 0.27: controls[0][0] = -0.27 right_line_color = [0, 0, 255] # set the gamepad values set_gamepad([[controls[0][0], throttle]]) # print('Main loop took {} seconds'.format(time.time() - last_time)) # last_time = time.time() screen[280:-130, :, :] = draw_lane(screen[280:-130, :, :], lane, stop_line, left_line_color, right_line_color) cv2.imshow("Driving-mode", yolo_screen) cv2.waitKey(1) if direct == 6: print("Arrived at destination.") stop = True # print('Main loop took {} seconds'.format(time.time() - 
last_time)) # last_time = time.time() keys = key_check() if 'T' in keys: cv2.destroyAllWindows() pause = True # release gamepad keys set_gamepad([[0, 0]]) print('Paused. To exit the program press Z.') time.sleep(0.5) keys = key_check() if 'T' in keys: pause = False stop = False print('Unpaused') time.sleep(1) elif 'Z' in keys: cv2.destroyAllWindows() close = True print('Closing the program.') gamepad.UnPlug() def main(): # load model location = os.path.join(model_path, 'base_model.h5') model = load_model(location) # control a car drive(model) if __name__ == '__main__': main() ================================================ FILE: driving/gamepad.py ================================================ # This code based on Musi13's code (https://github.com/Musi13/pyvxbox) """ Gamepad emulating module. """ import sys from ctypes import * dll_path = "vXboxInterface.dll" try: _vx = cdll.LoadLibrary(dll_path) except OSError as e: print(e) sys.exit("Unable to load vXbox SDK DLL. Ensure that %s is present" % dll_path) if not _vx.isVBusExists(): raise Exception('Xbox VBus does not exist') AXIS_MAX = 32767 AXIS_MIN = -32768 TRIGGER_MAX = 255 BTN_ON = True BTN_OFF = False class XInputDevice: def __init__(self, port): if _vx.isControllerExists(port): raise Exception('Port %d is already used' % port) self.UserIndex = port def PlugIn(self): _vx.PlugIn(self.UserIndex) def UnPlug(self, force=False): if not force: _vx.UnPlug(self.UserIndex) else: _vx.UnPlugForce(self.UserIndex) def SetBtn(self, button, value): function = { 'A': _vx.SetBtnA, 'B': _vx.SetBtnB, 'X': _vx.SetBtnX, 'Y': _vx.SetBtnY, 'Start': _vx.SetBtnStart, 'Back': _vx.SetBtnBack, 'LT': _vx.SetBtnLT, 'RT': _vx.SetBtnRT, 'LB': _vx.SetBtnLB, 'RB': _vx.SetBtnRB, 'GD': _vx.SetBtnGD }.get(button, None) if function is None: raise Exception('Unknown button %s' % str(button)) function(self.UserIndex, value) def SetTrigger(self, trigger, value): function = { 'L': _vx.SetTriggerL, 'R': _vx.SetTriggerR }.get(trigger, None) if function 
is None: raise Exception('Unknown trigger %s' % str(trigger)) function(self.UserIndex, value) def SetAxis(self, axis, value): function = { 'X': _vx.SetAxisX, 'Y': _vx.SetAxisY, 'Rx': _vx.SetAxisRx, 'Ry': _vx.SetAxisRy }.get(axis, None) if function is None: raise Exception('Unknown axis %s' % str(axis)) function(self.UserIndex, value) def SetDpad(self, direction, value=0): function = { 'Up': _vx.SetDpadUp, 'Right': _vx.SetDpadRight, 'Down': _vx.SetDpadDown, 'Left': _vx.SetDpadLeft, '': _vx.SetDpad }.get(direction, None) if function is None: raise Exception('Unknown direction %s' % str(direction)) if direction == '': function(self.UserIndex, value) else: function(self.UserIndex) def GetLedNumber(self, pLed): _vx.GetLedNumber(self.UserIndex, pLed) def GetVibration(self, pVib): _vx.GetVibration(self.UserIndex, pVib) ================================================ FILE: game_plugins.txt ================================================ ### List of plugins used in GTA V ### for generating better conditions for AI # allows installation of plugins Script Hook V # for adjusting weather conditions, time, amount of car, pedestrians, etc. 
Simple Trainer for GTA V ================================================ FILE: object_detection/direction.py ================================================ from enum import Enum class Direct(Enum): STRAIGHT = 0 LEFT = 1 RIGHT = 2 SLIGHTLY_LEFT = 3 SLIGHTLY_RIGHT = 4 U_TURN = 5 ARRIVED = 6 ================================================ FILE: object_detection/lane_detect.py ================================================ import math import cv2 import numpy as np from data_collection.img_process import grab_screen prev_lines = [[], [], []] def crop(image): """ Crop the image (removing the sky at the top and the car front at the bottom) """ return image[280:-130, :, :] def grayscale(img): """ Applies the Grayscale transform This will return an image with only one color channel """ return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) def canny(img, low_threshold=100, high_threshold=300): """ Applies the Canny transform """ return cv2.Canny(img, low_threshold, high_threshold) def gaussian_blur(img, kernel_size): """ Applies a Gaussian Noise kernel """ return cv2.GaussianBlur(img, (kernel_size, kernel_size), sigmaX=30, sigmaY=30) def region_of_interest(img, vertices): """ Applies an image mask. Only keeps the region of the image defined by the polygon formed from `vertices`. The rest of the image is set to black. `vertices` should be a numpy array of integer points. """ # defining a blank mask to start with mask = np.zeros_like(img) # defining a 3 channel or 1 channel color to fill the mask with depending on the input image if len(img.shape) > 2: channel_count = img.shape[2] # i.e. 
3 or 4 depending on your image ignore_mask_color = (255,) * channel_count else: ignore_mask_color = 255 # filling pixels inside the polygon defined by "vertices" with the fill color cv2.fillPoly(mask, vertices, ignore_mask_color) # returning the image only where mask pixels are nonzero masked_image = cv2.bitwise_and(img, mask) return masked_image def construct_lane(lines): """ NOTE: this is the function you might want to use as a starting point once you want to average/extrapolate the line segments you detect to map out the full extent of the lane (going from the result shown in raw-lines-example.mp4 to that shown in P1_example.mp4). Think about things like separating line segments by their slope ((y2-y1)/(x2-x1)) to decide which segments are part of the left line vs. the right line. Then, you can average the position of each of the lines and extrapolate to the top and bottom of the lane. This function draws `lines` with `color` and `thickness`. Lines are drawn on the image inplace (mutates the image). If you want to make the lines semi-transparent, think about combining this function with the add_images() function below """ left_line_x = [] left_line_y = [] right_line_x = [] right_line_y = [] stop_line_x_first = [] stop_line_y_first = [] stop_line_x_second = [] stop_line_y_second = [] lane = [[], []] stop_line = [] min_y = 0 max_y = 190 if lines is not None: for line in lines: for x1, y1, x2, y2 in line: slope = (y2 - y1) / (x2 - x1) if x1 != x2 else 0 # <-- Calculating the slope. if 0.05 < math.fabs(slope) < 0.3: # not interested continue if math.fabs(slope) <= 0.05: # stop line if (y1 > 20) and (y2 > 20): # we need to detect two stop lines (top and bottom) if not stop_line_x_first or abs(stop_line_y_first[0] - y1) < 15: stop_line_x_first.extend([x1, x2]) stop_line_y_first.extend([y1, y2]) else: stop_line_x_second.extend([x1, x2]) stop_line_y_second.extend([y1, y2]) elif slope <= 0: # <-- If the slope is negative, left group. 
left_line_x.extend([x1, x2]) left_line_y.extend([y1, y2]) else: # <-- Otherwise, right group. right_line_x.extend([x1, x2]) right_line_y.extend([y1, y2]) offset = 7 if left_line_x: poly_left = np.poly1d(np.polyfit( left_line_y, left_line_x, deg=1 )) x1 = int(poly_left(max_y)) x2 = int(poly_left(min_y)) if prev_lines[0]: # recalculate x1 if abs(x1 - prev_lines[0][0]) > offset: x1 = prev_lines[0][0] - offset if prev_lines[0][0] > x1 else prev_lines[0][0] + offset # recalculate x2 if abs(x2 - prev_lines[0][1]) > offset: x2 = prev_lines[0][1] - offset if prev_lines[0][1] > x2 else prev_lines[0][1] + offset prev_lines[0] = [x1, x2] lane[0] = [x1, max_y, x2, min_y] elif prev_lines[0]: lane[0] = [prev_lines[0][0], max_y, prev_lines[0][1], min_y] prev_lines[0] = [] if right_line_x: poly_right = np.poly1d(np.polyfit( right_line_y, right_line_x, deg=1 )) x1 = int(poly_right(max_y)) x2 = int(poly_right(min_y)) if prev_lines[1]: # recalculate x1 if abs(x1 - prev_lines[1][0]) > offset: x1 = prev_lines[1][0] - offset if prev_lines[1][0] > x1 else prev_lines[1][0] + offset # recalculate x2 if abs(x2 - prev_lines[1][1]) > offset: x2 = prev_lines[1][1] - offset if prev_lines[1][1] > x2 else prev_lines[1][1] + offset prev_lines[1] = [x1, x2] lane[1] = [x1, max_y, x2, min_y] elif prev_lines[1]: lane[1] = [prev_lines[1][0], max_y, prev_lines[1][1], min_y] prev_lines[1] = [] if stop_line_x_second: poly_stop = np.poly1d(np.polyfit( stop_line_x_first, stop_line_y_first, deg=1 )) y1 = int(poly_stop(50)) y2 = int(poly_stop(750)) if prev_lines[2]: # recalculate y1 if abs(y1 - prev_lines[2][0]) > offset: y1 = prev_lines[2][0] - offset if prev_lines[2][0] > y1 else prev_lines[2][0] + offset # recalculate y2 if abs(y2 - prev_lines[2][1]) > offset: y2 = prev_lines[2][1] - offset if prev_lines[2][1] > y2 else prev_lines[2][1] + offset prev_lines[2] = [y1, y2] stop_line.append([50, y1, 750, y2]) elif prev_lines[2]: stop_line.append([50, prev_lines[2][0], 750, prev_lines[2][1]]) prev_lines[2] = [] 
return lane, stop_line def hough_lines(img, rho=6, theta=np.pi / 120, threshold=160, min_line_len=60, max_line_gap=10): """ `img` should be the output of a Canny transform. Returns an image with hough lines drawn. """ lines = cv2.HoughLinesP(img, rho, theta, threshold, np.array([]), minLineLength=min_line_len, maxLineGap=max_line_gap) return lines # Python 3 has support for cool math symbols. def add_images(img, initial_img): """ `img` is the output of the hough_lines(), An image with lines drawn on it. Should be a blank image (all black) with lines drawn on it. `initial_img` should be the image before any processing. The result image is computed as follows: initial_img * α + img * β + γ NOTE: initial_img and img must be the same shape! """ return cv2.add(initial_img, img) def draw_lane(original_img, lane, stop_line, left_color, right_color, thickness=5): img = np.zeros((original_img.shape[0], original_img.shape[1], 3), dtype=np.uint8) polygon_points = None offset_from_lane_edge = 8 # draw lane lines if lane[0]: for x1, y1, x2, y2 in [lane[0]]: cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), left_color, thickness) if lane[1]: for x1, y1, x2, y2 in [lane[1]]: cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), right_color, thickness) # color the lane if lane[0] and lane[1]: lane_color = [40, 60, 0] for x1, y1, x2, y2 in [lane[0]]: p1 = (x1 + offset_from_lane_edge, y1) p2 = (x2 + offset_from_lane_edge, y2) for x1, y1, x2, y2 in [lane[1]]: p3 = (x2 - offset_from_lane_edge, y2) p4 = (x1 - offset_from_lane_edge, y1) polygon_points = np.array([[p1, p2, p3, p4]], np.int32) cv2.fillPoly(img, polygon_points, lane_color) # draw stop line if stop_line: for x1, y1, x2, y2 in stop_line: cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), [0, 0, 255], thickness * 3) if polygon_points is not None: for px1, py1, px2, py2 in [lane[0]]: p1 = (px1 - offset_from_lane_edge, py1) p2 = (px2 - offset_from_lane_edge, py2) for px1, py1, px2, py2 in [lane[1]]: p3 = (px2 + 
offset_from_lane_edge, py2) p4 = (px1 + offset_from_lane_edge, py1) polygon_points = np.array([[p1, p2, p3, p4]], np.int32) img = region_of_interest(img, polygon_points) return add_images(img, original_img) def detect_lane(screen): # 0. Crop the image image = crop(screen) # 1. convert to gray image = grayscale(image) # 2. apply gaussian filter image = gaussian_blur(image, 7) # 3. canny image = canny(image, 50, 100) # 4. ROI image = region_of_interest(image, np.array([[(0, 190), (0, 70), (187, 0), (613, 0), (800, 70), (800, 190)]], np.int32)) # 5. Hough lines lines = hough_lines(image) # 6. construct lane return construct_lane(lines) def main(): while True: original_img = grab_screen() # 1. convert to gray image = grayscale(crop(original_img)) # 2. apply gaussian filter image = gaussian_blur(image, 7) # 3. canny image = canny(image, 50, 100) # 4. ROI image = region_of_interest(image, np.array([[(0, 190), (0, 70), (187, 0), (613, 0), (800, 70), (800, 190)]], np.int32)) # 5. Hough lines lines = hough_lines(image) # 6. construct lane lane, stop_line = construct_lane(lines) # 7. 
Place lane detection output on the original image original_img[280:-130, :, :] = draw_lane(original_img[280:-130, :, :], lane, stop_line, [0, 255, 0], [0, 255, 0]) cv2.imshow("Frame", original_img) key = cv2.waitKey(1) & 0xFF if key == ord("q"): cv2.destroyAllWindows() break if __name__ == '__main__': main() ================================================ FILE: object_detection/object_detect.py ================================================ import cv2 import numpy as np from darkflow.net.build import TFNet from shapely.geometry import box, Polygon from data_collection.img_process import grab_screen from object_detection.direction import Direct # set YOLO options options = { 'model': 'cfg/yolo.cfg', 'load': 'yolov2.weights', 'threshold': 0.3, 'gpu': 0.5 } tfnet = TFNet(options) # capture = cv2.VideoCapture('gta2.mp4') t = (0, 0, 0) colors = [tuple(255 * np.random.rand(3)) for i in range(5)] colors2 = [tuple(t) for j in range(15)] def light_recog(frame, direct, traffic_lights): traffic_light = traffic_lights[0] # find out which traffic light to follow, if there are several if len(traffic_lights) > 1: # if we need to go to the right if direct == Direct.RIGHT or direct == Direct.SLIGHTLY_RIGHT: for tl in traffic_lights: if tl['topleft']['x'] > traffic_light['topleft']['x']: traffic_light = tl # straight or left else: for tl in traffic_lights: if tl['topleft']['x'] < traffic_light['topleft']['x']: traffic_light = tl # coordinates of the traffic light top_left = (traffic_light['topleft']['x'], traffic_light['topleft']['y']) bottom_right = (traffic_light['bottomright']['x'], traffic_light['bottomright']['y']) # crop the frame to the traffic light roi = frame[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]] hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV) color_detected = '' # possible color ranges for traffic lights red_lower = np.array([136, 87, 111], dtype=np.uint8) red_upper = np.array([180, 255, 255], dtype=np.uint8) yellow_lower = np.array([22, 60, 200], 
dtype=np.uint8) yellow_upper = np.array([60, 255, 255], dtype=np.uint8) green_lower = np.array([50, 100, 100], dtype=np.uint8) green_upper = np.array([70, 255, 255], dtype=np.uint8) # find what color the traffic light is showing red = cv2.inRange(hsv, red_lower, red_upper) yellow = cv2.inRange(hsv, yellow_lower, yellow_upper) green = cv2.inRange(hsv, green_lower, green_upper) kernel = np.ones((5, 5), np.uint8) red = cv2.dilate(red, kernel) res = cv2.bitwise_and(roi, roi, mask=red) green = cv2.dilate(green, kernel) res2 = cv2.bitwise_and(roi, roi, mask=green) (_, contours, hierarchy) = cv2.findContours(red, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) for contour in enumerate(contours): color_detected = "Red" (_, contours, hierarchy) = cv2.findContours(yellow, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) for contour in enumerate(contours): color_detected = "Yellow" (_, contours, hierarchy) = cv2.findContours(green, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) for contour in enumerate(contours): color_detected = "Green" if (0 <= top_left[1] and bottom_right[1] <= 437) and (244 <= top_left[0] and bottom_right[0] <= 630): frame = cv2.putText(frame, color_detected, bottom_right, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2) frame = cv2.putText(frame, color_detected, bottom_right, cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2) return frame, color_detected def distance_to_car(frame, top_left, bottom_right): distance = None # myRoi_array= np.array([[(0, 490), (309, 269), (490, 270), (800,473)]]) # process_img = region_of_interest(frame, myRoi_array) # cv2.imshow("precess_img", process_img) # roi = Polygon([(15, 472), (330, 321), (470, 321), (796, 495)]) roi = Polygon([(100, 470), (350, 280), (450, 280), (700, 470)]) car = box(top_left[0], top_left[1], bottom_right[0], bottom_right[1]) if roi.intersects(car): mid_x = (bottom_right[0] + top_left[0]) / 2 mid_y = (top_left[1] + bottom_right[1]) / 2 distance = round((1 - ((bottom_right[0] / 800) - (top_left[0] / 800))) ** 4, 1) frame = 
def yolo_detection(screen, direct):
    """
    Run YOLO on a frame, annotate the detections and summarise what matters for driving.

    :param screen: captured frame; the bottom 130 rows are excluded from detection
    :param direct: planned direction, forwarded to light_recog to choose between several traffic lights
    :return: tuple (screen, color_detected, distance) where color_detected is the value returned by
             light_recog (None if no traffic light qualified) and distance is the smallest distance
             reported for a vehicle or person, or 1 if there was none
    """
    from itertools import cycle

    # find objects on a frame by using YOLO
    results = tfnet.return_predict(screen[:-130, :, :])

    # create a list of detected traffic lights (might be several on a frame)
    traffic_lights = []
    color_detected = None
    distance = 1
    vehicle_labels = ('car', 'bus', 'truck', 'train')

    # cycle through the palettes so that every detection is handled, not only as many as there are colours
    for color, color2, result in zip(cycle(colors), cycle(colors2), results):
        top_left = (result['topleft']['x'], result['topleft']['y'])
        bottom_right = (result['bottomright']['x'], result['bottomright']['y'])
        label = result['label']
        confidence = result['confidence']
        text = '{}: {:.0f}%'.format(label, confidence * 100)

        if label == 'traffic light' and confidence > 0.3:
            # only lights whose left edge is inside this horizontal window are considered
            if 220 <= result['topleft']['x'] <= 630:
                traffic_lights.append(result)
                color = color2
                screen = cv2.rectangle(screen, top_left, bottom_right, color, 6)
                screen = cv2.putText(screen, text, top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

        if label in vehicle_labels:
            screen, car_distance = distance_to_car(screen, top_left, bottom_right)
            # compare against None explicitly: a distance of 0.0 is the closest obstacle, not "no obstacle"
            if car_distance is not None and 0 <= car_distance < distance:
                distance = car_distance
            screen = cv2.rectangle(screen, top_left, bottom_right, color, 6)
            screen = cv2.putText(screen, text, top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

        if label == 'person':
            screen, person_distance = distance_to_human(screen, top_left, bottom_right)
            if person_distance is not None and 0 <= person_distance < distance:
                distance = person_distance
            screen = cv2.rectangle(screen, top_left, bottom_right, color, 6)
            screen = cv2.putText(screen, text, top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

    if traffic_lights:
        screen, color_detected = light_recog(screen, direct, traffic_lights)

    return screen, color_detected, distance
NVIDIA model used # Image normalization to avoid saturation and make gradients work better. # Convolution: 5x5, filter: 24, strides: 2x2, activation: ELU # Convolution: 5x5, filter: 36, strides: 2x2, activation: ELU # Convolution: 5x5, filter: 48, strides: 2x2, activation: ELU # Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU # Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU # Drop out (0.5) # Fully connected: neurons: 100, activation: ELU # Fully connected: neurons: 50, activation: ELU # Fully connected: neurons: 10, activation: ELU # Fully connected: neurons: 1 (output) # # the convolution layers are meant to handle feature engineering # the fully connected layer for predicting the steering angle. # dropout avoids overfitting # ELU(Exponential linear unit) function takes care of the Vanishing gradient problem. # """ # model = Sequential() # model.add(Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE)) # model.add(Conv2D(24, (5, 5), activation='elu', strides=(2, 2))) # model.add(Conv2D(36, (5, 5), activation='elu', strides=(2, 2))) # model.add(Conv2D(48, (5, 5), activation='elu', strides=(2, 2))) # model.add(Conv2D(64, (3, 3), activation='elu')) # model.add(Conv2D(64, (3, 3), activation='elu')) # model.add(Dropout(args.keep_prob)) # model.add(Flatten()) # model.add(Dense(100, activation='elu')) # model.add(Dense(50, activation='elu')) # model.add(Dense(10, activation='elu')) # model.add(Dense(1)) # model.summary() # # return model # original + radar added # def build_model(args): # # image model # img_input = Input(shape=INPUT_SHAPE) # img_model = (Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE))(img_input) # img_model = (Conv2D(24, (5, 5), activation='elu', strides=(2, 2)))(img_model) # img_model = (Conv2D(36, (5, 5), activation='elu', strides=(2, 2)))(img_model) # img_model = (Conv2D(48, (5, 5), activation='elu', strides=(2, 2)))(img_model) # img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model) # img_model = 
# original + radar and speed info added
def build_model(args):
    """
    Build the three-input network: camera image, radar patch and current speed.

    The image branch is the NVIDIA-style convolution stack, the radar branch is a small
    conv/max-pool stack, and the speed value is concatenated in after the first shared dense layer.

    :param args: object exposing ``keep_prob``, which is passed to the Dropout layers
    :return: Keras Model taking [image, radar, speed] and producing a single output value
    """
    # image branch
    img_input = Input(shape=INPUT_SHAPE)
    image_branch = Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE)(img_input)
    for filters in (24, 36, 48):
        image_branch = Conv2D(filters, (5, 5), activation='elu', strides=(2, 2))(image_branch)
    for _ in range(2):
        image_branch = Conv2D(64, (3, 3), activation='elu')(image_branch)
    image_branch = Dropout(args.keep_prob)(image_branch)
    image_branch = Flatten()(image_branch)
    image_branch = Dense(100, activation='elu')(image_branch)

    # radar branch
    radar_input = Input(shape=RADAR_SHAPE)
    radar_branch = radar_input
    for filters in (32, 64):
        radar_branch = Conv2D(filters, (5, 5), activation='elu')(radar_branch)
        radar_branch = MaxPooling2D((2, 2), strides=(2, 2))(radar_branch)
    radar_branch = Dropout(args.keep_prob / 2)(radar_branch)
    radar_branch = Flatten()(radar_branch)
    radar_branch = Dense(10, activation='elu')(radar_branch)

    # speed is a single scalar per sample
    speed_input = Input(shape=(1,))

    # merge: image + radar features first, speed joins before the last hidden layer
    merged = Concatenate()([image_branch, radar_branch])
    merged = Dense(50, activation='elu')(merged)
    merged = Concatenate()([merged, speed_input])
    merged = Dense(10, activation='elu')(merged)
    prediction = Dense(1)(merged)

    final_model = Model(inputs=[img_input, radar_input, speed_input], outputs=prediction)
    final_model.summary()

    return final_model
# """ # # image model # img_input = Input(shape=INPUT_SHAPE) # img_model = (Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE))(img_input) # img_model = (Conv2D(24, (5, 5), activation='elu', strides=(2, 2)))(img_model) # img_model = (Conv2D(36, (5, 5), activation='elu', strides=(2, 2)))(img_model) # img_model = (Conv2D(48, (5, 5), activation='elu', strides=(2, 2)))(img_model) # img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model) # img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model) # img_model = (Dropout(args.keep_prob))(img_model) # img_model = (Flatten())(img_model) # img_model = (Dense(100, activation='elu'))(img_model) # # # speed and direction model # metrics_input = Input(shape=(2,)) # metrics_model = Dense(2, activation='elu')(metrics_input) # # # combined model # out = Concatenate()([img_model, metrics_model]) # out = (Dense(50, activation='elu'))(out) # out = (Dense(10, activation='elu'))(out) # out = (Dense(2))(out) # # final_model = Model(inputs=[img_input, metrics_input], outputs=out) # final_model.summary() # # return final_model ================================================ FILE: training/train.py ================================================ # This code based on Siraj Raval's code (https://github.com/llSourcell/How_to_simulate_a_self_driving_car) """ Training module. Based on "End to End Learning for Self-Driving Cars" research paper by Nvidia. 
def load_data(args):
    """
    Open the training data file and split its sample indexes into training and validation sets.

    :param args: parsed command line arguments; ``test_size`` is the validation fraction and
                 ``data_dir`` (when present) is the HDF5 file to open, falling back to the
                 default ``path`` imported from data_collection
    :return: tuple (data, indexes_train, indexes_valid) where data is the open h5py file
    """
    # honour the -d command line option instead of always reading the default file
    data = h5py.File(getattr(args, 'data_dir', path), 'r')

    # list of all possible indexes
    indexes = list(range(data['img'].shape[0]))

    # split the indexes into a training set and a validation set (validation fraction = test_size)
    indexes_train, indexes_valid = train_test_split(indexes, test_size=args.test_size, random_state=0)

    return data, indexes_train, indexes_valid
# for command line args
def s2b(s):
    """
    Interpret a command line string as a boolean.

    Returns True for 'true', 'yes', 'y' or '1' (case-insensitive) and False for anything else.
    """
    truthy = ('true', 'yes', 'y', '1')
    return s.lower() in truthy
def resize(image):
    """
    Resize the image to the input shape used by the network model
    """
    # interpolation must be passed by keyword: the third positional parameter of cv2.resize is `dst`,
    # so a positional cv2.INTER_AREA is not used as the interpolation method.
    # NOTE(review): models trained with the previous call saw default-interpolated images - confirm
    # whether existing weights need retraining.
    return cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_AREA)
def preprocess(image):
    """
    Run the full preprocessing chain on one frame: crop, then resize, then convert RGB to YUV
    """
    cropped = crop(image)
    resized = resize(cropped)
    return rgb2yuv(resized)
def random_shadow(image):
    """
    Generates and adds random shadow

    A random line from the top edge to the bottom edge splits the image in two;
    one randomly chosen side is darkened.

    :param image: RGB image with three channels, of any height and width
    :return: new RGB image with the shadow applied
    """
    # use the size of the image actually passed in: augmentation runs on the raw frame,
    # which is not necessarily the network input size
    height, width = image.shape[:2]

    # (x1, y1) and (x2, y2) forms a line from the top edge to the bottom edge
    x1, y1 = width * np.random.rand(), 0
    x2, y2 = width * np.random.rand(), height
    # np.mgrid returns the row (y) coordinates first and the column (x) coordinates second
    ym, xm = np.mgrid[0:height, 0:width]

    # mathematically speaking, we want to set 1 below the line and zero otherwise
    # Our coordinate is up side down. So, the above the line:
    # (ym-y1)/(xm-x1) > (y2-y1)/(x2-x1)
    # as x2 == x1 causes zero-division problem, we'll write it in the below form:
    # (ym-y1)*(x2-x1) - (y2-y1)*(xm-x1) > 0
    mask = np.zeros_like(image[:, :, 1])
    mask[(ym - y1) * (x2 - x1) - (y2 - y1) * (xm - x1) > 0] = 1

    # choose which side should have shadow and how dark it should be
    cond = mask == np.random.randint(2)
    s_ratio = np.random.uniform(low=0.2, high=0.5)

    # scale the Lightness channel (index 1) in HLS (Hue, Lightness, Saturation)
    hls = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
    hls[:, :, 1][cond] = hls[:, :, 1][cond] * s_ratio
    return cv2.cvtColor(hls, cv2.COLOR_HLS2RGB)
def batch_generator(data, indexes, batch_size, is_training):
    """
    Endlessly generate batches of network inputs and steering targets.

    :param data: mapping with 'img', 'controls' and 'metrics' datasets indexable by sample index
    :param indexes: sample indexes this generator may draw from
    :param batch_size: maximum number of samples per batch
    :param is_training: when True, frames are randomly augmented
    :return: generator yielding ([images, radars, speeds], controls); each batch is a fresh random
             draw without replacement of up to batch_size indexes
    :raises ValueError: if indexes is empty
    """
    if len(indexes) == 0:
        raise ValueError("batch_generator needs at least one index")

    # preprocessing on the CPU
    with tf.device('/cpu:0'):
        while True:
            # one permutation per batch, truncated to the batch size
            batch_indexes = np.random.permutation(indexes)[:batch_size]
            count = len(batch_indexes)

            # allocate new arrays for every batch so that a batch already handed to the consumer
            # is never overwritten, and size them to the number of samples actually available
            images = np.empty([count, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS])
            radars = np.empty([count, RADAR_HEIGHT, RADAR_WIDTH, RADAR_CHANNELS])
            speeds = np.empty(count)
            controls = np.empty(count)

            for i, index in enumerate(batch_indexes):
                camera = data['img'][index]
                # radar patch is cut from the unaugmented frame
                radar = cv2.cvtColor(camera[206:226, 25:45, :], cv2.COLOR_RGB2BGR)
                steer = data['controls'][index][1]

                # augmentation
                if is_training:
                    prob = np.random.rand()
                    if (abs(steer) < 0.4 and prob > 0.2) or (prob < 0.6):
                        camera, steer = augment(camera, steer)

                # add the image and steering angle to the batch
                images[i] = preprocess(camera)
                # after the RGB->BGR swap, channel 2 is channel 0 of the original frame
                radars[i] = radar[:, :, 2:3]
                # steering is divided by 10 to normalize it
                controls[i] = steer / 10
                speeds[i] = data['metrics'][index][0]

            yield [images, radars, speeds], controls