Repository: hadipash/AI_GTA5
Branch: master
Commit: 424e615419d9
Files: 24
Total size: 59.8 KB
Directory structure:
gitextract_4lc496_7/
├── README.md
├── data_collection/
│ ├── data_balancing.py
│ ├── data_collect.py
│ ├── gamepad_cap.py
│ ├── histogram.py
│ ├── img_process.py
│ ├── key_cap.py
│ └── resources/
│ ├── arrows.npy
│ ├── arrows_labels.npy
│ ├── digits.npy
│ └── digits_labels.npy
├── drivers.txt
├── driving/
│ ├── drive.py
│ └── gamepad.py
├── game_plugins.txt
├── object_detection/
│ ├── direction.py
│ ├── lane_detect.py
│ └── object_detect.py
├── requirements.txt
└── training/
├── base_model.h5
├── model.py
├── models/
│ └── original + radar/
│ └── base_model.h5
├── train.py
└── utils.py
================================================
FILE CONTENTS
================================================
================================================
FILE: README.md
================================================
# Self-Driving Car for GTA V
### Overview
The aim of this project is to create a self-driving car using a virtual simulator (specifically GTA V).
### [Youtube Video](https://www.youtube.com/watch?v=BRK0wm7rrfQ)
================================================
FILE: data_collection/data_balancing.py
================================================
import h5py
from data_collection.data_collect import path as source_path
# Destination HDF5 file for the balanced data set. A raw string keeps the
# Windows backslashes literal ("\G" is not a valid escape sequence).
dest_path = r"F:\Graduation_Project\training_data_balanced.h5"
destination = h5py.File(dest_path, 'w')

# All three datasets start empty and can grow along the first axis; rows are
# stored in chunks of 30.
destination.create_dataset('img', (0, 240, 320, 3), dtype='u1',
                           maxshape=(None, 240, 320, 3), chunks=(30, 240, 320, 3))
destination.create_dataset('controls', (0, 2), dtype='i1', maxshape=(None, 2), chunks=(30, 2))
destination.create_dataset('metrics', (0, 2), dtype='u1', maxshape=(None, 2), chunks=(30, 2))
def save(data_img, controls, metrics):
    """Append one batch of rows to the 'img', 'controls' and 'metrics' datasets.

    Nothing is written when `data_img` is empty.
    """
    if not data_img:  # nothing buffered
        return

    batches = (("img", data_img), ("controls", controls), ("metrics", metrics))
    for name, batch in batches:
        dataset = destination[name]
        # grow the dataset by the batch size, then fill the new tail
        dataset.resize((dataset.shape[0] + len(batch)), axis=0)
        dataset[-len(batch):] = batch
def main():
    """Copy a balanced subset of the collected data into the destination file.

    Rows are skipped when the first metric (speed) is 0 or the second metric
    (direction) is 6, i.e. the car has arrived. Of the remaining rows, every
    row with a non-zero steering value is kept, but only every fifth row with
    steering 0 (straight driving). Rows are flushed to the destination in
    batches of 10000.

    The source file is closed by the context manager and the destination file
    in the ``finally`` clause, so neither stays open if copying fails.
    """
    images = []
    controls = []
    metrics = []
    tuples = 0
    straights = 0

    try:
        with h5py.File(source_path, 'r') as source:
            for i in range(source['img'].shape[0]):
                # Every dataset index is a separate HDF5 read, so fetch each row once.
                metric = source['metrics'][i]

                # if speed is 0 or arrived at the destination, skip the frame
                if metric[0] == 0 or metric[1] == 6:
                    continue

                control = source['controls'][i]
                if control[1] == 0:
                    # save only each 5th straight drive frame
                    add = (straights % 5 == 0)
                    straights += 1
                else:
                    # save all turns
                    add = True

                if not add:
                    continue

                images.append(source['img'][i])
                controls.append(control)
                metrics.append(metric)
                tuples += 1

                if tuples % 10000 == 0:  # every 2.5 GB
                    print(tuples)
                    save(images, controls, metrics)
                    images = []
                    controls = []
                    metrics = []

            # flush whatever is left in the buffers
            save(images, controls, metrics)

        print("Copied: {:d} tuples from the source file".format(tuples))
    finally:
        destination.close()


if __name__ == '__main__':
    main()
================================================
FILE: data_collection/data_collect.py
================================================
"""
Data collection module (saves data in H5 format).
Saves screen captures and pressed keys into a file
for further trainings of NN.
"""
import os
import threading
import time
import winsound
import h5py
from data_collection.gamepad_cap import Gamepad
from data_collection.img_process import img_process
from data_collection.key_cap import key_check
# Serialises access to the data file between the main loop and saver threads.
lock = threading.Lock()

# open the data file
# A raw string keeps the Windows backslashes literal ("\G" is not a valid
# escape sequence); the resulting path is unchanged.
path = r"F:\Graduation_Project\training_data.h5"
data_file = None
if os.path.isfile(path):
    # keep appending to an existing recording
    data_file = h5py.File(path, 'a')
else:
    data_file = h5py.File(path, 'w')
    # Write data in chunks for faster writing and reading by NN
    data_file.create_dataset('img', (0, 240, 320, 3), dtype='u1',
                             maxshape=(None, 240, 320, 3), chunks=(30, 240, 320, 3))
    data_file.create_dataset('controls', (0, 2), dtype='i1', maxshape=(None, 2), chunks=(30, 2))
    data_file.create_dataset('metrics', (0, 2), dtype='u1', maxshape=(None, 2), chunks=(30, 2))
def save(data_img, controls, metrics):
    """Append one batch of rows to the data file while holding the lock.

    Nothing is written when `data_img` is empty.
    """
    with lock:  # make sure that data is consistent
        if not data_img:  # nothing buffered
            return

        batches = (("img", data_img), ("controls", controls), ("metrics", metrics))
        for name, batch in batches:
            dataset = data_file[name]
            # grow the dataset by the batch size, then fill the new tail
            dataset.resize((dataset.shape[0] + len(batch)), axis=0)
            dataset[-len(batch):] = batch
def delete(session):
    """Drop the most recent rows from every dataset of the data file.

    At most 500 rows are removed, and never more than a dataset holds. The
    lock is taken because `save` may still be running in a background thread
    on the same datasets.

    :param session: number of frames recorded in the session being discarded
    """
    frames = max(0, min(session, 500))
    with lock:
        for name in ("img", "controls", "metrics"):
            dataset = data_file[name]
            # never resize below zero rows
            dataset.resize(max(dataset.shape[0] - frames, 0), axis=0)
def main():
    """Record screen frames, gamepad controls and HUD metrics into the data file.

    RB on the gamepad starts and pauses recording. While recording, every
    frame is buffered and the buffers are handed to a background `save`
    thread each time they hold a multiple of 30 frames. On pause the user
    answers Y or N on the keyboard: N discards the last frames of the session
    (at most 500), Y keeps them. LB while paused flushes the buffers, closes
    the data file and exits.
    """
    # initialize gamepad
    gamepad = Gamepad()
    gamepad.open()

    alert_time = time.time()  # to signal about exceeding speed limit
    close = False  # to exit execution
    pause = True  # to pause execution
    session = 0  # number of frames recorded in one session
    training_img = []  # lists for storing training data
    controls = []
    metrics = []

    print("Press RB on your gamepad to start recording")
    while not close:
        while not pause:
            # read throttle and steering values from the gamepad
            throttle, steering = gamepad.get_state()
            # get screen, speed and direction
            ignore, screen, speed, direction = img_process("Grand Theft Auto V")

            training_img.append(screen)
            controls.append([throttle, steering])
            metrics.append([speed, direction])
            session += 1

            if speed > 60 and time.time() - alert_time > 1:
                winsound.PlaySound('.\\resources\\alert.wav', winsound.SND_ASYNC)
                alert_time = time.time()

            # save the data every 30 iterations
            if len(training_img) % 30 == 0:
                threading.Thread(target=save, args=(training_img, controls, metrics)).start()
                # start fresh buffers; the saver thread owns the old lists
                training_img = []
                controls = []
                metrics = []

            time.sleep(0.015)  # in order to slow down fps

            if gamepad.get_RB():
                pause = True
                print('Paused. Save the last 15 seconds?')
                keys = key_check()
                while ('Y' not in keys) and ('N' not in keys):
                    keys = key_check()

                if 'N' in keys:
                    # delete() removes at most 500 frames
                    discard = min(session, 500)
                    # Frames still buffered here were never written to the
                    # file, so drop those from the buffers and trim only the
                    # remainder from the file. Passing the whole session count
                    # would also delete older rows that were already approved.
                    buffered = min(len(training_img), discard)
                    if buffered:
                        del training_img[-buffered:]
                        del controls[-buffered:]
                        del metrics[-buffered:]
                    delete(discard - buffered)
                    print('Deleted.')
                else:
                    print('Saved.')

                print('To exit the program press LB.')
                session = 0
                time.sleep(0.5)

        if gamepad.get_RB():
            pause = False
            print('Unpaused')
            time.sleep(1)
        elif gamepad.get_LB():
            gamepad.close()
            close = True
            print('Saving data and closing the program.')
            save(training_img, controls, metrics)
            data_file.close()


if __name__ == '__main__':
    main()
================================================
FILE: data_collection/gamepad_cap.py
================================================
"""
Module for reading information from an Xbox gamepad
"""
import threading
from inputs import get_gamepad
# Gamepad part
# Raw value limits of the stick axis and of the triggers
AXIS_MAX, AXIS_MIN = 32767, -32768
TRIGGER_MAX, TRIGGER_MIN = 255, -255

# Scale factors that map raw values onto the range -10..10. All four factors
# are positive: the negative limits are divided into a negative numerator.
AXIS_MAX_NORM = 10 / AXIS_MAX
AXIS_MIN_NORM = -10 / AXIS_MIN
TRIGGER_MAX_NORM = 10 / TRIGGER_MAX
TRIGGER_MIN_NORM = -10 / TRIGGER_MIN

# Normalized values strictly between -DEADZONE and DEADZONE are reported as 0
DEADZONE = 3
class Gamepad:
    """Keeps the latest throttle, steering and bumper state of a gamepad.

    `open` starts a background thread that consumes gamepad events and stores
    the most recent value of each signal of interest; the getters return
    those stored values without blocking.
    """

    def __init__(self):
        self.x_axis = 0   # last raw ABS_X value (steering)
        self.y_axisP = 0  # last raw ABS_RZ value (positive throttle)
        self.y_axisN = 0  # negated last raw ABS_Z value (negative throttle)
        self.RB = 0       # last BTN_TR state
        self.LB = 0       # last BTN_TL state
        self.stop = False
        self._thread = None

    def open(self):
        """Start the background event reader."""
        self.stop = False
        # get_gamepad() blocks until the next event arrives, so the reader
        # may never notice `stop`. A daemon thread cannot keep the process
        # alive after close().
        self._thread = threading.Thread(target=self.run, daemon=True)
        self._thread.start()

    def run(self):
        """Consume gamepad events until `close` is called."""
        while not self.stop:
            events = get_gamepad()
            for event in events:
                if event.code == "ABS_X":
                    self.x_axis = event.state
                elif event.code == "ABS_RZ":
                    self.y_axisP = event.state
                elif event.code == "ABS_Z":
                    self.y_axisN = -event.state
                elif event.code == "BTN_TR":
                    self.RB = event.state
                elif event.code == "BTN_TL":
                    self.LB = event.state
                # we're not interested in the remaining signals

    def get_state(self):
        """Return ``(throttle, steering)`` as integers in the range -10..10.

        Values strictly inside the dead zone are reported as 0.
        """
        xAxis = self.x_axis
        # the positive trigger wins once it is pressed past 60 raw units
        yAxis = self.y_axisP if self.y_axisP > 60 else self.y_axisN

        # normalize x axis
        if xAxis > 0:
            xAxis = int(round(xAxis * AXIS_MAX_NORM))
        else:
            xAxis = int(round(xAxis * AXIS_MIN_NORM))
        if -DEADZONE < xAxis < DEADZONE:
            xAxis = 0

        # normalize y axis
        if yAxis > 0:
            yAxis = int(round(yAxis * TRIGGER_MAX_NORM))
        else:
            yAxis = int(round(yAxis * TRIGGER_MIN_NORM))
        if -DEADZONE < yAxis < DEADZONE:
            yAxis = 0

        # return throttle and then steering
        return yAxis, xAxis

    def get_RB(self):
        """Return the last reported state of the BTN_TR button."""
        return self.RB

    def get_LB(self):
        """Return the last reported state of the BTN_TL button."""
        return self.LB

    def close(self):
        """Ask the background reader to stop after its current event batch."""
        self.stop = True
================================================
FILE: data_collection/histogram.py
================================================
"""
Histogram of turns (for future balancing of data)
"""
import h5py
import matplotlib.pyplot as plt
import numpy as np
from data_collection.data_collect import path
# Bin edges at -10.5, -9.5, ..., 10.5: one bin centred on each integer
# steering value from -10 to 10.
n_bins = [edge - 0.5 for edge in range(-10, 12)]

# Second column of 'controls' holds the steering value of every frame.
with h5py.File(path, 'r') as data:
    steering = data['controls'][:, 1]

fig, axs = plt.subplots()
axs.hist(steering, bins=n_bins)
plt.xticks(np.arange(-10, 11, step=1))
plt.show()
================================================
FILE: data_collection/img_process.py
================================================
"""
Module for preprocessing screen captures
"""
import win32gui
import win32ui
import cv2
import numpy as np
import win32con
def initKNN(data, labels, shape):
    """Build a k-nearest-neighbours classifier from two .npy files.

    :param data: path of the .npy file with the training samples
    :param labels: path of the .npy file with the matching labels
    :param shape: number of values per sample; the samples are reshaped to
        rows of this length and converted to float32
    :return: the trained cv2 KNearest model
    """
    model = cv2.ml.KNearest_create()
    samples = np.load(data).reshape(-1, shape).astype(np.float32)
    responses = np.load(labels)
    model.train(samples, cv2.ml.ROW_SAMPLE, responses)
    return model
# Classifiers for the HUD digits (40 values per sample) and the direction
# arrows (90 values per sample). Raw strings keep the Windows backslashes
# literal ("\d" is not a valid escape sequence); the paths are unchanged.
knnDigits = initKNN(r'..\data_collection\resources\digits.npy',
                    r'..\data_collection\resources\digits_labels.npy', 40)
knnArrows = initKNN(r'..\data_collection\resources\arrows.npy',
                    r'..\data_collection\resources\arrows_labels.npy', 90)
# Done by Frannecklp
def grab_screen(winName: str = "Grand Theft Auto V"):
    """Capture the contents of the window titled `winName`.

    The pixels are copied from the desktop device context at the window's
    screen position, after trimming fixed margins for the title bar and the
    window borders.

    :param winName: exact title of the window to capture
    :return: uint8 array of shape (height, width, 3): the captured 4-channel
        bitmap with its fourth channel dropped
    """
    desktop = win32gui.GetDesktopWindow()

    # get area by a window name
    gtawin = win32gui.FindWindow(None, winName)
    # get the bounding box of the window
    left, top, x2, y2 = win32gui.GetWindowRect(gtawin)
    # cut window borders
    top += 32
    left += 3
    y2 -= 4
    x2 -= 4
    width = x2 - left + 1
    height = y2 - top + 1

    # the device context (DC) for the entire desktop window
    hwindc = win32gui.GetWindowDC(desktop)
    # Create a DC object from an integer handle
    srcdc = win32ui.CreateDCFromHandle(hwindc)
    # Create a memory device context that is compatible with the source DC
    memdc = srcdc.CreateCompatibleDC()
    # Create a bitmap compatible with the source DC and select it into memdc
    bmp = win32ui.CreateBitmap()
    bmp.CreateCompatibleBitmap(srcdc, width, height)
    memdc.SelectObject(bmp)

    # Copy the window area from the desktop DC into the bitmap
    # parameters: destPos, size, dc, srcPos, rop (the raster operation)
    memdc.BitBlt((0, 0), (width, height), srcdc, (left, top), win32con.SRCCOPY)

    # np.frombuffer views the raw bytes directly; np.fromstring is deprecated
    # for binary input.
    raw_bits = bmp.GetBitmapBits(True)
    img = np.frombuffer(raw_bits, dtype='uint8').reshape(height, width, 4)

    # Delete all resources associated with the device contexts
    srcdc.DeleteDC()
    memdc.DeleteDC()
    # Release the desktop device context
    win32gui.ReleaseDC(desktop, hwindc)
    # Delete the bitmap; its handle is no longer valid afterwards
    win32gui.DeleteObject(bmp.GetHandle())

    # cvtColor allocates a new, writable array
    return cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
def predict(img, knn):
    """Return the result array of a 1-nearest-neighbour lookup of `img`.

    `knn.findNearest` returns (retval, results, neighbours, distances); only
    the results are passed on.
    """
    return knn.findNearest(img, k=1)[1]
def preprocess(img):
    """Convert an RGB crop to grayscale and binarize it.

    Uses mean adaptive thresholding with a block size of 7 and a constant
    of -5.
    """
    return cv2.adaptiveThreshold(
        cv2.cvtColor(img, cv2.COLOR_RGB2GRAY),
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        7,
        -5,
    )
def convert_speed(num1, num2, num3):
    """Combine three recognized digit fields into one integer speed.

    Each argument is a classifier result whose label sits at ``[0][0]``.
    Label 10 marks an empty field. The remaining digits are read left to
    right (`num1`, `num2`, `num3`) as one decimal number, so empty fields
    simply shorten the number. Returns 0 when all fields are empty.
    """
    speed = 0
    for field in (num1, num2, num3):
        label = field[0][0]
        if label == 10:  # empty field
            continue
        speed = speed * 10 + int(label)
    return speed
def img_process(winName: str = "Grand Theft Auto V"):
    """Grab the game window and read the speed and direction from its HUD.

    :param winName: title of the window to capture
    :return: ``(screen, resized, speed, direct)``: the full capture, the
        capture resized to 320x240, the recognized speed and the recognized
        direction label as an int
    """
    screen = grab_screen(winName)

    # Speed read-out. Crop rows are for Ji Hyun's computer; on Rustam's
    # computer the rows are 573:581.
    numbers = preprocess(screen[567:575, 683:702, :])
    # three digit fields, each flattened to a 40-value sample
    num1, num2, num3 = (
        predict(field.reshape(-1, 40).astype(np.float32), knnDigits)
        for field in (numbers[:, :5], numbers[:, 7:12], numbers[:, -5:])
    )

    # Direction arrow. Crop rows are for Ji Hyun's computer; on Rustam's
    # computer the rows are 567:576.
    arrow = preprocess(screen[561:570, 18:28, :]).reshape(-1, 90).astype(np.float32)
    direct = int(predict(arrow, knnArrows)[0][0])

    speed = convert_speed(num1, num2, num3)
    return screen, cv2.resize(screen, (320, 240)), speed, direct
================================================
FILE: data_collection/key_cap.py
================================================
# Citation: Box Of Hats (https://github.com/Box-Of-Hats)
"""
Module for reading keys from a keyboard
"""
import win32api as wapi
# Characters whose key state is polled by key_check(): backspace first, then
# one entry per character of the string below.
keyList = ["\b"]
keyList.extend("ABCDEFGHIJKLMNOPQRSTUVWXYZ 0123456789,.'£$/\\")
def key_check():
    """Return the entries of `keyList` whose key state is reported as non-zero."""
    return [key for key in keyList if wapi.GetAsyncKeyState(ord(key))]
================================================
FILE: drivers.txt
================================================
# For testing AI an XBox controller emulator is needed
# https://github.com/shauleiz/ScpVBus/releases
ScpVBus
# Installation:
# In CMD (administrator): devcon.exe install ScpVBus.inf Root\ScpVBus
# Removal:
# In CMD (administrator): devcon.exe remove Root\ScpVBus
================================================
FILE: driving/drive.py
================================================
"""
Car driving module.
"""
# reading and writing files
import os
import time
import cv2
import numpy as np
# load our saved model
from keras.models import load_model
# helper classes
from data_collection.img_process import img_process
from data_collection.key_cap import key_check
# gamepad axes limits and gamepad module
from driving.gamepad import AXIS_MIN, AXIS_MAX, TRIGGER_MAX, XInputDevice
from object_detection.direction import Direct
# YOLO algorithm
from object_detection.object_detect import yolo_detection
# lane detection algorithm
from object_detection.lane_detect import detect_lane, draw_lane
from training.utils import preprocess
# Directory that holds the trained model file
model_path = r"..\training"
# Virtual controller; created by drive()
gamepad = None
def set_gamepad(controls):
    """Apply one ``[[steering, throttle]]`` pair to the virtual gamepad.

    A non-negative throttle drives the right trigger and releases the left
    one; a negative throttle drives the left trigger with its magnitude and
    releases the right one. Steering is scaled to the X axis range.
    """
    steering, throttle = controls[0][0], controls[0][1]

    # trigger values
    trigger = int(round(throttle * TRIGGER_MAX))
    if trigger < 0:
        left_value, right_value = -trigger, 0
    else:
        left_value, right_value = 0, trigger
    gamepad.SetTrigger('L', left_value)
    gamepad.SetTrigger('R', right_value)

    # axis value: positive steering scales to AXIS_MAX, negative to AXIS_MIN
    scale = AXIS_MAX if steering >= 0 else -AXIS_MIN
    gamepad.SetAxis('X', int(round(steering * scale)))
def drive(model):
    """Drive the car in a loop until the user presses Z while paused.

    T toggles between paused and driving. While driving, each iteration grabs
    the screen, predicts the steering value with `model`, runs lane and object
    detection, picks a throttle value from speed, obstacle distance and
    traffic-light colour, corrects the steering when a lane line comes too
    close, and sends the result to the virtual gamepad. When the direction
    label reports arrival, the car brakes and the loop pauses itself.

    :param model: loaded model whose `predict` takes
        ``[image batch, radar batch, speed batch]``
    """
    global gamepad
    # plug in the virtual controller on port 1
    gamepad = XInputDevice(1)
    gamepad.PlugIn()
    # last_time = time.time() # to measure the number of frames
    close = False  # to exit execution
    pause = True  # to pause execution
    stop = False  # to stop the car
    throttle = 0
    # x limits (at the bottom of the lane crop) beyond which a lane line
    # triggers a steering correction
    left_line_max = 75
    right_line_max = 670
    print("Press T to start driving")
    while not close:
        # while paused, just show the current capture
        yolo_screen, resized, speed, direct = img_process("Grand Theft Auto V")
        cv2.imshow("Driving-mode", yolo_screen)
        cv2.waitKey(1)
        while not pause:
            # apply the preprocessing
            screen, resized, speed, direct = img_process("Grand Theft Auto V")
            # 20x20 radar patch, one channel: index 2 after the RGB->BGR swap,
            # i.e. channel 0 of `resized`
            radar = cv2.cvtColor(resized[206:226, 25:45, :], cv2.COLOR_RGB2BGR)[:, :, 2:3]
            resized = preprocess(resized)
            left_line_color = [0, 255, 0]
            right_line_color = [0, 255, 0]
            # predict steering angle for the image
            # original + radar (small) + speed
            controls = model.predict([np.array([resized]), np.array([radar]), np.array([speed])], batch_size=1)
            # check that the car is following lane
            lane, stop_line = detect_lane(screen)
            # detect objects
            yolo_screen, color_detected, obj_distance = yolo_detection(screen, direct)
            if not stop:
                # adjusting speed: accelerate below 45, coast above 50,
                # keep the previous throttle in between
                if speed < 45:
                    throttle = 0.4
                elif speed > 50:
                    throttle = 0.0
                # an object close ahead overrides the cruise throttle
                if 0 <= obj_distance <= 0.6:
                    if speed < 5:
                        throttle = 0
                    else:
                        throttle = -0.7 if obj_distance <= 0.4 else -0.3
                elif color_detected == "Red":
                    # brake for a red light only once a stop line is seen;
                    # stop_line[0][1] is the y of its left end in the lane crop
                    if stop_line:
                        if speed < 5:
                            throttle = 0
                        elif 0 <= stop_line[0][1] <= 50:
                            throttle = -0.5
                        elif 50 < stop_line[0][1] <= 120:
                            throttle = -1
                        # else:
                        # throttle = -0.5
            elif speed > 5:
                # arrived: brake until the car is almost standing
                throttle = -1
            else:
                # arrived and (almost) standing: release and pause
                throttle = 0
                cv2.destroyAllWindows()
                pause = True
            # adjusting steering angle: force at least 0.27 away from a lane
            # line that is too close and mark that line red
            if lane[0] and lane[0][0] > left_line_max:
                if abs(controls[0][0]) < 0.27:
                    controls[0][0] = 0.27
                left_line_color = [0, 0, 255]
            elif lane[1] and lane[1][0] < right_line_max:
                if abs(controls[0][0]) < 0.27:
                    controls[0][0] = -0.27
                right_line_color = [0, 0, 255]
            # set the gamepad values
            set_gamepad([[controls[0][0], throttle]])
            # print('Main loop took {} seconds'.format(time.time() - last_time))
            # last_time = time.time()
            # draw the lane overlay into the same rows detect_lane() works on
            screen[280:-130, :, :] = draw_lane(screen[280:-130, :, :], lane, stop_line,
                                               left_line_color, right_line_color)
            # NOTE(review): `yolo_screen` is shown, not `screen`; the overlay is
            # only visible if yolo_detection() returns the same array - confirm.
            cv2.imshow("Driving-mode", yolo_screen)
            cv2.waitKey(1)
            # 6 is the value of Direct.ARRIVED
            if direct == 6:
                print("Arrived at destination.")
                stop = True
            # print('Main loop took {} seconds'.format(time.time() - last_time))
            # last_time = time.time()
            keys = key_check()
            if 'T' in keys:
                cv2.destroyAllWindows()
                pause = True
                # release gamepad keys
                set_gamepad([[0, 0]])
                print('Paused. To exit the program press Z.')
                time.sleep(0.5)
        keys = key_check()
        if 'T' in keys:
            pause = False
            stop = False
            print('Unpaused')
            time.sleep(1)
        elif 'Z' in keys:
            cv2.destroyAllWindows()
            close = True
            print('Closing the program.')
    gamepad.UnPlug()
def main():
    """Load 'base_model.h5' from `model_path` and start driving with it."""
    model = load_model(os.path.join(model_path, 'base_model.h5'))
    # control a car
    drive(model)


if __name__ == '__main__':
    main()
================================================
FILE: driving/gamepad.py
================================================
# This code based on Musi13's code (https://github.com/Musi13/pyvxbox)
"""
Gamepad emulating module.
"""
import sys
from ctypes import *
# The vXbox interface DLL is loaded once at import time; the program exits if
# the DLL cannot be loaded and raises if the virtual bus is missing.
dll_path = "vXboxInterface.dll"
try:
    _vx = cdll.LoadLibrary(dll_path)
except OSError as load_error:
    print(load_error)
    sys.exit("Unable to load vXbox SDK DLL. Ensure that %s is present" % dll_path)

if not _vx.isVBusExists():
    raise Exception('Xbox VBus does not exist')

# value limits of the stick axes and of the triggers
AXIS_MAX, AXIS_MIN = 32767, -32768
TRIGGER_MAX = 255

# button states
BTN_ON, BTN_OFF = True, False
class XInputDevice:
    """One virtual Xbox controller, addressed by its port number.

    Every method forwards to the matching function of the vXbox DLL and
    passes the port as first argument.
    """

    def __init__(self, port):
        """Claim `port`; raises if a controller already exists there."""
        if _vx.isControllerExists(port):
            raise Exception('Port %d is already used' % port)
        self.UserIndex = port

    def PlugIn(self):
        """Plug the virtual controller in."""
        _vx.PlugIn(self.UserIndex)

    def UnPlug(self, force=False):
        """Unplug the virtual controller, forcibly when `force` is true."""
        if force:
            _vx.UnPlugForce(self.UserIndex)
        else:
            _vx.UnPlug(self.UserIndex)

    def SetBtn(self, button, value):
        """Set the state of `button`; raises for an unknown button name."""
        setters = {
            'A': _vx.SetBtnA,
            'B': _vx.SetBtnB,
            'X': _vx.SetBtnX,
            'Y': _vx.SetBtnY,
            'Start': _vx.SetBtnStart,
            'Back': _vx.SetBtnBack,
            'LT': _vx.SetBtnLT,
            'RT': _vx.SetBtnRT,
            'LB': _vx.SetBtnLB,
            'RB': _vx.SetBtnRB,
            'GD': _vx.SetBtnGD
        }
        if button not in setters:
            raise Exception('Unknown button %s' % str(button))
        setters[button](self.UserIndex, value)

    def SetTrigger(self, trigger, value):
        """Set the 'L' or 'R' trigger to `value`; raises for other names."""
        setters = {
            'L': _vx.SetTriggerL,
            'R': _vx.SetTriggerR
        }
        if trigger not in setters:
            raise Exception('Unknown trigger %s' % str(trigger))
        setters[trigger](self.UserIndex, value)

    def SetAxis(self, axis, value):
        """Set axis 'X', 'Y', 'Rx' or 'Ry' to `value`; raises for other names."""
        setters = {
            'X': _vx.SetAxisX,
            'Y': _vx.SetAxisY,
            'Rx': _vx.SetAxisRx,
            'Ry': _vx.SetAxisRy
        }
        if axis not in setters:
            raise Exception('Unknown axis %s' % str(axis))
        setters[axis](self.UserIndex, value)

    def SetDpad(self, direction, value=0):
        """Press a d-pad direction, or pass `value` to SetDpad when direction is ''."""
        setters = {
            'Up': _vx.SetDpadUp,
            'Right': _vx.SetDpadRight,
            'Down': _vx.SetDpadDown,
            'Left': _vx.SetDpadLeft,
            '': _vx.SetDpad
        }
        if direction not in setters:
            raise Exception('Unknown direction %s' % str(direction))
        if direction == '':
            # the generic setter is the only one that takes a value
            setters[direction](self.UserIndex, value)
        else:
            setters[direction](self.UserIndex)

    def GetLedNumber(self, pLed):
        """Forward `pLed` to the DLL's GetLedNumber for this controller."""
        _vx.GetLedNumber(self.UserIndex, pLed)

    def GetVibration(self, pVib):
        """Forward `pVib` to the DLL's GetVibration for this controller."""
        _vx.GetVibration(self.UserIndex, pVib)
================================================
FILE: game_plugins.txt
================================================
### List of plugins used in GTA V
### for generating better conditions for AI
# allows installation of plugins
Script Hook V
# for adjusting weather conditions, time, number of cars, pedestrians, etc.
Simple Trainer for GTA V
================================================
FILE: object_detection/direction.py
================================================
from enum import Enum
class Direct(Enum):
    """Navigation directions.

    This is a plain `Enum`, so members do not compare equal to bare integers:
    compare against ``Direct.X.value`` or convert with ``Direct(label)`` when
    the direction is held as an int.
    """
    STRAIGHT = 0
    LEFT = 1
    RIGHT = 2
    SLIGHTLY_LEFT = 3
    SLIGHTLY_RIGHT = 4
    U_TURN = 5
    # destination reached
    ARRIVED = 6
================================================
FILE: object_detection/lane_detect.py
================================================
import math
import cv2
import numpy as np
from data_collection.img_process import grab_screen
# Endpoints found in the previous frame for the left lane line, the right lane
# line and the stop line (in that order); an empty list means "none".
prev_lines = [[] for _ in range(3)]
def crop(image):
    """
    Cut away the top 280 rows (sky) and the bottom 130 rows (car front)
    """
    first_row, end_row = 280, -130
    return image[first_row:end_row, :, :]
def grayscale(img):
    """
    Convert an RGB image to a single-channel grayscale image
    """
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    return gray
def canny(img, low_threshold=100, high_threshold=300):
    """
    Run Canny edge detection with the given hysteresis thresholds
    """
    edges = cv2.Canny(img, low_threshold, high_threshold)
    return edges
def gaussian_blur(img, kernel_size):
    """
    Blur the image with a square Gaussian kernel (sigma 30 in both directions)
    """
    kernel = (kernel_size, kernel_size)
    return cv2.GaussianBlur(img, kernel, sigmaX=30, sigmaY=30)
def region_of_interest(img, vertices):
    """
    Black out everything outside the polygon(s) given by `vertices`.
    `vertices` should be a numpy array of integer points.
    Returns a new image; pixels inside the polygon keep their value.
    """
    # one 255 per channel for multi-channel images, a plain 255 otherwise
    if len(img.shape) > 2:
        fill_value = (255,) * img.shape[2]
    else:
        fill_value = 255

    # start from an all-black mask and paint the polygon into it
    mask = np.zeros_like(img)
    cv2.fillPoly(mask, vertices, fill_value)

    # keep the image only where the mask is set
    return cv2.bitwise_and(img, mask)
def _limit_step(value, previous, max_step):
    """Return `value`, moved at most `max_step` away from `previous`."""
    if abs(value - previous) > max_step:
        return previous - max_step if previous > value else previous + max_step
    return value


def _track_line(slot, detected, fit_from, fit_to, first_at, second_at, max_step):
    """Fit one line, smooth it against the previous frame and remember it.

    When `detected` is truthy, a straight line ``fit_to = f(fit_from)`` is
    fitted and evaluated at `first_at` and `second_at`; each result may move
    at most `max_step` away from the value stored in ``prev_lines[slot]``.
    When nothing was detected, the stored values are returned once and then
    forgotten. Returns the two values, or None when there is nothing to report.
    """
    if detected:
        poly = np.poly1d(np.polyfit(fit_from, fit_to, deg=1))
        first = int(poly(first_at))
        second = int(poly(second_at))
        if prev_lines[slot]:
            first = _limit_step(first, prev_lines[slot][0], max_step)
            second = _limit_step(second, prev_lines[slot][1], max_step)
        prev_lines[slot] = [first, second]
        return first, second

    if prev_lines[slot]:
        first, second = prev_lines[slot]
        prev_lines[slot] = []
        return first, second

    return None


def construct_lane(lines):
    """
    Build the left lane line, the right lane line and the stop line from
    Hough segments.

    `lines` is the output of cv2.HoughLinesP (or None): an iterable of items
    that each hold one or more ``(x1, y1, x2, y2)`` segments in the
    coordinates of the cropped road image.

    Segments are grouped by slope: |slope| <= 0.05 is a stop-line candidate
    (only below row 20), slopes between 0.05 and 0.3 are ignored, and steeper
    segments are left (negative slope) or right (positive slope) lane
    segments. Vertical segments are ignored.

    Returns ``(lane, stop_line)``:
    `lane` is ``[left, right]``, each ``[x_bottom, 190, x_top, 0]`` or ``[]``;
    `stop_line` is ``[]`` or ``[[50, y_left, 750, y_right]]``.

    Results are smoothed against the module-level `prev_lines`; a line that is
    missing in one frame is replaced once by the previous result.
    """
    left_line_x, left_line_y = [], []
    right_line_x, right_line_y = [], []
    stop_line_x_first, stop_line_y_first = [], []
    stop_line_x_second, stop_line_y_second = [], []

    min_y = 0
    max_y = 190
    offset = 7  # largest allowed change of an endpoint between two frames

    if lines is not None:
        for line in lines:
            for x1, y1, x2, y2 in line:
                if x1 == x2:
                    # A vertical segment has no finite slope. Treating it as
                    # slope 0 would file it under the horizontal stop lines.
                    continue
                slope = (y2 - y1) / (x2 - x1)
                steepness = math.fabs(slope)

                if 0.05 < steepness < 0.3:  # not interested
                    continue

                if steepness <= 0.05:  # stop line
                    if (y1 > 20) and (y2 > 20):
                        # we need to detect two stop lines (top and bottom):
                        # segments within 15 rows of the first one join the
                        # first group, the others form the second group
                        if not stop_line_x_first or abs(stop_line_y_first[0] - y1) < 15:
                            stop_line_x_first.extend([x1, x2])
                            stop_line_y_first.extend([y1, y2])
                        else:
                            stop_line_x_second.extend([x1, x2])
                            stop_line_y_second.extend([y1, y2])
                elif slope <= 0:  # negative slope: left group
                    left_line_x.extend([x1, x2])
                    left_line_y.extend([y1, y2])
                else:  # positive slope: right group
                    right_line_x.extend([x1, x2])
                    right_line_y.extend([y1, y2])

    lane = [[], []]
    stop_line = []

    # lane lines are fitted as x = f(y) and evaluated at the bottom and top row
    left = _track_line(0, left_line_x, left_line_y, left_line_x, max_y, min_y, offset)
    if left is not None:
        lane[0] = [left[0], max_y, left[1], min_y]

    right = _track_line(1, right_line_x, right_line_y, right_line_x, max_y, min_y, offset)
    if right is not None:
        lane[1] = [right[0], max_y, right[1], min_y]

    # The stop line is reported only when a second group exists, but it is
    # fitted (as y = f(x)) through the first group.
    # NOTE(review): confirm that fitting the first group only is intended.
    stop = _track_line(2, stop_line_x_second, stop_line_x_first, stop_line_y_first, 50, 750, offset)
    if stop is not None:
        stop_line.append([50, stop[0], 750, stop[1]])

    return lane, stop_line
def hough_lines(img, rho=6, theta=np.pi / 120, threshold=160, min_line_len=60, max_line_gap=10):
    """
    `img` should be the output of a Canny transform.
    Returns the result of the probabilistic Hough transform (cv2.HoughLinesP).
    """
    return cv2.HoughLinesP(
        img,
        rho,
        theta,
        threshold,
        np.array([]),
        minLineLength=min_line_len,
        maxLineGap=max_line_gap,
    )
def add_images(img, initial_img):
    """
    Overlay `img` (the drawing on a black background) on `initial_img` (the
    unprocessed image).
    The result is cv2.add(initial_img, img).
    NOTE: initial_img and img must be the same shape!
    """
    combined = cv2.add(initial_img, img)
    return combined
def draw_lane(original_img, lane, stop_line, left_color, right_color, thickness=5):
    """Draw the lane lines, the lane area and the stop line onto a copy of the image.

    :param original_img: the cropped road image the lane was detected in
    :param lane: ``[left, right]``, each ``[x1, y1, x2, y2]`` or an empty list
    :param stop_line: list of ``[x1, y1, x2, y2]`` entries (may be empty)
    :param left_color: colour of the left lane line
    :param right_color: colour of the right lane line
    :param thickness: thickness of the lane lines; the stop line is three
        times as thick
    :return: `original_img` with the drawing added on top (cv2.add)
    """
    # everything is drawn on a black canvas and added to the image at the end
    img = np.zeros((original_img.shape[0], original_img.shape[1], 3), dtype=np.uint8)
    polygon_points = None
    offset_from_lane_edge = 8
    # draw lane lines
    if lane[0]:
        for x1, y1, x2, y2 in [lane[0]]:
            cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), left_color, thickness)
    if lane[1]:
        for x1, y1, x2, y2 in [lane[1]]:
            cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), right_color, thickness)
    # color the lane (only when both lane lines are known); the filled
    # polygon is inset by `offset_from_lane_edge` from both lines
    if lane[0] and lane[1]:
        lane_color = [40, 60, 0]
        for x1, y1, x2, y2 in [lane[0]]:
            p1 = (x1 + offset_from_lane_edge, y1)
            p2 = (x2 + offset_from_lane_edge, y2)
        for x1, y1, x2, y2 in [lane[1]]:
            p3 = (x2 - offset_from_lane_edge, y2)
            p4 = (x1 - offset_from_lane_edge, y1)
        polygon_points = np.array([[p1, p2, p3, p4]], np.int32)
        cv2.fillPoly(img, polygon_points, lane_color)
    # draw stop line
    if stop_line:
        for x1, y1, x2, y2 in stop_line:
            cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), [0, 0, 255], thickness * 3)
            # When both lane lines are known, cut the drawing back to a
            # polygon widened by `offset_from_lane_edge` beyond both lines,
            # so the stop line does not stick out past the lane.
            if polygon_points is not None:
                for px1, py1, px2, py2 in [lane[0]]:
                    p1 = (px1 - offset_from_lane_edge, py1)
                    p2 = (px2 - offset_from_lane_edge, py2)
                for px1, py1, px2, py2 in [lane[1]]:
                    p3 = (px2 + offset_from_lane_edge, py2)
                    p4 = (px1 + offset_from_lane_edge, py1)
                polygon_points = np.array([[p1, p2, p3, p4]], np.int32)
                img = region_of_interest(img, polygon_points)
    return add_images(img, original_img)
def detect_lane(screen):
    """Detect the lane lines and the stop line in a full screen capture.

    Pipeline: crop, grayscale, Gaussian blur (kernel 7), Canny (50, 100),
    polygon mask, Hough lines, lane construction.

    :return: the ``(lane, stop_line)`` pair produced by construct_lane()
    """
    # polygon that keeps the road area of the cropped image
    road_area = np.array([[(0, 190), (0, 70), (187, 0),
                           (613, 0), (800, 70), (800, 190)]], np.int32)

    edges = canny(gaussian_blur(grayscale(crop(screen)), 7), 50, 100)
    segments = hough_lines(region_of_interest(edges, road_area))
    return construct_lane(segments)
def main():
    """Show the live lane detection result until 'q' is pressed."""
    while True:
        original_img = grab_screen()

        # crop, grayscale, blur, Canny, mask, Hough lines, lane construction
        lane, stop_line = detect_lane(original_img)

        # Place lane detection output on the original image
        road = original_img[280:-130, :, :]
        original_img[280:-130, :, :] = draw_lane(road, lane, stop_line, [0, 255, 0],
                                                 [0, 255, 0])

        cv2.imshow("Frame", original_img)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            cv2.destroyAllWindows()
            break


if __name__ == '__main__':
    main()
================================================
FILE: object_detection/object_detect.py
================================================
import cv2
import numpy as np
from darkflow.net.build import TFNet
from shapely.geometry import box, Polygon
from data_collection.img_process import grab_screen
from object_detection.direction import Direct
# set YOLO options
options = dict(
    model='cfg/yolo.cfg',
    load='yolov2.weights',
    threshold=0.3,
    gpu=0.5,
)
tfnet = TFNet(options)

# capture = cv2.VideoCapture('gta2.mp4')

# five random colours and fifteen black ones
t = (0, 0, 0)
colors = [tuple(255 * np.random.rand(3)) for _ in range(5)]
colors2 = [tuple(t) for _ in range(15)]
def light_recog(frame, direct, traffic_lights):
    """Pick the relevant traffic light and recognize the colour it shows.

    :param frame: image the detections refer to; the recognized colour is
        written onto it when the light lies inside the central region
    :param direct: driving direction, either a `Direct` member or its integer
        value
    :param traffic_lights: non-empty list of detections, each with
        ``['topleft']`` and ``['bottomright']`` dicts holding 'x' and 'y'
    :return: ``(frame, color_detected)``; the colour is "Red", "Yellow",
        "Green" or '' when none was found
    """
    # A bare integer never compares equal to a plain Enum member, so normalize
    # first; unknown values fall back to the straight/left rule.
    try:
        direction = Direct(direct)
    except ValueError:
        direction = None

    traffic_light = traffic_lights[0]
    # find out which traffic light to follow, if there are several
    if len(traffic_lights) > 1:
        def left_edge(light):
            return light['topleft']['x']

        if direction in (Direct.RIGHT, Direct.SLIGHTLY_RIGHT):
            # if we need to go to the right: the rightmost light
            traffic_light = max(traffic_lights, key=left_edge)
        else:
            # straight or left: the leftmost light
            traffic_light = min(traffic_lights, key=left_edge)

    # coordinates of the traffic light
    top_left = (traffic_light['topleft']['x'], traffic_light['topleft']['y'])
    bottom_right = (traffic_light['bottomright']['x'], traffic_light['bottomright']['y'])

    # crop the frame to the traffic light
    roi = frame[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]]
    hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

    # possible HSV ranges for traffic lights, checked in this order; a later
    # match overrides an earlier one
    color_ranges = (
        ("Red", (136, 87, 111), (180, 255, 255)),
        ("Yellow", (22, 60, 200), (60, 255, 255)),
        ("Green", (50, 100, 100), (70, 255, 255)),
    )

    color_detected = ''
    for name, lower, upper in color_ranges:
        mask = cv2.inRange(hsv, np.array(lower, dtype=np.uint8), np.array(upper, dtype=np.uint8))
        # findContours returns 3 values in OpenCV 3 and 2 in OpenCV 4; the
        # contours are the second-to-last item in both.
        contours = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(contours) > 0:
            color_detected = name

    # label the light only when it lies inside the central region of the frame
    if (0 <= top_left[1] and bottom_right[1] <= 437) and (244 <= top_left[0] and bottom_right[0] <= 630):
        frame = cv2.putText(frame, color_detected, bottom_right, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)
        frame = cv2.putText(frame, color_detected, bottom_right, cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

    return frame, color_detected
# Width, in pixels, that bounding-box widths are normalised by.
# NOTE(review): assumes the captured frame is 800 px wide -- confirm against grab_screen().
_FRAME_WIDTH = 800

# Trapezoids (in frame pixel coordinates) in front of the vehicle; only objects whose
# bounding box intersects the trapezoid are treated as obstacles.
_CAR_ROI = ((100, 470), (350, 280), (450, 280), (700, 470))
_HUMAN_ROI = ((90, 470), (350, 280), (450, 280), (700, 470))


def _estimate_distance(frame, top_left, bottom_right, roi_points, exponent):
    """
    Shared implementation of distance_to_car / distance_to_human.

    The estimate is ``(1 - box_width / _FRAME_WIDTH) ** exponent`` rounded to one
    decimal, so wider boxes give smaller values. When the bounding box intersects
    the region of interest, the estimate is drawn at the box centre and a
    'WARNING!' label is drawn inside the box.

    :param frame: image to annotate
    :param top_left: (x, y) of the bounding box top-left corner
    :param bottom_right: (x, y) of the bounding box bottom-right corner
    :param roi_points: vertices of the region-of-interest polygon
    :param exponent: power applied to the normalised free width
    :return: (frame, distance); distance is None if the box is outside the region
    """
    distance = None
    roi = Polygon(roi_points)
    obstacle = box(top_left[0], top_left[1], bottom_right[0], bottom_right[1])

    if roi.intersects(obstacle):
        mid_x = (bottom_right[0] + top_left[0]) / 2
        mid_y = (top_left[1] + bottom_right[1]) / 2
        distance = round(
            (1 - ((bottom_right[0] / _FRAME_WIDTH) - (top_left[0] / _FRAME_WIDTH))) ** exponent, 1)
        frame = cv2.putText(frame, '{}'.format(distance), (int(mid_x), int(mid_y)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        # draw into a view of the frame so the label is positioned relative to the box
        cv2.putText(frame[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]],
                    'WARNING!', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3)

    return frame, distance


def distance_to_car(frame, top_left, bottom_right):
    """
    Estimate the relative distance to a vehicle and annotate the frame.

    :return: (frame, distance); distance is None when the vehicle's bounding box
             does not intersect the region in front of the car
    """
    return _estimate_distance(frame, top_left, bottom_right, _CAR_ROI, 4)


def distance_to_human(frame, top_left, bottom_right):
    """
    Estimate the relative distance to a person and annotate the frame.

    Uses a slightly wider region than distance_to_car and a larger exponent (15).

    :return: (frame, distance); distance is None when the person's bounding box
             does not intersect the region in front of the car
    """
    return _estimate_distance(frame, top_left, bottom_right, _HUMAN_ROI, 15)
def yolo_detection(screen, direct):
    """
    Detect objects on a frame with YOLO, annotate them and summarise the scene.

    The bottom 130 rows of the frame are excluded from detection. Because the
    detections are zipped with the module-level colour lists, at most
    min(len(colors), len(colors2)) detections are processed.

    :param screen: frame to analyse and draw on
    :param direct: forwarded unchanged to light_recog
    :return: (screen, color_detected, distance) where color_detected is None if no
             traffic light was considered, and distance is the smallest obstacle
             distance found (1 if there is none)
    """
    # find objects on a frame by using YOLO
    results = tfnet.return_predict(screen[:-130, :, :])
    # create a list of detected traffic lights (might be several on a frame)
    traffic_lights = []
    color_detected = None
    distance = 1

    for color, color2, result in zip(colors, colors2, results):
        top_left = (result['topleft']['x'], result['topleft']['y'])
        bottom_right = (result['bottomright']['x'], result['bottomright']['y'])
        label = result['label']
        confidence = result['confidence']
        text = '{}: {:.0f}%'.format(label, confidence * 100)

        obstacle_distance = None
        if label == 'traffic light' and confidence > 0.3:
            # only lights whose left edge is within this horizontal band are considered
            if 220 <= result['topleft']['x'] <= 630:
                traffic_lights.append(result)
                color = color2
        elif label in ('car', 'bus', 'truck', 'train'):
            screen, obstacle_distance = distance_to_car(screen, top_left, bottom_right)
        elif label == 'person':
            screen, obstacle_distance = distance_to_human(screen, top_left, bottom_right)
        else:
            # other labels (and low-confidence traffic lights) are not drawn
            continue

        # compare against None explicitly: a rounded distance of 0.0 is the *closest*
        # possible obstacle and must not be discarded as "falsy"
        if obstacle_distance is not None and 0 <= obstacle_distance < distance:
            distance = obstacle_distance

        screen = cv2.rectangle(screen, top_left, bottom_right, color, 6)
        screen = cv2.putText(screen, text, top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

    if traffic_lights:
        screen, color_detected = light_recog(screen, direct, traffic_lights)

    return screen, color_detected, distance
def main():
    """
    Run object detection on live screen captures and show the annotated frames.
    Press 'q' in the preview window to stop.
    """
    quit_key = ord("q")

    while True:
        frame, light_color, nearest = yolo_detection(grab_screen(), 0)

        if light_color:
            print("Color detected: " + light_color)
        # yolo_detection reports 1 when no obstacle was found
        if nearest != 1:
            print("Distance to obstacle: {}".format(nearest))

        cv2.imshow("Frame", frame)
        if (cv2.waitKey(1) & 0xFF) == quit_key:
            cv2.destroyAllWindows()
            return


if __name__ == '__main__':
    main()
================================================
FILE: requirements.txt
================================================
### To install the packages type in the console:
### pip install -r requirements.txt
numpy
opencv-python
# tensorflow
tensorflow-gpu
# Python for Windows Extensions
pywin32
# For data management
h5py
# A high-level neural networks API capable of running on top of TensorFlow
Keras
# Tools for data mining and data analysis
scikit-learn
# To read information from a gamepad
inputs
# for object detection module
Shapely
# for YOLO
Cython
================================================
FILE: training/model.py
================================================
"""
NN model
"""
from keras.layers import Lambda, Conv2D, Dropout, Dense, Flatten, Concatenate, Input, MaxPooling2D
from keras.models import Model
from training.utils import INPUT_SHAPE, RADAR_SHAPE
# original Nvidia model
# def build_model(args):
# """
# NVIDIA model used
# Image normalization to avoid saturation and make gradients work better.
# Convolution: 5x5, filter: 24, strides: 2x2, activation: ELU
# Convolution: 5x5, filter: 36, strides: 2x2, activation: ELU
# Convolution: 5x5, filter: 48, strides: 2x2, activation: ELU
# Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
# Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
# Drop out (0.5)
# Fully connected: neurons: 100, activation: ELU
# Fully connected: neurons: 50, activation: ELU
# Fully connected: neurons: 10, activation: ELU
# Fully connected: neurons: 1 (output)
# # the convolution layers are meant to handle feature engineering
# the fully connected layer for predicting the steering angle.
# dropout avoids overfitting
# ELU(Exponential linear unit) function takes care of the Vanishing gradient problem.
# """
# model = Sequential()
# model.add(Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE))
# model.add(Conv2D(24, (5, 5), activation='elu', strides=(2, 2)))
# model.add(Conv2D(36, (5, 5), activation='elu', strides=(2, 2)))
# model.add(Conv2D(48, (5, 5), activation='elu', strides=(2, 2)))
# model.add(Conv2D(64, (3, 3), activation='elu'))
# model.add(Conv2D(64, (3, 3), activation='elu'))
# model.add(Dropout(args.keep_prob))
# model.add(Flatten())
# model.add(Dense(100, activation='elu'))
# model.add(Dense(50, activation='elu'))
# model.add(Dense(10, activation='elu'))
# model.add(Dense(1))
# model.summary()
#
# return model
# original + radar added
# def build_model(args):
# # image model
# img_input = Input(shape=INPUT_SHAPE)
# img_model = (Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE))(img_input)
# img_model = (Conv2D(24, (5, 5), activation='elu', strides=(2, 2)))(img_model)
# img_model = (Conv2D(36, (5, 5), activation='elu', strides=(2, 2)))(img_model)
# img_model = (Conv2D(48, (5, 5), activation='elu', strides=(2, 2)))(img_model)
# img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model)
# img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model)
# img_model = (Dropout(args.keep_prob))(img_model)
# img_model = (Flatten())(img_model)
# img_model = (Dense(100, activation='elu'))(img_model)
#
# # radar model
# radar_input = Input(shape=RADAR_SHAPE)
# radar_model = (Conv2D(10, (5, 5), activation='elu'))(radar_input)
# radar_model = (MaxPooling2D((2, 2)))(radar_model)
# radar_model = (Conv2D(20, (5, 5), activation='elu'))(radar_model)
# radar_model = (MaxPooling2D((2, 2)))(radar_model)
# radar_model = (Dropout(args.keep_prob / 2))(radar_model)
# radar_model = (Flatten())(radar_model)
# radar_model = (Dense(30, activation='elu'))(radar_model)
#
# # combined model
# out = Concatenate()([img_model, radar_model])
# out = (Dense(50, activation='elu'))(out)
# out = (Dense(10, activation='elu'))(out)
# out = (Dense(1))(out)
#
# final_model = Model(inputs=[img_input, radar_input], outputs=out)
# final_model.summary()
#
# return final_model
# original + radar and speed info added
def build_model(args):
    """
    Build the three-input network (camera image, radar patch, speed).

    The camera branch is the NVIDIA-style stack of five ELU convolutions followed
    by dropout and a 100-unit dense layer. The radar branch is two conv/max-pool
    stages followed by dropout and a 10-unit dense layer. Both branches are
    concatenated, reduced to 50 units, joined with the scalar speed input and
    reduced to a single output.

    :param args: object exposing ``keep_prob``; the value is passed to Dropout as-is
                 (and halved for the radar branch)
    :return: the uncompiled Keras model
    """
    # (filters, kernel size, strides) of the camera branch convolutions
    conv_specs = (
        (24, (5, 5), (2, 2)),
        (36, (5, 5), (2, 2)),
        (48, (5, 5), (2, 2)),
        (64, (3, 3), (1, 1)),
        (64, (3, 3), (1, 1)),
    )

    # camera branch
    img_input = Input(shape=INPUT_SHAPE)
    camera = Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE)(img_input)
    for filters, kernel, strides in conv_specs:
        camera = Conv2D(filters, kernel, activation='elu', strides=strides)(camera)
    camera = Dropout(args.keep_prob)(camera)
    camera = Flatten()(camera)
    camera = Dense(100, activation='elu')(camera)

    # radar branch
    radar_input = Input(shape=RADAR_SHAPE)
    radar = radar_input
    for filters in (32, 64):
        radar = Conv2D(filters, (5, 5), activation='elu')(radar)
        radar = MaxPooling2D((2, 2), strides=(2, 2))(radar)
    radar = Dropout(args.keep_prob / 2)(radar)
    radar = Flatten()(radar)
    radar = Dense(10, activation='elu')(radar)

    # speed is fed in as a raw scalar
    speed_input = Input(shape=(1,))

    # merge the branches
    merged = Concatenate()([camera, radar])
    merged = Dense(50, activation='elu')(merged)
    merged = Concatenate()([merged, speed_input])
    merged = Dense(10, activation='elu')(merged)
    prediction = Dense(1)(merged)

    final_model = Model(inputs=[img_input, radar_input, speed_input], outputs=prediction)
    final_model.summary()

    return final_model
# original + throttle control
# def build_model(args):
# """
# NVIDIA model used
# Image normalization to avoid saturation and make gradients work better.
# Convolution: 5x5, filter: 24, strides: 2x2, activation: ELU
# Convolution: 5x5, filter: 36, strides: 2x2, activation: ELU
# Convolution: 5x5, filter: 48, strides: 2x2, activation: ELU
# Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
# Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
# Drop out (0.5)
# Fully connected: neurons: 100, activation: ELU
# Fully connected: neurons: 50, activation: ELU
# Fully connected: neurons: 10, activation: ELU
# Fully connected: neurons: 1 (output)
# # the convolution layers are meant to handle feature engineering
# the fully connected layer for predicting the steering angle.
# dropout avoids overfitting
# ELU(Exponential linear unit) function takes care of the Vanishing gradient problem.
# """
# # image model
# img_input = Input(shape=INPUT_SHAPE)
# img_model = (Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE))(img_input)
# img_model = (Conv2D(24, (5, 5), activation='elu', strides=(2, 2)))(img_model)
# img_model = (Conv2D(36, (5, 5), activation='elu', strides=(2, 2)))(img_model)
# img_model = (Conv2D(48, (5, 5), activation='elu', strides=(2, 2)))(img_model)
# img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model)
# img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model)
# img_model = (Dropout(args.keep_prob))(img_model)
# img_model = (Flatten())(img_model)
# img_model = (Dense(100, activation='elu'))(img_model)
#
# # speed and direction model
# metrics_input = Input(shape=(2,))
# metrics_model = Dense(2, activation='elu')(metrics_input)
#
# # combined model
# out = Concatenate()([img_model, metrics_model])
# out = (Dense(50, activation='elu'))(out)
# out = (Dense(10, activation='elu'))(out)
# out = (Dense(2))(out)
#
# final_model = Model(inputs=[img_input, metrics_input], outputs=out)
# final_model.summary()
#
# return final_model
================================================
FILE: training/train.py
================================================
# This code is based on Siraj Raval's code (https://github.com/llSourcell/How_to_simulate_a_self_driving_car)
"""
Training module. Based on "End to End Learning for Self-Driving Cars" research paper by Nvidia.
"""
import argparse
import h5py
import numpy as np
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split # to split out training and testing data
# path with training files
from data_collection.data_collect import path
from training.model import build_model
# helper class
from training.utils import batch_generator
# for debugging, allows for reproducible (deterministic) results
np.random.seed(0)
def load_data(args):
    """
    Open the training data file and split its sample indexes into a training
    and a validation set.

    :param args: parsed command-line arguments; ``data_dir`` (if present) is the
                 HDF5 file to open and ``test_size`` is the validation fraction
    :return: (data, indexes_train, indexes_valid). The HDF5 file is returned open
             because samples are read from it lazily by index during training.
    """
    # honour the -d option; fall back to the default location when it is absent
    data_path = getattr(args, 'data_dir', None) or path
    data = h5py.File(data_path, 'r')

    # list of all possible indexes
    indexes = list(range(data['img'].shape[0]))

    # split the indexes into a training and a validation set (fraction given by test_size)
    indexes_train, indexes_valid = train_test_split(indexes, test_size=args.test_size, random_state=0)

    return data, indexes_train, indexes_valid
def load_weights(model):
    """
    Initialise ``model`` with the weights of the previously trained base model.

    :param model: model whose weights are replaced in place
    :return: the same model instance
    """
    pretrained = load_model("..\\training\\base_model.h5")
    weights = pretrained.get_weights()
    model.set_weights(weights)

    return model
def train_model(model, args, data, indexes_train, indexes_valid):
    """
    Compile the model and train it on batches produced by batch_generator.

    :param model: Keras model to train
    :param args: object exposing ``save_best_only``, ``learning_rate``,
                 ``batch_size`` and ``nb_epoch``
    :param data: opened dataset passed through to batch_generator
    :param indexes_train: sample indexes used for training
    :param indexes_valid: sample indexes used for validation
    """
    # Save the model after an epoch as 'model-<epoch>.h5'. With save_best_only the
    # file is only written when the monitored quantity (val_loss) improved;
    # mode='auto' infers from the quantity's name whether lower or higher is better.
    checkpoint = ModelCheckpoint('model-{epoch:03d}.h5',
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=args.save_best_only,
                                 mode='auto')

    # mean squared error between the expected and the predicted steering value,
    # minimised via gradient descent
    model.compile(loss='mean_squared_error', optimizer=Adam(lr=args.learning_rate))

    # Keras expects integer step counts; round up so a trailing partial batch's
    # worth of samples still gets a step (plain '/' would pass a float).
    train_steps = (len(indexes_train) + args.batch_size - 1) // args.batch_size
    valid_steps = (len(indexes_valid) + args.batch_size - 1) // args.batch_size

    # Fit the model on data generated batch-by-batch by a Python generator, which
    # runs in parallel to the model so preprocessing overlaps with training.
    model.fit_generator(batch_generator(data, indexes_train, args.batch_size, True),
                        steps_per_epoch=train_steps,
                        epochs=args.nb_epoch,
                        max_queue_size=1,
                        validation_data=batch_generator(data, indexes_valid, args.batch_size, False),
                        validation_steps=valid_steps,
                        callbacks=[checkpoint],
                        verbose=1)
# for command line args
def s2b(s):
    """
    Interpret a command-line string as a boolean.

    'true', 'yes', 'y' and '1' (in any letter case) are True; anything else is False.
    """
    return s.lower() in ('true', 'yes', 'y', '1')
def main():
    """
    Parse the command-line options, load the train/validation split and train the model.
    """
    # (flag, help text, destination, converter, default)
    options = (
        ('-d', 'data directory', 'data_dir', str, path),
        ('-t', 'test size fraction', 'test_size', float, 0.2),
        ('-k', 'drop out probability', 'keep_prob', float, 0.5),
        ('-n', 'number of epochs', 'nb_epoch', int, 200),
        ('-b', 'batch size', 'batch_size', int, 500),
        ('-o', 'save best models only', 'save_best_only', s2b, 'true'),
        ('-l', 'learning rate', 'learning_rate', float, 1.0e-4),
    )

    cli = argparse.ArgumentParser(description='Behavioral Cloning Training Program')
    for flag, description, dest, converter, default in options:
        cli.add_argument(flag, help=description, dest=dest, type=converter, default=default)
    args = cli.parse_args()

    # print parameters
    rule = '-' * 30
    print(rule)
    print('Parameters')
    print(rule)
    for key, value in vars(args).items():
        print('{:<20} := {}'.format(key, value))
    print(rule)

    # load data
    data, indexes_train, indexes_valid = load_data(args)
    # build the model and start from the previously trained weights
    model = load_weights(build_model(args))
    # train; checkpoints are written by train_model
    train_model(model, args, data, indexes_train, indexes_valid)


if __name__ == '__main__':
    main()
================================================
FILE: training/utils.py
================================================
# This code is based on Siraj Raval's code (https://github.com/llSourcell/How_to_simulate_a_self_driving_car)
import math
import cv2
import numpy as np
import tensorflow as tf
IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS = 66, 200, 3
INPUT_SHAPE = (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)
RADAR_HEIGHT, RADAR_WIDTH, RADAR_CHANNELS = 20, 20, 1
RADAR_SHAPE = (RADAR_HEIGHT, RADAR_WIDTH, RADAR_CHANNELS)
def crop(image):
    """
    Cut off the top 90 and the bottom 50 rows of the image
    (the sky at the top and the car front at the bottom).
    """
    top, bottom = 90, -50
    return image[top:bottom, :, :]
def resize(image):
    """
    Resize the image to the input shape used by the network model.

    :param image: image to resize
    :return: image of size IMAGE_WIDTH x IMAGE_HEIGHT
    """
    # The interpolation flag must be passed by keyword: the third positional
    # parameter of cv2.resize is `dst`, not `interpolation`.
    return cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_AREA)
def rgb2yuv(image):
    """
    Return the image converted from the RGB to the YUV colour space
    (the colour space used by the NVIDIA model).
    """
    conversion = cv2.COLOR_RGB2YUV
    return cv2.cvtColor(image, conversion)
def preprocess(image):
    """
    Run the full preprocessing chain on an image: crop, then resize, then RGB -> YUV.
    """
    return rgb2yuv(resize(crop(image)))
# def choose_image(data_dir, center, left, right, steering_angle):
# """
# Randomly choose an image from the center, left or right, and adjust
# the steering angle.
# """
# choice = np.random.choice(3)
# if choice == 0:
# return load_image(data_dir, left), steering_angle + 0.2
# elif choice == 1:
# return load_image(data_dir, right), steering_angle - 0.2
# return load_image(data_dir, center), steering_angle
# flipping is disabled: a mirrored image makes the car appear to drive in the opposite-direction lane
# def random_flip(image, steering_angle):
# """
# Randomly flip the image left <-> right, and adjust the steering angle.
# """
# if np.random.rand() < 0.5:
# image = cv2.flip(image, 1)
# steering_angle = -steering_angle
# return image, steering_angle
def random_translate(image, steering_angle, range_x, range_y):
    """
    Shift the image by a random offset and correct the steering angle accordingly.

    The horizontal offset is drawn from [-range_x / 2, range_x / 2) and the vertical
    one from [-range_y / 2, range_y / 2). The horizontal offset is converted to
    whole steps of 25 px (rounded away from zero); every step beyond the second one
    changes the steering angle by one, and the result is limited to [-10, 10].

    :return: (translated image, adjusted steering angle)
    """
    shift_x = range_x * (np.random.rand() - 0.5)
    shift_y = range_y * (np.random.rand() - 0.5)

    # adjusting steering angle
    steps = shift_x / 25
    if steps > 0:
        steps = math.ceil(steps)
        if steps > 2:
            steering_angle = min(steering_angle + (steps - 2), 10)
    else:
        steps = math.floor(steps)
        if steps < -2:
            steering_angle = max(steering_angle + (steps + 2), -10)

    # apply an affine transformation (pure translation) to the image
    rows, cols = image.shape[:2]
    translation = np.float32([[1, 0, shift_x], [0, 1, shift_y]])
    shifted = cv2.warpAffine(image, translation, (cols, rows))

    return shifted, steering_angle
def random_shadow(image):
    """
    Darken a random half-plane of the image to imitate a shadow.

    A line is drawn from a random point on the top edge to a random point on the
    bottom edge; one randomly chosen side of it has its lightness scaled by a
    factor drawn from [0.2, 0.5).

    :param image: RGB image of any size
    :return: RGB image with the shadow applied
    """
    # Use the size of the image that was actually passed in: augmentation runs on
    # the raw frame, before it is cropped/resized to the network input shape.
    height, width = image.shape[:2]

    # (x1, y1) and (x2, y2) form a line from the top edge to the bottom edge
    x1, y1 = width * np.random.rand(), 0
    x2, y2 = width * np.random.rand(), height
    # np.mgrid yields the row (y) grid first and the column (x) grid second
    ym, xm = np.mgrid[0:height, 0:width]

    # Mark the pixels on one side of the line. Comparing slopes,
    #   (ym-y1)/(xm-x1) > (y2-y1)/(x2-x1),
    # would divide by zero when x2 == x1, so the cross-multiplied form is used:
    #   (ym-y1)*(x2-x1) - (y2-y1)*(xm-x1) > 0
    mask = np.zeros_like(image[:, :, 1])
    mask[(ym - y1) * (x2 - x1) - (y2 - y1) * (xm - x1) > 0] = 1

    # choose which side should have the shadow and how strong it is
    cond = mask == np.random.randint(2)
    s_ratio = np.random.uniform(low=0.2, high=0.5)

    # scale channel 1 of HLS (Hue, Lightness, Saturation), i.e. the lightness
    hls = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
    hls[:, :, 1][cond] = hls[:, :, 1][cond] * s_ratio
    return cv2.cvtColor(hls, cv2.COLOR_HLS2RGB)
def random_brightness(image):
    """
    Randomly adjust the brightness of the image by a factor in [0.8, 1.2).

    :param image: RGB image
    :return: RGB image with scaled brightness
    """
    # HSV (Hue, Saturation, Value) is also called HSB ('B' for Brightness).
    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    ratio = 1.0 + 0.4 * (np.random.rand() - 0.5)
    # Clip before storing: with ratio > 1 a bright pixel can exceed 255, and writing
    # that back into an 8-bit image would wrap around to a dark value.
    # NOTE(review): assumes 8-bit images (0..255 value range) -- confirm.
    hsv[:, :, 2] = np.clip(hsv[:, :, 2] * ratio, 0, 255)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
def augment(image, steering_angle, range_x=250, range_y=20):
    """
    Produce an augmented copy of the image together with the matching steering angle.

    The image is randomly translated (which may change the steering angle), then a
    random shadow and a random brightness change are applied.

    :return: (augmented image, adjusted steering angle)
    """
    shifted, adjusted_angle = random_translate(image, steering_angle, range_x, range_y)
    shaded = random_shadow(shifted)
    return random_brightness(shaded), adjusted_angle
def batch_generator(data, indexes, batch_size, is_training):
    """
    Endlessly generate batches of network inputs and normalised steering targets.

    Every batch is built from the first ``batch_size`` entries of a fresh random
    permutation of ``indexes``.

    :param data: dataset indexable as data['img'], data['controls'], data['metrics']
    :param indexes: sample indexes this generator may draw from
    :param batch_size: maximum number of samples per batch
    :param is_training: when True, samples are randomly augmented
    :return: yields ([images, radars, speeds], controls)
    """
    # preprocessing on the CPU
    with tf.device('/cpu:0'):
        while True:
            # Allocate fresh buffers for every batch: the consumer may still hold the
            # previously yielded arrays while the next batch is being filled.
            images = np.empty([batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS])
            radars = np.empty([batch_size, RADAR_HEIGHT, RADAR_WIDTH, RADAR_CHANNELS])
            speeds = np.empty(batch_size)
            controls = np.empty(batch_size)

            count = 0
            for index in np.random.permutation(indexes):
                camera = data['img'][index]
                # the radar patch is cut out of the raw frame before any augmentation
                radar = cv2.cvtColor(camera[206:226, 25:45, :], cv2.COLOR_RGB2BGR)
                steer = data['controls'][index][1]

                # augmentation
                if is_training:
                    prob = np.random.rand()
                    if (abs(steer) < 0.4 and prob > 0.2) or (prob < 0.6):
                        camera, steer = augment(camera, steer)

                # add the image, radar, steering angle and speed to the batch
                images[count] = preprocess(camera)
                radars[count] = radar[:, :, 2:3]
                controls[count] = steer / 10    # normalized steering
                speeds[count] = data['metrics'][index][0]

                count += 1
                if count == batch_size:
                    break

            # trim so that uninitialised rows are never yielded when fewer than
            # batch_size indexes are available
            yield [images[:count], radars[:count], speeds[:count]], controls[:count]