Repository: hadipash/AI_GTA5
Branch: master
Commit: 424e615419d9
Files: 24
Total size: 59.8 KB
Directory structure:
gitextract_4lc496_7/
├── README.md
├── data_collection/
│ ├── data_balancing.py
│ ├── data_collect.py
│ ├── gamepad_cap.py
│ ├── histogram.py
│ ├── img_process.py
│ ├── key_cap.py
│ └── resources/
│ ├── arrows.npy
│ ├── arrows_labels.npy
│ ├── digits.npy
│ └── digits_labels.npy
├── drivers.txt
├── driving/
│ ├── drive.py
│ └── gamepad.py
├── game_plugins.txt
├── object_detection/
│ ├── direction.py
│ ├── lane_detect.py
│ └── object_detect.py
├── requirements.txt
└── training/
├── base_model.h5
├── model.py
├── models/
│ └── original + radar/
│ └── base_model.h5
├── train.py
└── utils.py
================================================
FILE CONTENTS
================================================
================================================
FILE: README.md
================================================
# Self-Driving Car for GTA V
### Overview
The aim of this project is to create a self-driving car using a virtual simulator (specifically GTA V).
### [Youtube Video](https://www.youtube.com/watch?v=BRK0wm7rrfQ)
<p align="center">
<img src="https://github.com/hadipash/AI_GTA5/raw/master/demo.gif">
</p>
================================================
FILE: data_collection/data_balancing.py
================================================
import h5py
from data_collection.data_collect import path as source_path
# Destination file for the balanced copy of the training data.
# Raw string: the previous literal relied on the invalid escape sequence "\G",
# which newer Python versions warn about. The resulting path is unchanged.
dest_path = r"F:\Graduation_Project\training_data_balanced.h5"
destination = h5py.File(dest_path, 'w')

# Resizable datasets (first axis unbounded), written in chunks of 30 rows.
destination.create_dataset('img', (0, 240, 320, 3), dtype='u1',
                           maxshape=(None, 240, 320, 3), chunks=(30, 240, 320, 3))
destination.create_dataset('controls', (0, 2), dtype='i1', maxshape=(None, 2), chunks=(30, 2))
destination.create_dataset('metrics', (0, 2), dtype='u1', maxshape=(None, 2), chunks=(30, 2))
def save(data_img, controls, metrics):
    """Append one batch of images, controls and metrics to the destination file.

    Nothing is written when `data_img` is empty. Each dataset grows by the
    length of its own batch and the batch is stored in the new tail rows.
    """
    if not data_img:
        return

    batches = (("img", data_img), ("controls", controls), ("metrics", metrics))
    for name, batch in batches:
        dataset = destination[name]
        count = len(batch)
        dataset.resize(dataset.shape[0] + count, axis=0)
        dataset[-count:] = batch
def main():
    """Copy a balanced subset of the recorded data into the destination file.

    Frames are skipped when the first metric (speed) is 0 or the second metric
    equals 6 (destination reached). Of the remaining frames, every frame with
    a non-zero steering value is kept, but only every 5th frame with a zero
    steering value. Data is flushed to disk every 10000 kept frames.

    Both HDF5 files are closed even if the copy fails part-way.
    """
    images = []
    controls = []
    metrics = []
    tuples = 0
    straights = 0

    try:
        with h5py.File(source_path, 'r') as source:
            src_img = source['img']
            src_controls = source['controls']
            src_metrics = source['metrics']

            for i in range(src_img.shape[0]):
                # read each row once; every dataset index is a separate file read
                metric = src_metrics[i]
                # if speed is 0 or arrived at the destination, skip the frame
                if metric[0] == 0 or metric[1] == 6:
                    continue

                control = src_controls[i]
                if control[1] == 0:
                    # save only each 5th straight drive frame
                    add = (straights % 5 == 0)
                    straights += 1
                else:
                    # save all turns
                    add = True
                if not add:
                    continue

                images.append(src_img[i])
                controls.append(control)
                metrics.append(metric)
                tuples += 1

                if tuples % 10000 == 0:  # every 2.5 GB
                    print(tuples)
                    save(images, controls, metrics)
                    images = []
                    controls = []
                    metrics = []

            save(images, controls, metrics)
            print("Copied: {:d} tuples from the source file".format(tuples))
    finally:
        destination.close()


if __name__ == '__main__':
    main()
================================================
FILE: data_collection/data_collect.py
================================================
"""
Data collection module (saves data in H5 format).
Saves screen captures and pressed keys into a file
for further trainings of NN.
"""
import os
import threading
import time
import winsound
import h5py
from data_collection.gamepad_cap import Gamepad
from data_collection.img_process import img_process
from data_collection.key_cap import key_check
# Serializes every access to the data file (writes happen on worker threads).
lock = threading.Lock()

# open the data file
# Raw string: the previous literal relied on the invalid escape sequence "\G",
# which newer Python versions warn about. The resulting path is unchanged.
path = r"F:\Graduation_Project\training_data.h5"
if os.path.isfile(path):
    data_file = h5py.File(path, 'a')
else:
    data_file = h5py.File(path, 'w')
    # Write data in chunks for faster writing and reading by NN
    data_file.create_dataset('img', (0, 240, 320, 3), dtype='u1',
                             maxshape=(None, 240, 320, 3), chunks=(30, 240, 320, 3))
    data_file.create_dataset('controls', (0, 2), dtype='i1', maxshape=(None, 2), chunks=(30, 2))
    data_file.create_dataset('metrics', (0, 2), dtype='u1', maxshape=(None, 2), chunks=(30, 2))
def save(data_img, controls, metrics):
    """Append one batch of frames, controls and metrics to the data file.

    The module-level lock is held for the whole call so the three datasets
    are extended together. An empty `data_img` makes the call a no-op.
    """
    batches = (("img", data_img), ("controls", controls), ("metrics", metrics))
    with lock:  # make sure that data is consistent
        if not data_img:
            return
        for name, batch in batches:
            dataset = data_file[name]
            count = len(batch)
            dataset.resize(dataset.shape[0] + count, axis=0)
            dataset[-count:] = batch
def delete(session):
    """Remove the most recently written rows from all three datasets.

    At most 500 rows are removed, and never more than a dataset holds, so the
    requested size cannot become negative. The module-level lock is taken
    because `save` may still be running on a worker thread and touches the
    same datasets.

    :param session: number of frames recorded in the session being discarded
    """
    frames = max(0, min(session, 500))
    with lock:
        for name in ("img", "controls", "metrics"):
            dataset = data_file[name]
            dataset.resize(max(dataset.shape[0] - frames, 0), axis=0)
def main():
    """Record screen frames together with gamepad input until LB is pressed.

    RB toggles between recording and paused. When recording is paused the
    user answers with the Y/N keys whether the last session (at most 500
    frames) is kept. LB, pressed while paused, flushes the remaining frames,
    closes the data file and leaves the loop.
    """
    # initialize gamepad
    gamepad = Gamepad()
    gamepad.open()

    alert_time = time.time()  # to signal about exceeding speed limit
    close = False  # to exit execution
    pause = True  # to pause execution
    session = 0  # number of frames recorded in one session
    training_img = []  # lists for storing training data
    controls = []
    metrics = []

    print("Press RB on your gamepad to start recording")
    while not close:
        while not pause:
            # read throttle and steering values from the gamepad
            throttle, steering = gamepad.get_state()
            # get screen, speed and direction
            ignore, screen, speed, direction = img_process("Grand Theft Auto V")

            training_img.append(screen)
            controls.append([throttle, steering])
            metrics.append([speed, direction])
            session += 1

            if speed > 60 and time.time() - alert_time > 1:
                winsound.PlaySound('.\\resources\\alert.wav', winsound.SND_ASYNC)
                alert_time = time.time()

            # save the data every 30 iterations
            if len(training_img) % 30 == 0:
                threading.Thread(target=save, args=(training_img, controls, metrics)).start()
                training_img = []
                controls = []
                metrics = []

            time.sleep(0.015)  # in order to slow down fps

            if gamepad.get_RB():
                pause = True
                print('Paused. Save the last 15 seconds?')
                keys = key_check()
                while ('Y' not in keys) and ('N' not in keys):
                    keys = key_check()

                if 'N' in keys:
                    # Frames to throw away: the tail of this session, capped at 500.
                    discard = min(session, 500)
                    # The newest of them may still sit in the in-memory lists and
                    # were never written; only the rest has to leave the file.
                    # Passing the full count would also remove older, kept rows.
                    pending = min(discard, len(training_img))
                    delete(discard - pending)
                    # Keep buffered frames that belong to an earlier, saved session.
                    keep = len(training_img) - pending
                    training_img = training_img[:keep]
                    controls = controls[:keep]
                    metrics = metrics[:keep]
                    print('Deleted.')
                else:
                    print('Saved.')

                print('To exit the program press LB.')
                session = 0
                time.sleep(0.5)

        if gamepad.get_RB():
            pause = False
            print('Unpaused')
            time.sleep(1)
        elif gamepad.get_LB():
            gamepad.close()
            close = True
            print('Saving data and closing the program.')
            save(training_img, controls, metrics)
        else:
            # avoid spinning at full speed while waiting for a button
            time.sleep(0.01)

    data_file.close()


if __name__ == '__main__':
    main()
================================================
FILE: data_collection/gamepad_cap.py
================================================
"""
Module for reading information from an Xbox gamepad
"""
import threading
from inputs import get_gamepad
# Gamepad part
# Raw value limits used for the stick axis and the triggers.
AXIS_MAX = 32767
AXIS_MIN = -32768
TRIGGER_MAX = 255
TRIGGER_MIN = -255
# Scale factors that map raw readings onto the range [-10, 10].
# The *_MIN factors are positive (negative divided by negative), so a negative
# raw reading stays negative after scaling.
AXIS_MAX_NORM = 10 / AXIS_MAX
AXIS_MIN_NORM = -10 / AXIS_MIN
TRIGGER_MAX_NORM = 10 / TRIGGER_MAX
TRIGGER_MIN_NORM = -10 / TRIGGER_MIN
# Scaled readings strictly between -DEADZONE and DEADZONE are reported as 0.
DEADZONE = 3
class Gamepad:
    """Background reader for gamepad events.

    `open()` starts a thread that keeps the latest raw values of the events
    this module is interested in; `get_state()` converts them into integer
    throttle and steering values in the range [-10, 10].
    """

    def __init__(self):
        self.x_axis = 0  # last raw ABS_X value
        self.y_axisP = 0  # last raw ABS_RZ value
        self.y_axisN = 0  # last raw ABS_Z value, negated
        self.RB = 0  # last BTN_TR state
        self.LB = 0  # last BTN_TL state
        self.stop = False  # set by close() to end the reader loop

    def open(self):
        """Start the reader thread."""
        self.stop = False
        # Daemon thread: get_gamepad() blocks until the next event arrives, so a
        # non-daemon reader would keep the process alive after close().
        threading.Thread(target=self.run, daemon=True).start()

    def run(self):
        """Reader loop: store the newest value of every relevant event."""
        while not self.stop:
            events = get_gamepad()
            for event in events:
                if event.code == "ABS_X":
                    self.x_axis = event.state
                elif event.code == "ABS_RZ":
                    self.y_axisP = event.state
                elif event.code == "ABS_Z":
                    self.y_axisN = -event.state
                elif event.code == "BTN_TR":
                    self.RB = event.state
                elif event.code == "BTN_TL":
                    self.LB = event.state
                else:
                    pass  # we're not interested in the remaining signals

    def get_state(self):
        """Return `(throttle, steering)` as integers in [-10, 10].

        The positive trigger value is used only when its raw reading is above
        60; otherwise the negated one is used. Results inside the dead zone
        are returned as 0.
        """
        xAxis = self.x_axis
        yAxis = self.y_axisP if self.y_axisP > 60 else self.y_axisN

        # normalize x axis
        if xAxis > 0:
            xAxis = int(round(xAxis * AXIS_MAX_NORM))
        else:
            xAxis = int(round(xAxis * AXIS_MIN_NORM))
        if -DEADZONE < xAxis < DEADZONE:
            xAxis = 0

        # normalize y axis
        if yAxis > 0:
            yAxis = int(round(yAxis * TRIGGER_MAX_NORM))
        else:
            yAxis = int(round(yAxis * TRIGGER_MIN_NORM))
        if -DEADZONE < yAxis < DEADZONE:
            yAxis = 0

        # return throttle and then steering
        return yAxis, xAxis

    def get_RB(self):
        """Return the last stored BTN_TR state."""
        return self.RB

    def get_LB(self):
        """Return the last stored BTN_TL state."""
        return self.LB

    def close(self):
        """Ask the reader loop to stop after the event it is waiting for."""
        self.stop = True
================================================
FILE: data_collection/histogram.py
================================================
"""
Histogram of turns (for future balancing of data)
"""
import h5py
import matplotlib.pyplot as plt
import numpy as np
from data_collection.data_collect import path
# One bin per integer steering value from -10 to 10, centred on the integer.
n_bins = [x - 0.5 for x in range(-10, 12)]

# Read only the steering column; the context manager closes the file even if
# reading fails.
with h5py.File(path, 'r') as data:
    steering = data['controls'][:, 1]

fig, axs = plt.subplots()
axs.hist(steering, bins=n_bins)
plt.xticks(np.arange(-10, 11, step=1))
plt.show()
================================================
FILE: data_collection/img_process.py
================================================
"""
Module for preprocessing screen captures
"""
import win32gui
import win32ui
import cv2
import numpy as np
import win32con
def initKNN(data, labels, shape):
    """Create and train an OpenCV k-nearest-neighbours model from .npy files.

    :param data: path of the .npy file with the training samples
    :param labels: path of the .npy file with the matching labels
    :param shape: number of values per sample; the samples are reshaped to
                  rows of this length and converted to float32
    :return: the trained model
    """
    model = cv2.ml.KNearest_create()
    samples = np.load(data)
    samples = samples.reshape(-1, shape).astype(np.float32)
    responses = np.load(labels)
    model.train(samples, cv2.ml.ROW_SAMPLE, responses)
    return model
# Raw strings: the previous literals relied on the invalid escape sequence "\d",
# which newer Python versions warn about. The resulting paths are unchanged.
# Digit samples have 40 values each, arrow samples 90.
knnDigits = initKNN(r'..\data_collection\resources\digits.npy',
                    r'..\data_collection\resources\digits_labels.npy', 40)
knnArrows = initKNN(r'..\data_collection\resources\arrows.npy',
                    r'..\data_collection\resources\arrows_labels.npy', 90)
# Done by Frannecklp
def grab_screen(winName: str = "Grand Theft Auto V"):
    """Capture the client area of the window titled `winName`.

    The window rectangle is looked up by title, trimmed by fixed border
    widths and copied from the desktop device context into a bitmap.

    :param winName: exact title of the window to capture
    :return: the captured area as a 3-channel uint8 array
             (4-channel bitmap data passed through cv2.COLOR_RGBA2RGB)
    """
    desktop = win32gui.GetDesktopWindow()

    # get area by a window name
    gtawin = win32gui.FindWindow(None, winName)
    # get the bounding box of the window
    left, top, x2, y2 = win32gui.GetWindowRect(gtawin)
    # cut window borders
    top += 32
    left += 3
    y2 -= 4
    x2 -= 4
    width = x2 - left + 1
    height = y2 - top + 1

    # the device context (DC) for the entire window (title bar, menus, scroll bars, etc.)
    hwindc = win32gui.GetWindowDC(desktop)
    # Create a DC object from an integer handle
    srcdc = win32ui.CreateDCFromHandle(hwindc)
    # Create a memory device context that is compatible with the source DC
    memdc = srcdc.CreateCompatibleDC()
    # Create a bitmap compatible with the source DC and select it into the memory DC
    bmp = win32ui.CreateBitmap()
    bmp.CreateCompatibleBitmap(srcdc, width, height)
    memdc.SelectObject(bmp)

    # Copy a bitmap from the source device context to this device context
    # parameters: destPos, size, dc, srcPos, rop (the raster operation)
    memdc.BitBlt((0, 0), (width, height), srcdc, (left, top), win32con.SRCCOPY)

    # np.frombuffer replaces the deprecated binary mode of np.fromstring;
    # the bitmap bits are interpreted as rows of 4-byte pixels.
    raw = bmp.GetBitmapBits(True)
    img = np.frombuffer(raw, dtype='uint8').reshape(height, width, 4)

    # Delete all resources associated with the device context
    srcdc.DeleteDC()
    memdc.DeleteDC()
    # Releases the device context
    win32gui.ReleaseDC(desktop, hwindc)
    # Delete the bitmap and free all system resources associated with the object.
    # After the object is deleted, the specified handle is no longer valid.
    win32gui.DeleteObject(bmp.GetHandle())

    # cvtColor allocates a new (writable) array, so callers may draw on the result
    return cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
def predict(img, knn):
    """Run a 1-nearest-neighbour lookup and return its `results` output.

    :param img: samples passed to `knn.findNearest`
    :param knn: object providing `findNearest(samples, k=...)` with four outputs
    :return: the second of the four outputs
    """
    _retval, results, _neighbours, _distances = knn.findNearest(img, k=1)
    return results
def preprocess(img):
    """Convert `img` to grayscale and binarize it with an adaptive mean threshold.

    The threshold uses a block size of 7 and a constant of -5.
    """
    grayscale_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    return cv2.adaptiveThreshold(grayscale_img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY, 7, -5)
def convert_speed(num1, num2, num3):
    """Combine three recognized digit fields into one integer speed.

    Each argument is indexed as `field[0][0]`; the value 10 marks an empty
    field. The non-empty digits are read from `num1` (most significant) to
    `num3` (least significant) and joined as if the empty fields were absent.

    :return: the speed as an int, 0 when all fields are empty
    """
    speed = 0
    place = 1  # weight of the next non-empty digit, starting from the right
    for field in (num3, num2, num1):
        digit = field[0][0]
        if digit == 10:
            continue
        speed += place * int(digit)
        place *= 10
    return speed
def img_process(winName: str = "Grand Theft Auto V"):
    """Grab the game window and read speed and direction from fixed screen areas.

    :param winName: title of the window to capture
    :return: (full screen capture, capture resized to 320x240, speed, direction)
    """
    screen = grab_screen(winName)

    # Speed read-out. Rows 567:575 are for Ji Hyun's computer;
    # on Rustam's computer the rows are 573:581.
    numbers = preprocess(screen[567:575, 683:702, :])
    # three fields for numbers
    digit_fields = (numbers[:, :5], numbers[:, 7:12], numbers[:, -5:])
    num1, num2, num3 = [
        predict(field.reshape(-1, 40).astype(np.float32), knnDigits)
        for field in digit_fields
    ]

    # One field for direction arrows. Rows 561:570 are for Ji Hyun's computer;
    # on Rustam's computer the rows are 567:576.
    arrow = preprocess(screen[561:570, 18:28, :])
    arrow = arrow.reshape(-1, 90).astype(np.float32)
    direct = int(predict(arrow, knnArrows)[0][0])

    speed = convert_speed(num1, num2, num3)
    resized = cv2.resize(screen, (320, 240))
    return screen, resized, speed, direct
================================================
FILE: data_collection/key_cap.py
================================================
# Citation: Box Of Hats (https://github.com/Box-Of-Hats)
"""
Module for reading keys from a keyboard
"""
import win32api as wapi
# Keys polled by key_check(): backspace followed by each character below.
keyList = ["\b", *"ABCDEFGHIJKLMNOPQRSTUVWXYZ 0123456789,.'£$/\\"]
def key_check():
    """Return the entries of `keyList` for which GetAsyncKeyState is non-zero."""
    return [key for key in keyList if wapi.GetAsyncKeyState(ord(key))]
================================================
FILE: drivers.txt
================================================
# An Xbox controller emulator is needed for testing the AI
# https://github.com/shauleiz/ScpVBus/releases
ScpVBus
# Installation:
# In CMD (administrator): devcon.exe install ScpVBus.inf Root\ScpVBus
# Removal:
# In CMD (administrator): devcon.exe remove Root\ScpVBus
================================================
FILE: driving/drive.py
================================================
"""
Car driving module.
"""
# reading and writing files
import os
import time
import cv2
import numpy as np
# load our saved model
from keras.models import load_model
# helper classes
from data_collection.img_process import img_process
from data_collection.key_cap import key_check
# gamepad axes limits and gamepad module
from driving.gamepad import AXIS_MIN, AXIS_MAX, TRIGGER_MAX, XInputDevice
from object_detection.direction import Direct
# YOLO algorithm
from object_detection.object_detect import yolo_detection
# lane detection algorithm
from object_detection.lane_detect import detect_lane, draw_lane
from training.utils import preprocess
# Directory that main() joins with 'base_model.h5' to locate the trained model.
model_path = "..\\training"
# Virtual gamepad; assigned in drive() and used by set_gamepad().
gamepad = None
def set_gamepad(controls):
    """Apply one steering/throttle pair to the virtual gamepad.

    :param controls: nested sequence where `controls[0][0]` is the steering
                     value and `controls[0][1]` the throttle value. Both are
                     clamped to [-1, 1] so that a prediction outside that
                     range cannot exceed the trigger and axis limits.

    A non-negative throttle drives the right trigger, a negative one the left
    trigger; the unused trigger is set to zero.
    """
    steering = max(-1.0, min(1.0, float(controls[0][0])))
    throttle = max(-1.0, min(1.0, float(controls[0][1])))

    # trigger value
    trigger = int(round(throttle * TRIGGER_MAX))
    if trigger >= 0:
        # set left trigger to zero
        gamepad.SetTrigger('L', 0)
        gamepad.SetTrigger('R', trigger)
    else:
        # inverse value and set right trigger to zero
        gamepad.SetTrigger('L', -trigger)
        gamepad.SetTrigger('R', 0)

    # axis value (the negative range is one step larger than the positive one)
    if steering >= 0:
        axis = int(round(steering * AXIS_MAX))
    else:
        axis = int(round(steering * (-AXIS_MIN)))
    gamepad.SetAxis('X', axis)
def drive(model):
# Drive the car with `model`: plugs in a virtual gamepad on port 1, then loops
# until the 'Z' key is seen while paused. While paused, the raw screen capture is
# shown; the 'T' key switches between paused and driving.
# NOTE(review): this listing has lost its indentation, so the nesting of the
# branches below cannot be confirmed from here - verify against the original file.
global gamepad
gamepad = XInputDevice(1)
gamepad.PlugIn()
# last_time = time.time() # to measure the number of frames
close = False # to exit execution
pause = True # to pause execution
stop = False # to stop the car
throttle = 0
# x limits compared against the first element of the left / right lane entry
left_line_max = 75
right_line_max = 670
print("Press T to start driving")
while not close:
# img_process returns the full capture first; it is shown unmodified here
yolo_screen, resized, speed, direct = img_process("Grand Theft Auto V")
cv2.imshow("Driving-mode", yolo_screen)
cv2.waitKey(1)
while not pause:
# apply the preprocessing
screen, resized, speed, direct = img_process("Grand Theft Auto V")
# 20x20 crop of the resized frame reduced to one channel
# (presumably the minimap radar - confirm)
radar = cv2.cvtColor(resized[206:226, 25:45, :], cv2.COLOR_RGB2BGR)[:, :, 2:3]
resized = preprocess(resized)
left_line_color = [0, 255, 0]
right_line_color = [0, 255, 0]
# predict steering angle for the image
# original + radar (small) + speed
controls = model.predict([np.array([resized]), np.array([radar]), np.array([speed])], batch_size=1)
# check that the car is following lane
lane, stop_line = detect_lane(screen)
# detect objects
yolo_screen, color_detected, obj_distance = yolo_detection(screen, direct)
if not stop:
# adjusting speed
if speed < 45:
throttle = 0.4
elif speed > 50:
throttle = 0.0
# brake for a detected object, harder when obj_distance <= 0.4
if 0 <= obj_distance <= 0.6:
if speed < 5:
throttle = 0
else:
throttle = -0.7 if obj_distance <= 0.4 else -0.3
# red light: braking strength depends on the second element of the first stop-line entry
elif color_detected == "Red":
if stop_line:
if speed < 5:
throttle = 0
elif 0 <= stop_line[0][1] <= 50:
throttle = -0.5
elif 50 < stop_line[0][1] <= 120:
throttle = -1
# else:
# throttle = -0.5
elif speed > 5:
throttle = -1
else:
throttle = 0
cv2.destroyAllWindows()
pause = True
# adjusting steering angle
# a lane limit that is crossed forces a steering value of +/-0.27 when the
# predicted magnitude is smaller, and marks that lane line with a different colour
if lane[0] and lane[0][0] > left_line_max:
if abs(controls[0][0]) < 0.27:
controls[0][0] = 0.27
left_line_color = [0, 0, 255]
elif lane[1] and lane[1][0] < right_line_max:
if abs(controls[0][0]) < 0.27:
controls[0][0] = -0.27
right_line_color = [0, 0, 255]
# set the gamepad values
set_gamepad([[controls[0][0], throttle]])
# print('Main loop took {} seconds'.format(time.time() - last_time))
# last_time = time.time()
screen[280:-130, :, :] = draw_lane(screen[280:-130, :, :], lane, stop_line,
left_line_color, right_line_color)
cv2.imshow("Driving-mode", yolo_screen)
cv2.waitKey(1)
# 6 is the value of Direct.ARRIVED in object_detection/direction.py
if direct == 6:
print("Arrived at destination.")
stop = True
# print('Main loop took {} seconds'.format(time.time() - last_time))
# last_time = time.time()
keys = key_check()
if 'T' in keys:
cv2.destroyAllWindows()
pause = True
# release gamepad keys
set_gamepad([[0, 0]])
print('Paused. To exit the program press Z.')
time.sleep(0.5)
keys = key_check()
if 'T' in keys:
pause = False
stop = False
print('Unpaused')
time.sleep(1)
elif 'Z' in keys:
cv2.destroyAllWindows()
close = True
print('Closing the program.')
gamepad.UnPlug()
def main():
    """Load 'base_model.h5' from `model_path` and let it drive the car."""
    # load model
    model = load_model(os.path.join(model_path, 'base_model.h5'))
    # control a car
    drive(model)


if __name__ == '__main__':
    main()
================================================
FILE: driving/gamepad.py
================================================
# This code based on Musi13's code (https://github.com/Musi13/pyvxbox)
"""
Gamepad emulating module.
"""
import sys
from ctypes import *
# File name of the vXbox interface DLL handed to cdll.LoadLibrary.
dll_path = "vXboxInterface.dll"
# Load the DLL at import time; print the error and exit if loading fails.
try:
_vx = cdll.LoadLibrary(dll_path)
except OSError as e:
print(e)
sys.exit("Unable to load vXbox SDK DLL. Ensure that %s is present" % dll_path)
# Abort the import unless the DLL reports that the virtual bus exists.
if not _vx.isVBusExists():
raise Exception('Xbox VBus does not exist')
# Value limits for axes and triggers, and the two button states.
AXIS_MAX = 32767
AXIS_MIN = -32768
TRIGGER_MAX = 255
BTN_ON = True
BTN_OFF = False
class XInputDevice:
    """Virtual controller on one port of the vXbox interface DLL.

    Every method forwards to the DLL function of the same purpose, passing
    the port given to the constructor as first argument.
    """

    def __init__(self, port):
        """Remember `port`; raise if the DLL reports a controller already on it."""
        if _vx.isControllerExists(port):
            raise Exception('Port %d is already used' % port)
        self.UserIndex = port

    def PlugIn(self):
        """Plug the virtual controller in."""
        _vx.PlugIn(self.UserIndex)

    def UnPlug(self, force=False):
        """Unplug the virtual controller, using the forced variant when `force` is true."""
        if force:
            _vx.UnPlugForce(self.UserIndex)
        else:
            _vx.UnPlug(self.UserIndex)

    def SetBtn(self, button, value):
        """Set the state of `button`; raise for an unknown button name."""
        setters = {
            'A': _vx.SetBtnA,
            'B': _vx.SetBtnB,
            'X': _vx.SetBtnX,
            'Y': _vx.SetBtnY,
            'Start': _vx.SetBtnStart,
            'Back': _vx.SetBtnBack,
            'LT': _vx.SetBtnLT,
            'RT': _vx.SetBtnRT,
            'LB': _vx.SetBtnLB,
            'RB': _vx.SetBtnRB,
            'GD': _vx.SetBtnGD
        }
        if button not in setters:
            raise Exception('Unknown button %s' % str(button))
        setters[button](self.UserIndex, value)

    def SetTrigger(self, trigger, value):
        """Set the 'L' or 'R' trigger to `value`; raise for any other name."""
        setters = {
            'L': _vx.SetTriggerL,
            'R': _vx.SetTriggerR
        }
        if trigger not in setters:
            raise Exception('Unknown trigger %s' % str(trigger))
        setters[trigger](self.UserIndex, value)

    def SetAxis(self, axis, value):
        """Set axis 'X', 'Y', 'Rx' or 'Ry' to `value`; raise for any other name."""
        setters = {
            'X': _vx.SetAxisX,
            'Y': _vx.SetAxisY,
            'Rx': _vx.SetAxisRx,
            'Ry': _vx.SetAxisRy
        }
        if axis not in setters:
            raise Exception('Unknown axis %s' % str(axis))
        setters[axis](self.UserIndex, value)

    def SetDpad(self, direction, value=0):
        """Press a D-pad direction, or pass `value` to SetDpad when `direction` is ''."""
        setters = {
            'Up': _vx.SetDpadUp,
            'Right': _vx.SetDpadRight,
            'Down': _vx.SetDpadDown,
            'Left': _vx.SetDpadLeft,
            '': _vx.SetDpad
        }
        if direction not in setters:
            raise Exception('Unknown direction %s' % str(direction))
        setter = setters[direction]
        if direction == '':
            setter(self.UserIndex, value)
        else:
            setter(self.UserIndex)

    def GetLedNumber(self, pLed):
        """Forward `pLed` to the DLL's GetLedNumber for this port."""
        _vx.GetLedNumber(self.UserIndex, pLed)

    def GetVibration(self, pVib):
        """Forward `pVib` to the DLL's GetVibration for this port."""
        _vx.GetVibration(self.UserIndex, pVib)
================================================
FILE: game_plugins.txt
================================================
### List of plugins used in GTA V
### for generating better conditions for AI
# allows installation of plugins
Script Hook V
# for adjusting weather conditions, time, the number of cars and pedestrians, etc.
Simple Trainer for GTA V
================================================
FILE: object_detection/direction.py
================================================
from enum import Enum
class Direct(int, Enum):
    """Navigation directions.

    The `int` mixin makes members compare equal to their plain integer
    values. Callers in this project pass directions around as integers and
    compare them with these members, which never matched with a plain Enum.
    """
    STRAIGHT = 0
    LEFT = 1
    RIGHT = 2
    SLIGHTLY_LEFT = 3
    SLIGHTLY_RIGHT = 4
    U_TURN = 5
    ARRIVED = 6
================================================
FILE: object_detection/lane_detect.py
================================================
import math
import cv2
import numpy as np
from data_collection.img_process import grab_screen
# State kept between construct_lane() calls: the previous end points of the
# left lane line (x pair), the right lane line (x pair) and the stop line (y pair).
# An empty list means no previous value is stored.
prev_lines = [[], [], []]
def crop(image):
    """
    Return the rows of a 3-dimensional image between row 280 and 130 rows
    above the bottom (drops the sky at the top and the car front at the bottom).
    """
    kept_rows = slice(280, -130)
    return image[kept_rows, :, :]
def grayscale(img):
    """
    Convert `img` with cv2.COLOR_RGB2GRAY and return the single-channel result.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    return gray
def canny(img, low_threshold=100, high_threshold=300):
    """
    Run the Canny edge detector on `img` with the two given thresholds.
    """
    edges = cv2.Canny(img, low_threshold, high_threshold)
    return edges
def gaussian_blur(img, kernel_size):
    """
    Blur `img` with a square Gaussian kernel of side `kernel_size`
    (sigma 30 in both directions).
    """
    kernel = (kernel_size, kernel_size)
    return cv2.GaussianBlur(img, kernel, sigmaX=30, sigmaY=30)
def region_of_interest(img, vertices):
    """
    Keep only the part of `img` inside the polygon(s) given by `vertices`.

    A mask of the same shape as `img` is filled with 255 (in every channel
    when the image has a channel axis) inside the polygon and combined with
    the image by a bitwise AND, so everything outside becomes black.
    `vertices` should be a numpy array of integer points.
    """
    # one fill value per channel for multi-channel images, a scalar otherwise
    if len(img.shape) > 2:
        fill = (255,) * img.shape[2]
    else:
        fill = 255

    mask = np.zeros_like(img)
    cv2.fillPoly(mask, vertices, fill)
    return cv2.bitwise_and(img, mask)
def construct_lane(lines):
"""
Group line segments into a left lane line, a right lane line and a stop line.
`lines` may be None; otherwise every entry is iterated for (x1, y1, x2, y2) tuples.
Segments are classified by slope: |slope| <= 0.05 are stop-line candidates
(only when both y values exceed 20), 0.05 < |slope| < 0.3 are ignored, and the
rest go to the left group when the slope is not positive, else to the right group.
Each lane group is fitted with a first-degree polynomial x = f(y) that is
evaluated at max_y and min_y. The stop line is fitted as y = f(x) on the first
candidate group and evaluated at x = 50 and x = 750, but only when a second
candidate group exists.
Fitted end points are moved at most `offset` away from the values stored in the
module-level `prev_lines`, which this function reads and updates. When a group
is empty, a stored previous value is returned once and then cleared.
Returns (lane, stop_line): lane is [left, right] where each entry is [] or
[x1, max_y, x2, min_y]; stop_line is [] or [[50, y1, 750, y2]].
NOTE(review): vertical segments (x1 == x2) are assigned slope 0 and are therefore
treated as stop-line candidates - confirm that this is intended.
NOTE(review): this listing has lost its indentation, so whether the fitting part
also runs when `lines` is None cannot be confirmed from here.
"""
left_line_x = []
left_line_y = []
right_line_x = []
right_line_y = []
stop_line_x_first = []
stop_line_y_first = []
stop_line_x_second = []
stop_line_y_second = []
lane = [[], []]
stop_line = []
# y values at which the fitted lane lines are evaluated
min_y = 0
max_y = 190
if lines is not None:
for line in lines:
for x1, y1, x2, y2 in line:
slope = (y2 - y1) / (x2 - x1) if x1 != x2 else 0 # <-- Calculating the slope.
if 0.05 < math.fabs(slope) < 0.3: # not interested
continue
if math.fabs(slope) <= 0.05: # stop line
if (y1 > 20) and (y2 > 20):
# we need to detect two stop lines (top and bottom)
# a segment joins the first group when its y1 is within 15 of that group's first y
if not stop_line_x_first or abs(stop_line_y_first[0] - y1) < 15:
stop_line_x_first.extend([x1, x2])
stop_line_y_first.extend([y1, y2])
else:
stop_line_x_second.extend([x1, x2])
stop_line_y_second.extend([y1, y2])
elif slope <= 0: # <-- If the slope is negative, left group.
left_line_x.extend([x1, x2])
left_line_y.extend([y1, y2])
else: # <-- Otherwise, right group.
right_line_x.extend([x1, x2])
right_line_y.extend([y1, y2])
# largest allowed change of an end point relative to the stored previous value
offset = 7
if left_line_x:
poly_left = np.poly1d(np.polyfit(
left_line_y,
left_line_x,
deg=1
))
x1 = int(poly_left(max_y))
x2 = int(poly_left(min_y))
if prev_lines[0]:
# recalculate x1
if abs(x1 - prev_lines[0][0]) > offset:
x1 = prev_lines[0][0] - offset if prev_lines[0][0] > x1 else prev_lines[0][0] + offset
# recalculate x2
if abs(x2 - prev_lines[0][1]) > offset:
x2 = prev_lines[0][1] - offset if prev_lines[0][1] > x2 else prev_lines[0][1] + offset
prev_lines[0] = [x1, x2]
lane[0] = [x1, max_y, x2, min_y]
elif prev_lines[0]:
# nothing detected: reuse the stored left line once, then forget it
lane[0] = [prev_lines[0][0], max_y, prev_lines[0][1], min_y]
prev_lines[0] = []
if right_line_x:
poly_right = np.poly1d(np.polyfit(
right_line_y,
right_line_x,
deg=1
))
x1 = int(poly_right(max_y))
x2 = int(poly_right(min_y))
if prev_lines[1]:
# recalculate x1
if abs(x1 - prev_lines[1][0]) > offset:
x1 = prev_lines[1][0] - offset if prev_lines[1][0] > x1 else prev_lines[1][0] + offset
# recalculate x2
if abs(x2 - prev_lines[1][1]) > offset:
x2 = prev_lines[1][1] - offset if prev_lines[1][1] > x2 else prev_lines[1][1] + offset
prev_lines[1] = [x1, x2]
lane[1] = [x1, max_y, x2, min_y]
elif prev_lines[1]:
# nothing detected: reuse the stored right line once, then forget it
lane[1] = [prev_lines[1][0], max_y, prev_lines[1][1], min_y]
prev_lines[1] = []
# the stop line is reported only when a second candidate group was found;
# the fit itself uses the first group
if stop_line_x_second:
poly_stop = np.poly1d(np.polyfit(
stop_line_x_first,
stop_line_y_first,
deg=1
))
y1 = int(poly_stop(50))
y2 = int(poly_stop(750))
if prev_lines[2]:
# recalculate y1
if abs(y1 - prev_lines[2][0]) > offset:
y1 = prev_lines[2][0] - offset if prev_lines[2][0] > y1 else prev_lines[2][0] + offset
# recalculate y2
if abs(y2 - prev_lines[2][1]) > offset:
y2 = prev_lines[2][1] - offset if prev_lines[2][1] > y2 else prev_lines[2][1] + offset
prev_lines[2] = [y1, y2]
stop_line.append([50, y1, 750, y2])
elif prev_lines[2]:
# nothing detected: reuse the stored stop line once, then forget it
stop_line.append([50, prev_lines[2][0], 750, prev_lines[2][1]])
prev_lines[2] = []
return lane, stop_line
def hough_lines(img, rho=6, theta=np.pi / 120, threshold=160, min_line_len=60, max_line_gap=10):
    """
    Run the probabilistic Hough transform on `img` and return its result.

    `img` should be the output of a Canny transform. The return value is
    whatever cv2.HoughLinesP returns for the given parameters.
    """
    return cv2.HoughLinesP(img, rho, theta, threshold, np.array([]),
                           minLineLength=min_line_len, maxLineGap=max_line_gap)
def add_images(img, initial_img):
    """
    Add the overlay `img` on top of `initial_img` with cv2.add.

    `img` is expected to be black except for the shapes drawn on it.
    NOTE: initial_img and img must be the same shape!
    """
    combined = cv2.add(initial_img, img)
    return combined
def draw_lane(original_img, lane, stop_line, left_color, right_color, thickness=5):
# Draw the lane lines, a filled lane area and the stop line on a black canvas of
# the same height and width as `original_img`, then add the canvas to the image.
# `lane` is [left, right]; each entry is empty or unpacks to (x1, y1, x2, y2).
# `stop_line` is a (possibly empty) list of (x1, y1, x2, y2) entries.
# NOTE(review): this listing has lost its indentation; whether the clipping step
# at the end runs only when a stop line exists cannot be confirmed from here.
img = np.zeros((original_img.shape[0], original_img.shape[1], 3), dtype=np.uint8)
polygon_points = None
# horizontal distance between a lane line and the edge of the filled area
offset_from_lane_edge = 8
# draw lane lines
if lane[0]:
for x1, y1, x2, y2 in [lane[0]]:
cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), left_color, thickness)
if lane[1]:
for x1, y1, x2, y2 in [lane[1]]:
cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), right_color, thickness)
# color the lane
if lane[0] and lane[1]:
lane_color = [40, 60, 0]
# the filled polygon lies between the two lines, shrunk by the offset on each side
for x1, y1, x2, y2 in [lane[0]]:
p1 = (x1 + offset_from_lane_edge, y1)
p2 = (x2 + offset_from_lane_edge, y2)
for x1, y1, x2, y2 in [lane[1]]:
p3 = (x2 - offset_from_lane_edge, y2)
p4 = (x1 - offset_from_lane_edge, y1)
polygon_points = np.array([[p1, p2, p3, p4]], np.int32)
cv2.fillPoly(img, polygon_points, lane_color)
# draw stop line
if stop_line:
for x1, y1, x2, y2 in stop_line:
cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), [0, 0, 255], thickness * 3)
# when both lane lines exist, mask the canvas to a polygon that is widened by
# the offset on each side of the lane
if polygon_points is not None:
for px1, py1, px2, py2 in [lane[0]]:
p1 = (px1 - offset_from_lane_edge, py1)
p2 = (px2 - offset_from_lane_edge, py2)
for px1, py1, px2, py2 in [lane[1]]:
p3 = (px2 + offset_from_lane_edge, py2)
p4 = (px1 + offset_from_lane_edge, py1)
polygon_points = np.array([[p1, p2, p3, p4]], np.int32)
img = region_of_interest(img, polygon_points)
return add_images(img, original_img)
def detect_lane(screen):
    """
    Run the lane detection pipeline on a screen capture.

    Steps: crop, grayscale, Gaussian blur (kernel 7), Canny (50, 100), region
    of interest mask, Hough lines, lane construction.

    :return: the (lane, stop_line) pair produced by construct_lane
    """
    # polygon that is kept by the region-of-interest mask
    roi_vertices = np.array([[(0, 190), (0, 70), (187, 0),
                              (613, 0), (800, 70), (800, 190)]], np.int32)

    gray = grayscale(crop(screen))
    edges = canny(gaussian_blur(gray, 7), 50, 100)
    lines = hough_lines(region_of_interest(edges, roi_vertices))
    return construct_lane(lines)
def main():
    """Preview loop: show the detected lane on live screen captures.

    Each captured frame goes through detect_lane(), the result is drawn onto
    the cropped band of the frame, and the frame is displayed. Pressing 'q'
    in the preview window closes it and ends the loop.
    """
    while True:
        original_img = grab_screen()

        # same pipeline the driving code uses, instead of a second copy of it
        lane, stop_line = detect_lane(original_img)

        # Place lane detection output on the original image
        original_img[280:-130, :, :] = draw_lane(original_img[280:-130, :, :], lane, stop_line,
                                                 [0, 255, 0], [0, 255, 0])

        cv2.imshow("Frame", original_img)
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            cv2.destroyAllWindows()
            break


if __name__ == '__main__':
    main()
================================================
FILE: object_detection/object_detect.py
================================================
import cv2
import numpy as np
from darkflow.net.build import TFNet
from shapely.geometry import box, Polygon
from data_collection.img_process import grab_screen
from object_detection.direction import Direct
# set YOLO options
# Options passed to TFNet: model configuration file, weights file,
# detection threshold and the 'gpu' setting.
options = {
'model': 'cfg/yolo.cfg',
'load': 'yolov2.weights',
'threshold': 0.3,
'gpu': 0.5
}
# The network is built once, when this module is imported.
tfnet = TFNet(options)
# capture = cv2.VideoCapture('gta2.mp4')
t = (0, 0, 0)
# Five random colours and fifteen black ones.
# NOTE(review): yolo_detection zips both lists with the detections, so at most
# len(colors) == 5 detections are visited per frame - confirm this is intended.
colors = [tuple(255 * np.random.rand(3)) for i in range(5)]
colors2 = [tuple(t) for j in range(15)]
def light_recog(frame, direct, traffic_lights):
    """Determine the colour shown by the relevant traffic light.

    :param frame: image the detections refer to; the colour name may be drawn on it
    :param direct: current direction, either a `Direct` member or its integer value
    :param traffic_lights: detections with 'topleft' and 'bottomright' entries,
                           each holding 'x' and 'y'
    :return: (frame, colour) where colour is "Red", "Yellow", "Green" or ''
             when no colour was found or no traffic light was given

    With several traffic lights, the right-most one is used for RIGHT and
    SLIGHTLY_RIGHT, the left-most one otherwise. When several colours are
    found, green wins over yellow and yellow over red.
    """
    if not traffic_lights:
        return frame, ''

    # Compare by value: the direction normally arrives as a plain integer,
    # which never equals a plain Enum member.
    direct_value = direct.value if isinstance(direct, Direct) else direct
    turning_right = direct_value in (Direct.RIGHT.value, Direct.SLIGHTLY_RIGHT.value)

    # find out which traffic light to follow, if there are several
    # (max/min return the first of equal candidates, like a strict comparison loop)
    pick = max if turning_right else min
    traffic_light = pick(traffic_lights, key=lambda tl: tl['topleft']['x'])

    # coordinates of the traffic light
    top_left = (traffic_light['topleft']['x'], traffic_light['topleft']['y'])
    bottom_right = (traffic_light['bottomright']['x'], traffic_light['bottomright']['y'])

    # crop the frame to the traffic light
    roi = frame[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]]
    hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

    # possible color ranges for traffic lights
    red_lower = np.array([136, 87, 111], dtype=np.uint8)
    red_upper = np.array([180, 255, 255], dtype=np.uint8)
    yellow_lower = np.array([22, 60, 200], dtype=np.uint8)
    yellow_upper = np.array([60, 255, 255], dtype=np.uint8)
    green_lower = np.array([50, 100, 100], dtype=np.uint8)
    green_upper = np.array([70, 255, 255], dtype=np.uint8)

    # find what color the traffic light is showing
    red = cv2.inRange(hsv, red_lower, red_upper)
    yellow = cv2.inRange(hsv, yellow_lower, yellow_upper)
    green = cv2.inRange(hsv, green_lower, green_upper)

    # the red and green masks are dilated, the yellow one is used as is
    kernel = np.ones((5, 5), np.uint8)
    red = cv2.dilate(red, kernel)
    green = cv2.dilate(green, kernel)

    color_detected = ''
    for name, mask in (("Red", red), ("Yellow", yellow), ("Green", green)):
        # [-2] is the contour list both when findContours returns three
        # values (OpenCV 3) and when it returns two (OpenCV 4)
        contours = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(contours) > 0:
            color_detected = name

    # label the light only when its box lies inside the central area of the frame
    if (0 <= top_left[1] and bottom_right[1] <= 437) and (244 <= top_left[0] and bottom_right[0] <= 630):
        frame = cv2.putText(frame, color_detected, bottom_right, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)
        frame = cv2.putText(frame, color_detected, bottom_right, cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

    return frame, color_detected
def distance_to_car(frame, top_left, bottom_right):
    """
    Rate how close a detected vehicle is and write the result onto the frame.

    The bounding box is tested against a fixed trapezoid in front of the car;
    only boxes touching that trapezoid receive a distance value.

    :param frame: image the annotations are drawn on
    :param top_left: (x, y) of the top-left corner of the bounding box
    :param bottom_right: (x, y) of the bottom-right corner of the bounding box
    :return: (frame, distance), distance is None when the box misses the trapezoid
    """
    left, top = top_left[0], top_left[1]
    right, bottom = bottom_right[0], bottom_right[1]

    # trapezoid (pixel coordinates) that has to be touched by the bounding box
    lane_area = Polygon([(100, 470), (350, 280), (450, 280), (700, 470)])
    vehicle = box(left, top, right, bottom)
    if not lane_area.intersects(vehicle):
        return frame, None

    # the wider the box (relative to 800 px), the smaller the resulting value
    distance = round((1 - ((right / 800) - (left / 800))) ** 4, 1)

    # value in the middle of the box, warning text inside the box
    center = (int((right + left) / 2), int((top + bottom) / 2))
    frame = cv2.putText(frame, '{}'.format(distance), center, cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                        (255, 255, 255), 2)
    cv2.putText(frame[top:bottom, left:right],
                'WARNING!', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3)
    return frame, distance
def distance_to_human(frame, top_left, bottom_right):
    """
    Rate how close a detected person is and write the result onto the frame.

    Works like the vehicle variant but with a slightly wider trapezoid and a
    steeper curve (power of 15), so the value drops faster as the box widens.

    :param frame: image the annotations are drawn on
    :param top_left: (x, y) of the top-left corner of the bounding box
    :param bottom_right: (x, y) of the bottom-right corner of the bounding box
    :return: (frame, distance), distance is None when the box misses the trapezoid
    """
    left, top = top_left[0], top_left[1]
    right, bottom = bottom_right[0], bottom_right[1]

    # trapezoid (pixel coordinates) that has to be touched by the bounding box
    walk_area = Polygon([(90, 470), (350, 280), (450, 280), (700, 470)])
    pedestrian = box(left, top, right, bottom)
    if not walk_area.intersects(pedestrian):
        return frame, None

    # the wider the box (relative to 800 px), the smaller the resulting value
    distance = round((1 - ((right / 800) - (left / 800))) ** 15, 1)

    # value in the middle of the box, warning text inside the box
    center = (int((right + left) / 2), int((top + bottom) / 2))
    frame = cv2.putText(frame, '{}'.format(distance), center, cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                        (255, 255, 255), 2)
    cv2.putText(frame[top:bottom, left:right],
                'WARNING!', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3)
    return frame, distance
def yolo_detection(screen, direct):
    """
    Find objects on a frame with YOLO, draw them and report what matters for driving.

    :param screen: frame to analyse; boxes and labels are drawn onto it
    :param direct: driving direction, handed over unchanged to light_recog
    :return: (screen, color_detected, distance): color_detected is whatever light_recog
             reported (None when no traffic light qualified), distance is the smallest
             value reported for a vehicle or a person (1 when nothing was reported)
    """
    # find objects on a frame by using YOLO (the bottom 130 rows are not passed to the detector)
    results = tfnet.return_predict(screen[:-130, :, :])
    # create a list of detected traffic lights (might be several on a frame)
    traffic_lights = []
    color_detected = None
    distance = 1

    # NOTE: zip() stops at the shortest input, so at most len(colors) detections are processed
    for color, color2, result in zip(colors, colors2, results):
        top_left = (result['topleft']['x'], result['topleft']['y'])
        bottom_right = (result['bottomright']['x'], result['bottomright']['y'])
        label = result['label']
        confidence = result['confidence']
        text = '{}: {:.0f}%'.format(label, confidence * 100)

        if label == 'traffic light' and confidence > 0.3:
            # only lights within this horizontal band are kept for color recognition
            if 220 <= result['topleft']['x'] <= 630:
                traffic_lights.append(result)
                color = color2
            screen = cv2.rectangle(screen, top_left, bottom_right, color, 6)
            screen = cv2.putText(screen, text, top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

        if label in ('car', 'bus', 'truck', 'train'):
            screen, car_distance = distance_to_car(screen, top_left, bottom_right)
            # compare with None explicitly: 0.0 is a valid value (the closest one) but is falsy
            if car_distance is not None and 0 <= car_distance < distance:
                distance = car_distance
            screen = cv2.rectangle(screen, top_left, bottom_right, color, 6)
            screen = cv2.putText(screen, text, top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

        if label == 'person':
            screen, person_distance = distance_to_human(screen, top_left, bottom_right)
            # same as above: a distance of 0.0 must not be skipped
            if person_distance is not None and 0 <= person_distance < distance:
                distance = person_distance
            screen = cv2.rectangle(screen, top_left, bottom_right, color, 6)
            screen = cv2.putText(screen, text, top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

    if traffic_lights:
        screen, color_detected = light_recog(screen, direct, traffic_lights)

    return screen, color_detected, distance
def main():
    """
    Demo loop: capture the game window, run the detection and show the annotated
    frame until "q" is pressed in the preview window.
    """
    running = True
    while running:
        annotated, light_color, obstacle_distance = yolo_detection(grab_screen(), 0)

        if light_color:
            print("Color detected: " + light_color)
        # 1 is the value yolo_detection starts with, i.e. nothing was reported
        if obstacle_distance != 1:
            print("Distance to obstacle: {}".format(obstacle_distance))

        cv2.imshow("Frame", annotated)
        if (cv2.waitKey(1) & 0xFF) == ord("q"):
            cv2.destroyAllWindows()
            running = False
# run the demo loop only when the file is executed as a script
if __name__ == '__main__':
    main()
================================================
FILE: requirements.txt
================================================
### To install the packages type in the console:
### pip install -r requirements.txt
numpy
opencv-python
# tensorflow
tensorflow-gpu
# Python for Windows Extensions
pywin32
# For data management
h5py
# A high-level neural networks API capable of running on top of TensorFlow
Keras
# Tools for data mining and data analysis
scikit-learn
# To read information from a gamepad
inputs
# for object detection module
Shapely
# for YOLO
Cython
================================================
FILE: training/model.py
================================================
"""
NN model
"""
from keras.layers import Lambda, Conv2D, Dropout, Dense, Flatten, Concatenate, Input, MaxPooling2D
from keras.models import Model
from training.utils import INPUT_SHAPE, RADAR_SHAPE
# original Nvidia model
# def build_model(args):
# """
# NVIDIA model used
# Image normalization to avoid saturation and make gradients work better.
# Convolution: 5x5, filter: 24, strides: 2x2, activation: ELU
# Convolution: 5x5, filter: 36, strides: 2x2, activation: ELU
# Convolution: 5x5, filter: 48, strides: 2x2, activation: ELU
# Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
# Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
# Drop out (0.5)
# Fully connected: neurons: 100, activation: ELU
# Fully connected: neurons: 50, activation: ELU
# Fully connected: neurons: 10, activation: ELU
# Fully connected: neurons: 1 (output)
# # the convolution layers are meant to handle feature engineering
# the fully connected layer for predicting the steering angle.
# dropout avoids overfitting
# ELU(Exponential linear unit) function takes care of the Vanishing gradient problem.
# """
# model = Sequential()
# model.add(Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE))
# model.add(Conv2D(24, (5, 5), activation='elu', strides=(2, 2)))
# model.add(Conv2D(36, (5, 5), activation='elu', strides=(2, 2)))
# model.add(Conv2D(48, (5, 5), activation='elu', strides=(2, 2)))
# model.add(Conv2D(64, (3, 3), activation='elu'))
# model.add(Conv2D(64, (3, 3), activation='elu'))
# model.add(Dropout(args.keep_prob))
# model.add(Flatten())
# model.add(Dense(100, activation='elu'))
# model.add(Dense(50, activation='elu'))
# model.add(Dense(10, activation='elu'))
# model.add(Dense(1))
# model.summary()
#
# return model
# original + radar added
# def build_model(args):
# # image model
# img_input = Input(shape=INPUT_SHAPE)
# img_model = (Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE))(img_input)
# img_model = (Conv2D(24, (5, 5), activation='elu', strides=(2, 2)))(img_model)
# img_model = (Conv2D(36, (5, 5), activation='elu', strides=(2, 2)))(img_model)
# img_model = (Conv2D(48, (5, 5), activation='elu', strides=(2, 2)))(img_model)
# img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model)
# img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model)
# img_model = (Dropout(args.keep_prob))(img_model)
# img_model = (Flatten())(img_model)
# img_model = (Dense(100, activation='elu'))(img_model)
#
# # radar model
# radar_input = Input(shape=RADAR_SHAPE)
# radar_model = (Conv2D(10, (5, 5), activation='elu'))(radar_input)
# radar_model = (MaxPooling2D((2, 2)))(radar_model)
# radar_model = (Conv2D(20, (5, 5), activation='elu'))(radar_model)
# radar_model = (MaxPooling2D((2, 2)))(radar_model)
# radar_model = (Dropout(args.keep_prob / 2))(radar_model)
# radar_model = (Flatten())(radar_model)
# radar_model = (Dense(30, activation='elu'))(radar_model)
#
# # combined model
# out = Concatenate()([img_model, radar_model])
# out = (Dense(50, activation='elu'))(out)
# out = (Dense(10, activation='elu'))(out)
# out = (Dense(1))(out)
#
# final_model = Model(inputs=[img_input, radar_input], outputs=out)
# final_model.summary()
#
# return final_model
# original + radar and speed info added
def build_model(args):
    """
    Build the network with three inputs: camera image, radar patch and speed.

    The image branch is the NVIDIA style convolution stack, the radar branch is a
    small convolution/pooling stack; both are merged, and the speed value joins
    right before the last hidden layer.

    :param args: object providing keep_prob, the value given to the Dropout layers
                 (the radar branch uses half of it)
    :return: Keras Model with inputs [image, radar, speed] and a single output value
    """
    # image model: (filters, kernel size, strides) of the convolution stack
    conv_stack = (
        (24, (5, 5), (2, 2)),
        (36, (5, 5), (2, 2)),
        (48, (5, 5), (2, 2)),
        (64, (3, 3), (1, 1)),
        (64, (3, 3), (1, 1)),
    )
    img_input = Input(shape=INPUT_SHAPE)
    # scale pixel values from [0, 255] to [-1, 1]
    img_branch = Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE)(img_input)
    for filters, kernel, strides in conv_stack:
        img_branch = Conv2D(filters, kernel, activation='elu', strides=strides)(img_branch)
    img_branch = Dropout(args.keep_prob)(img_branch)
    img_branch = Flatten()(img_branch)
    img_branch = Dense(100, activation='elu')(img_branch)

    # radar model: two rounds of convolution followed by 2x2 pooling
    radar_input = Input(shape=RADAR_SHAPE)
    radar_branch = radar_input
    for filters in (32, 64):
        radar_branch = Conv2D(filters, (5, 5), activation='elu')(radar_branch)
        radar_branch = MaxPooling2D((2, 2), strides=(2, 2))(radar_branch)
    radar_branch = Dropout(args.keep_prob / 2)(radar_branch)
    radar_branch = Flatten()(radar_branch)
    radar_branch = Dense(10, activation='elu')(radar_branch)

    # speed
    speed_input = Input(shape=(1,))

    # combined model
    merged = Concatenate()([img_branch, radar_branch])
    merged = Dense(50, activation='elu')(merged)
    merged = Concatenate()([merged, speed_input])
    merged = Dense(10, activation='elu')(merged)
    prediction = Dense(1)(merged)

    final_model = Model(inputs=[img_input, radar_input, speed_input], outputs=prediction)
    final_model.summary()
    return final_model
# original + throttle control
# def build_model(args):
# """
# NVIDIA model used
# Image normalization to avoid saturation and make gradients work better.
# Convolution: 5x5, filter: 24, strides: 2x2, activation: ELU
# Convolution: 5x5, filter: 36, strides: 2x2, activation: ELU
# Convolution: 5x5, filter: 48, strides: 2x2, activation: ELU
# Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
# Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
# Drop out (0.5)
# Fully connected: neurons: 100, activation: ELU
# Fully connected: neurons: 50, activation: ELU
# Fully connected: neurons: 10, activation: ELU
# Fully connected: neurons: 1 (output)
# # the convolution layers are meant to handle feature engineering
# the fully connected layer for predicting the steering angle.
# dropout avoids overfitting
# ELU(Exponential linear unit) function takes care of the Vanishing gradient problem.
# """
# # image model
# img_input = Input(shape=INPUT_SHAPE)
# img_model = (Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE))(img_input)
# img_model = (Conv2D(24, (5, 5), activation='elu', strides=(2, 2)))(img_model)
# img_model = (Conv2D(36, (5, 5), activation='elu', strides=(2, 2)))(img_model)
# img_model = (Conv2D(48, (5, 5), activation='elu', strides=(2, 2)))(img_model)
# img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model)
# img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model)
# img_model = (Dropout(args.keep_prob))(img_model)
# img_model = (Flatten())(img_model)
# img_model = (Dense(100, activation='elu'))(img_model)
#
# # speed and direction model
# metrics_input = Input(shape=(2,))
# metrics_model = Dense(2, activation='elu')(metrics_input)
#
# # combined model
# out = Concatenate()([img_model, metrics_model])
# out = (Dense(50, activation='elu'))(out)
# out = (Dense(10, activation='elu'))(out)
# out = (Dense(2))(out)
#
# final_model = Model(inputs=[img_input, metrics_input], outputs=out)
# final_model.summary()
#
# return final_model
================================================
FILE: training/train.py
================================================
# This code based on Siraj Raval's code (https://github.com/llSourcell/How_to_simulate_a_self_driving_car)
"""
Training module. Based on "End to End Learning for Self-Driving Cars" research paper by Nvidia.
"""
import argparse
import h5py
import numpy as np
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split # to split out training and testing data
# path with training files
from data_collection.data_collect import path
from training.model import build_model
# helper class
from training.utils import batch_generator
# for debugging, allows for reproducible (deterministic) results:
# seeds NumPy's global random generator once, at import time of this module
np.random.seed(0)
def load_data(args):
    """
    Open the training data file and split its sample indexes into a training and a validation set

    :param args: parsed command line options; data_dir names the file to open (the default
                 path is used when it is missing or empty), test_size is the validation fraction
    :return: (data, indexes_train, indexes_valid): the opened h5py file and two lists of indexes
    """
    # honour the -d option instead of always opening the default file
    source = getattr(args, 'data_dir', None) or path
    data = h5py.File(source, 'r')
    # list of all possible indexes
    indexes = list(range(data['img'].shape[0]))
    # split the indexes into a training and a validation part (fixed seed, so the split is repeatable)
    indexes_train, indexes_valid = train_test_split(indexes, test_size=args.test_size, random_state=0)

    return data, indexes_train, indexes_valid
def load_weights(model):
    """
    Initialise the given model with the weights of the stored base model

    :param model: model to receive the weights
    :return: the same model object
    """
    # the path is relative, so it is resolved against the current working directory
    pretrained = load_model("..\\training\\base_model.h5")
    weights = pretrained.get_weights()
    model.set_weights(weights)
    return model
def train_model(model, args, data, indexes_train, indexes_valid):
    """
    Train the model

    :param model: Keras model to compile and fit
    :param args: parsed options; save_best_only, learning_rate, batch_size and nb_epoch are used
    :param data: data set handed over to batch_generator
    :param indexes_train: sample indexes used for the training batches
    :param indexes_valid: sample indexes used for the validation batches
    """
    # Saves the model to model-<epoch>.h5 at the end of an epoch.
    # monitor: quantity to watch, verbose: logging mode (0 or 1).
    # With save_best_only a checkpoint is written only for epochs in which the monitored
    # quantity improved.
    # mode: one of {auto, min, max}; for val_acc this should be max, for val_loss min.
    # In auto mode the direction is inferred from the name of the monitored quantity.
    checkpoint = ModelCheckpoint('model-{epoch:03d}.h5',
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=args.save_best_only,
                                 mode='auto')

    # mean squared error between expected and predicted steering value,
    # minimised via gradient descent (Adam)
    model.compile(loss='mean_squared_error', optimizer=Adam(lr=args.learning_rate))

    # Keras expects whole numbers of steps; "/" would hand over floats.
    # Ceiling division keeps a last, partially filled step in the count.
    steps_train = -(-len(indexes_train) // args.batch_size)
    steps_valid = -(-len(indexes_valid) // args.batch_size)

    # Fits the model on data generated batch-by-batch by a Python generator.
    # The generator is run in parallel to the model, for efficiency
    # (e.g. augmentation on the CPU while the model trains on the GPU).
    model.fit_generator(batch_generator(data, indexes_train, args.batch_size, True),
                        steps_per_epoch=steps_train,
                        epochs=args.nb_epoch,
                        max_queue_size=1,
                        validation_data=batch_generator(data, indexes_valid, args.batch_size, False),
                        validation_steps=steps_valid,
                        callbacks=[checkpoint],
                        verbose=1)
# for command line args
def s2b(s):
    """
    Interpret a string as a boolean: 'true', 'yes', 'y' and '1' (any letter case) give True
    """
    return s.lower() in ('true', 'yes', 'y', '1')
def main():
    """
    Read the command line options, load the data set, build the model and train it
    """
    # (flag, help text, destination, converter, default) of every supported option
    options = (
        ('-d', 'data directory', 'data_dir', str, path),
        ('-t', 'test size fraction', 'test_size', float, 0.2),
        ('-k', 'drop out probability', 'keep_prob', float, 0.5),
        ('-n', 'number of epochs', 'nb_epoch', int, 200),
        ('-b', 'batch size', 'batch_size', int, 500),
        ('-o', 'save best models only', 'save_best_only', s2b, 'true'),
        ('-l', 'learning rate', 'learning_rate', float, 1.0e-4),
    )
    parser = argparse.ArgumentParser(description='Behavioral Cloning Training Program')
    for flag, description, dest, converter, default in options:
        parser.add_argument(flag, help=description, dest=dest, type=converter, default=default)
    args = parser.parse_args()

    # print parameters
    divider = '-' * 30
    print(divider)
    print('Parameters')
    print(divider)
    for key, value in vars(args).items():
        print('{:<20} := {}'.format(key, value))
    print(divider)

    # load data
    data, indexes_train, indexes_valid = load_data(args)
    # build model and take over the weights of the previous one
    model = load_weights(build_model(args))
    # train model on data; checkpoints are written by train_model
    train_model(model, args, data, indexes_train, indexes_valid)
# start the training only when the file is executed as a script
if __name__ == '__main__':
    main()
================================================
FILE: training/utils.py
================================================
# This code based on Siraj Raval's code (https://github.com/llSourcell/How_to_simulate_a_self_driving_car)
import math
import cv2
import numpy as np
import tensorflow as tf
# size of the camera image fed to the network, as (height, width, channels)
IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS = 66, 200, 3
INPUT_SHAPE = (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)
# size of the radar patch fed to the network, as (height, width, channels)
RADAR_HEIGHT, RADAR_WIDTH, RADAR_CHANNELS = 20, 20, 1
RADAR_SHAPE = (RADAR_HEIGHT, RADAR_WIDTH, RADAR_CHANNELS)
def crop(image):
    """
    Drop the top 90 and the bottom 50 rows of the image
    (the sky at the top and the car front at the bottom)
    """
    kept_rows = slice(90, -50)
    return image[kept_rows, :, :]
def resize(image):
    """
    Resize the image to the input shape used by the network model

    :param image: image to scale
    :return: image of IMAGE_WIDTH x IMAGE_HEIGHT pixels
    """
    # The interpolation flag has to be given by keyword: the third positional
    # parameter of cv2.resize is the destination array, not the interpolation.
    # NOTE(review): models trained before this fix saw the default interpolation - verify or retrain.
    return cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_AREA)
def rgb2yuv(image):
    """
    Change the color space of the image from RGB to YUV (the NVIDIA model works on YUV)
    """
    yuv = cv2.cvtColor(image, cv2.COLOR_RGB2YUV)
    return yuv
def preprocess(image):
    """
    Run the full preprocessing chain: crop, then resize, then convert to YUV
    """
    return rgb2yuv(resize(crop(image)))
# def choose_image(data_dir, center, left, right, steering_angle):
# """
# Randomly choose an image from the center, left or right, and adjust
# the steering angle.
# """
# choice = np.random.choice(3)
# if choice == 0:
# return load_image(data_dir, left), steering_angle + 0.2
# elif choice == 1:
# return load_image(data_dir, right), steering_angle - 0.2
# return load_image(data_dir, center), steering_angle
# flip image causes car riding on the opposite direction lane
# def random_flip(image, steering_angle):
# """
# Randomly flip the image left <-> right, and adjust the steering angle.
# """
# if np.random.rand() < 0.5:
# image = cv2.flip(image, 1)
# steering_angle = -steering_angle
# return image, steering_angle
def random_translate(image, steering_angle, range_x, range_y):
    """
    Shift the image by a random offset and correct the steering value for large horizontal shifts.

    :param image: image to shift
    :param steering_angle: steering value belonging to the image
    :param range_x: total width of the horizontal offset range (offset lies in +-range_x / 2)
    :param range_y: total width of the vertical offset range (offset lies in +-range_y / 2)
    :return: (shifted image, corrected steering value limited to [-10, 10] when corrected)
    """
    shift_x = range_x * (np.random.rand() - 0.5)
    shift_y = range_y * (np.random.rand() - 0.5)

    # adjusting steering angle: every started 25 px count as one step (rounded away
    # from zero) and each step beyond the second one changes the steering by one
    steps = shift_x / 25
    if steps > 0:
        steps = math.ceil(steps)
        if steps > 2:
            steering_angle = min(steering_angle + (steps - 2), 10)
    else:
        steps = math.floor(steps)
        if steps < -2:
            steering_angle = max(steering_angle + (steps + 2), -10)

    # apply an affine transformation (pure translation) to the image
    rows, cols = image.shape[:2]
    translation = np.float32([[1, 0, shift_x], [0, 1, shift_y]])
    shifted = cv2.warpAffine(image, translation, (cols, rows))

    return shifted, steering_angle
def random_shadow(image):
    """
    Darken one side of a random line that runs from the top edge to the bottom edge of the image

    :param image: RGB image of any size
    :return: RGB image with the lightness of one side scaled by a random factor in [0.2, 0.5)
    """
    # use the size of the given image: augmentation runs on frames that are
    # not yet resized to the network input size
    height, width = image.shape[:2]

    # (x1, y1) and (x2, y2) form a line from the top edge to the bottom edge
    x1, y1 = width * np.random.rand(), 0
    x2, y2 = width * np.random.rand(), height
    # np.mgrid returns the row indexes (y) first and the column indexes (x) second
    ym, xm = np.mgrid[0:height, 0:width]

    # Side of the line a pixel lies on. Comparing slopes
    #   (ym-y1)/(xm-x1) > (y2-y1)/(x2-x1)
    # would divide by zero for x2 == x1, so the cross-multiplied form is used:
    #   (ym-y1)*(x2-x1) - (y2-y1)*(xm-x1) > 0
    side = (ym - y1) * (x2 - x1) - (y2 - y1) * (xm - x1) > 0

    # choose which side should have the shadow and how dark it gets
    cond = side == np.random.randint(2)
    s_ratio = np.random.uniform(low=0.2, high=0.5)

    # scale the lightness: channel 1 of HLS (Hue, Lightness, Saturation)
    hls = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
    hls[:, :, 1][cond] = hls[:, :, 1][cond] * s_ratio
    return cv2.cvtColor(hls, cv2.COLOR_HLS2RGB)
def random_brightness(image):
    """
    Randomly adjust brightness of the image.

    :param image: RGB image
    :return: RGB image whose brightness is scaled by a random factor between 0.8 and 1.2
    """
    # HSV (Hue, Saturation, Value) is also called HSB ('B' for Brightness).
    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    ratio = 1.0 + 0.4 * (np.random.rand() - 0.5)
    # limit the result to 255: larger values would wrap around when stored
    # in an 8-bit channel and turn bright pixels dark
    hsv[:, :, 2] = np.clip(hsv[:, :, 2] * ratio, 0, 255)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
def augment(image, steering_angle, range_x=250, range_y=20):
    """
    Produce an augmented copy of the image together with the matching steering value.

    The image is shifted first (which may change the steering value), then a
    random shadow and a random brightness change are applied.
    """
    shifted, steering_angle = random_translate(image, steering_angle, range_x, range_y)
    shaded = random_shadow(shifted)
    return random_brightness(shaded), steering_angle
def batch_generator(data, indexes, batch_size, is_training):
    """
    Endlessly generate batches of network inputs and the steering values belonging to them

    :param data: opened data set whose 'img', 'controls' and 'metrics' entries are read per sample
    :param indexes: indexes of the samples this generator may draw from
    :param batch_size: number of samples per batch (fewer only if indexes is shorter)
    :param is_training: if True, samples are randomly augmented
    :return: generator yielding ([images, radars, speeds], controls)
    """
    # preprocessing on the CPU
    with tf.device('/cpu:0'):
        while True:
            # Fresh buffers for every batch: a batch that was already handed out may
            # still be queued or in use while the next one is being filled.
            images = np.empty([batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS])
            radars = np.empty([batch_size, RADAR_HEIGHT, RADAR_WIDTH, RADAR_CHANNELS])
            speeds = np.empty(batch_size)
            controls = np.empty(batch_size)

            i = 0
            # every batch is drawn from a new random order of the indexes
            for index in np.random.permutation(indexes):
                camera = data['img'][index]
                # radar: 20x20 patch of the stored frame, cut out before any augmentation
                radar = cv2.cvtColor(camera[206:226, 25:45, :], cv2.COLOR_RGB2BGR)
                steer = data['controls'][index][1]

                # augmentation: samples with |steer| < 0.4 are always augmented,
                # the others with a probability of 0.6
                if is_training:
                    prob = np.random.rand()
                    if (abs(steer) < 0.4 and prob > 0.2) or (prob < 0.6):
                        camera, steer = augment(camera, steer)

                # add the image and steering angle to the batch
                images[i] = preprocess(camera)
                # last channel after the RGB -> BGR swap, i.e. the first channel of the stored frame
                radars[i] = radar[:, :, 2:3]
                # steering value scaled down by 10
                controls[i] = steer / 10
                speeds[i] = data['metrics'][index][0]

                i += 1
                if i == batch_size:
                    break

            # hand out only the rows that were filled (all of them for a complete batch)
            yield [images[:i], radars[:i], speeds[:i]], controls[:i]
gitextract_4lc496_7/
├── README.md
├── data_collection/
│ ├── data_balancing.py
│ ├── data_collect.py
│ ├── gamepad_cap.py
│ ├── histogram.py
│ ├── img_process.py
│ ├── key_cap.py
│ └── resources/
│ ├── arrows.npy
│ ├── arrows_labels.npy
│ ├── digits.npy
│ └── digits_labels.npy
├── drivers.txt
├── driving/
│ ├── drive.py
│ └── gamepad.py
├── game_plugins.txt
├── object_detection/
│ ├── direction.py
│ ├── lane_detect.py
│ └── object_detect.py
├── requirements.txt
└── training/
├── base_model.h5
├── model.py
├── models/
│ └── original + radar/
│ └── base_model.h5
├── train.py
└── utils.py
SYMBOL INDEX (65 symbols across 13 files)
FILE: data_collection/data_balancing.py
function save (line 13) | def save(data_img, controls, metrics):
function main (line 23) | def main():
FILE: data_collection/data_collect.py
function save (line 34) | def save(data_img, controls, metrics):
function delete (line 47) | def delete(session):
function main (line 54) | def main():
FILE: data_collection/gamepad_cap.py
class Gamepad (line 23) | class Gamepad:
method __init__ (line 24) | def __init__(self):
method open (line 32) | def open(self):
method run (line 36) | def run(self):
method get_state (line 53) | def get_state(self):
method get_RB (line 75) | def get_RB(self):
method get_LB (line 78) | def get_LB(self):
method close (line 81) | def close(self):
FILE: data_collection/img_process.py
function initKNN (line 13) | def initKNN(data, labels, shape):
function grab_screen (line 28) | def grab_screen(winName: str = "Grand Theft Auto V"):
function predict (line 77) | def predict(img, knn):
function preprocess (line 82) | def preprocess(img):
function convert_speed (line 88) | def convert_speed(num1, num2, num3):
function img_process (line 106) | def img_process(winName: str = "Grand Theft Auto V"):
FILE: data_collection/key_cap.py
function key_check (line 14) | def key_check():
FILE: driving/drive.py
function set_gamepad (line 30) | def set_gamepad(controls):
function drive (line 53) | def drive(model):
function main (line 169) | def main():
FILE: driving/gamepad.py
class XInputDevice (line 28) | class XInputDevice:
method __init__ (line 29) | def __init__(self, port):
method PlugIn (line 34) | def PlugIn(self):
method UnPlug (line 37) | def UnPlug(self, force=False):
method SetBtn (line 43) | def SetBtn(self, button, value):
method SetTrigger (line 61) | def SetTrigger(self, trigger, value):
method SetAxis (line 70) | def SetAxis(self, axis, value):
method SetDpad (line 81) | def SetDpad(self, direction, value=0):
method GetLedNumber (line 96) | def GetLedNumber(self, pLed):
method GetVibration (line 99) | def GetVibration(self, pVib):
FILE: object_detection/direction.py
class Direct (line 4) | class Direct(Enum):
FILE: object_detection/lane_detect.py
function crop (line 11) | def crop(image):
function grayscale (line 18) | def grayscale(img):
function canny (line 26) | def canny(img, low_threshold=100, high_threshold=300):
function gaussian_blur (line 33) | def gaussian_blur(img, kernel_size):
function region_of_interest (line 40) | def region_of_interest(img, vertices):
function construct_lane (line 66) | def construct_lane(lines):
function hough_lines (line 193) | def hough_lines(img, rho=6, theta=np.pi / 120, threshold=160, min_line_l...
function add_images (line 205) | def add_images(img, initial_img):
function draw_lane (line 220) | def draw_lane(original_img, lane, stop_line, left_color, right_color, th...
function detect_lane (line 267) | def detect_lane(screen):
function main (line 285) | def main():
FILE: object_detection/object_detect.py
function light_recog (line 24) | def light_recog(frame, direct, traffic_lights):
function distance_to_car (line 90) | def distance_to_car(frame, top_left, bottom_right):
function distance_to_human (line 113) | def distance_to_human(frame, top_left, bottom_right):
function yolo_detection (line 131) | def yolo_detection(screen, direct):
function main (line 178) | def main():
FILE: training/model.py
function build_model (line 86) | def build_model(args):
FILE: training/train.py
function load_data (line 26) | def load_data(args):
function load_weights (line 39) | def load_weights(model):
function train_model (line 49) | def train_model(model, args, data, indexes_train, indexes_valid):
function s2b (line 91) | def s2b(s):
function main (line 99) | def main():
FILE: training/utils.py
function crop (line 15) | def crop(image):
function resize (line 22) | def resize(image):
function rgb2yuv (line 29) | def rgb2yuv(image):
function preprocess (line 36) | def preprocess(image):
function random_translate (line 70) | def random_translate(image, steering_angle, range_x, range_y):
function random_shadow (line 99) | def random_shadow(image):
function random_brightness (line 127) | def random_brightness(image):
function augment (line 138) | def augment(image, steering_angle, range_x=250, range_y=20):
function batch_generator (line 151) | def batch_generator(data, indexes, batch_size, is_training):
Condensed preview — 24 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (64K chars).
[
{
"path": "README.md",
"chars": 306,
"preview": "# Self-Driving Car for GTA V\n### Overview\nThe aim of this project is to create a self-driving car using a virtual simila"
},
{
"path": "data_collection/data_balancing.py",
"chars": 2236,
"preview": "import h5py\n\nfrom data_collection.data_collect import path as source_path\n\ndest_path = \"F:\\Graduation_Project\\\\training_"
},
{
"path": "data_collection/data_collect.py",
"chars": 4683,
"preview": "\"\"\"\nData collection module (saves data in H5 format).\nSaves screen captures and pressed keys into a file\nfor further tra"
},
{
"path": "data_collection/gamepad_cap.py",
"chars": 2090,
"preview": "\"\"\"\nModule for reading information from an Xbox gamepad\n\"\"\"\n\nimport threading\n\nfrom inputs import get_gamepad\n\n# Gamepad"
},
{
"path": "data_collection/histogram.py",
"chars": 392,
"preview": "\"\"\"\nHistogram of turns (for future balancing of data)\n\"\"\"\n\nimport h5py\nimport matplotlib.pyplot as plt\nimport numpy as n"
},
{
"path": "data_collection/img_process.py",
"chars": 4182,
"preview": "\"\"\"\nModule for preprocessing screen captures\n\"\"\"\n\nimport win32gui\nimport win32ui\n\nimport cv2\nimport numpy as np\nimport w"
},
{
"path": "data_collection/key_cap.py",
"chars": 381,
"preview": "# Citation: Box Of Hats (https://github.com/Box-Of-Hats)\n\n\"\"\"\nModule for reading keys from a keyboard\n\"\"\"\n\nimport win32a"
},
{
"path": "drivers.txt",
"chars": 266,
"preview": "# For testing AI an XBox controller emulator is needed\n# https://github.com/shauleiz/ScpVBus/releases\n\nScpVBus\n\n# Instal"
},
{
"path": "driving/drive.py",
"chars": 5601,
"preview": "\"\"\"\nCar driving module.\n\"\"\"\n\n# reading and writing files\nimport os\nimport time\n\nimport cv2\nimport numpy as np\n# load our"
},
{
"path": "driving/gamepad.py",
"chars": 2750,
"preview": "# This code based on Musi13's code (https://github.com/Musi13/pyvxbox)\n\n\"\"\"\nGamepad emulating module.\n\"\"\"\n\nimport sys\nfr"
},
{
"path": "game_plugins.txt",
"chars": 226,
"preview": "### List of plugins used in GTA V\n### for generating better conditions for AI\n\n# allows installation of plugins\nScript H"
},
{
"path": "object_detection/direction.py",
"chars": 164,
"preview": "from enum import Enum\n\n\nclass Direct(Enum):\n STRAIGHT = 0\n LEFT = 1\n RIGHT = 2\n SLIGHTLY_LEFT = 3\n SLIGHT"
},
{
"path": "object_detection/lane_detect.py",
"chars": 10918,
"preview": "import math\n\nimport cv2\nimport numpy as np\n\nfrom data_collection.img_process import grab_screen\n\nprev_lines = [[], [], ["
},
{
"path": "object_detection/object_detect.py",
"chars": 7608,
"preview": "import cv2\nimport numpy as np\nfrom darkflow.net.build import TFNet\nfrom shapely.geometry import box, Polygon\n\nfrom data_"
},
{
"path": "requirements.txt",
"chars": 437,
"preview": "### To install the packages type in the console:\n### pip install -r requirements.txt\n\nnumpy\nopencv-python\n# tensorflow\nt"
},
{
"path": "training/model.py",
"chars": 7272,
"preview": "\"\"\"\nNN model\n\"\"\"\n\nfrom keras.layers import Lambda, Conv2D, Dropout, Dense, Flatten, Concatenate, Input, MaxPooling2D\nfro"
},
{
"path": "training/train.py",
"chars": 5299,
"preview": "# This code based on Siraj Raval's code (https://github.com/llSourcell/How_to_simulate_a_self_driving_car)\n\n\"\"\"\nTraining"
},
{
"path": "training/utils.py",
"chars": 6475,
"preview": "# This code based on Siraj Raval's code (https://github.com/llSourcell/How_to_simulate_a_self_driving_car)\n\nimport math\n"
}
]
// ... and 6 more files (download for full content)
About this extraction
This page contains the full source code of the hadipash/AI_GTA5 GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 24 files (59.8 KB), approximately 17.0k tokens, and a symbol index with 65 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.