Full Code of hadipash/AI_GTA5 for AI

master 424e615419d9 cached
24 files
59.8 KB
17.0k tokens
65 symbols
1 requests
Download .txt
Repository: hadipash/AI_GTA5
Branch: master
Commit: 424e615419d9
Files: 24
Total size: 59.8 KB

Directory structure:
gitextract_4lc496_7/

├── README.md
├── data_collection/
│   ├── data_balancing.py
│   ├── data_collect.py
│   ├── gamepad_cap.py
│   ├── histogram.py
│   ├── img_process.py
│   ├── key_cap.py
│   └── resources/
│       ├── arrows.npy
│       ├── arrows_labels.npy
│       ├── digits.npy
│       └── digits_labels.npy
├── drivers.txt
├── driving/
│   ├── drive.py
│   └── gamepad.py
├── game_plugins.txt
├── object_detection/
│   ├── direction.py
│   ├── lane_detect.py
│   └── object_detect.py
├── requirements.txt
└── training/
    ├── base_model.h5
    ├── model.py
    ├── models/
    │   └── original + radar/
    │       └── base_model.h5
    ├── train.py
    └── utils.py

================================================
FILE CONTENTS
================================================

================================================
FILE: README.md
================================================
# Self-Driving Car for GTA V
### Overview
The aim of this project is to create a self-driving car using a virtual simulator (specifically GTA V).

### [Youtube Video](https://www.youtube.com/watch?v=BRK0wm7rrfQ)
<p align="center">
  <img src="https://github.com/hadipash/AI_GTA5/raw/master/demo.gif">
</p>


================================================
FILE: data_collection/data_balancing.py
================================================
import h5py

from data_collection.data_collect import path as source_path

# Raw string: "\G" is not a valid escape sequence, so the plain literal only
# produced the intended path by accident (and warns on newer Python versions).
dest_path = r"F:\Graduation_Project\training_data_balanced.h5"

# Output file with three resizable datasets, written in chunks of 30 rows:
#   img      - 240x320x3 unsigned bytes per row
#   controls - 2 signed bytes per row
#   metrics  - 2 unsigned bytes per row
destination = h5py.File(dest_path, 'w')
destination.create_dataset('img', (0, 240, 320, 3), dtype='u1', maxshape=(None, 240, 320, 3), chunks=(30, 240, 320, 3))
destination.create_dataset('controls', (0, 2), dtype='i1', maxshape=(None, 2), chunks=(30, 2))
destination.create_dataset('metrics', (0, 2), dtype='u1', maxshape=(None, 2), chunks=(30, 2))


def save(data_img, controls, metrics):
    """
    Append one batch of rows to the 'img', 'controls' and 'metrics' datasets
    of the destination file. Does nothing when `data_img` is empty.
    """
    if not data_img:  # nothing buffered
        return

    batches = (("img", data_img), ("controls", controls), ("metrics", metrics))
    for name, batch in batches:
        dataset = destination[name]
        count = len(batch)
        # grow the dataset by `count` rows, then fill the new tail
        dataset.resize((dataset.shape[0] + count), axis=0)
        dataset[-count:] = batch


def main():
    """
    Copy a balanced subset of the source recording into the destination file.

    Rows whose first metric is 0 (speed) or whose second metric is 6
    (arrived at the destination) are dropped. Of the remaining rows, every
    row with a non-zero steering value is kept, but only every 5th row with
    a zero steering value.
    """
    source = h5py.File(source_path, 'r')
    images = []
    controls = []
    metrics = []

    tuples = 0
    straights = 0
    try:
        for i in range(source['img'].shape[0]):
            # every indexing of an HDF5 dataset is a separate read, so fetch each row once
            metric = source['metrics'][i]
            # skip if speed is 0 or the car has arrived at the destination
            if metric[0] == 0 or metric[1] == 6:
                continue

            control = source['controls'][i]
            if control[1] == 0:
                # save only each 5th straight drive frame
                add = (straights % 5 == 0)
                straights += 1
            else:
                # save all turns
                add = True

            if not add:
                continue

            images.append(source['img'][i])
            controls.append(control)
            metrics.append(metric)
            tuples += 1

            if tuples % 10000 == 0:  # flush periodically (about 2.3 GB of frames)
                print(tuples)
                save(images, controls, metrics)
                images = []
                controls = []
                metrics = []

        save(images, controls, metrics)
        print("Copied: {:d} tuples from the source file".format(tuples))
    finally:
        # close both files even if copying fails half-way
        source.close()
        destination.close()


if __name__ == '__main__':
    main()


================================================
FILE: data_collection/data_collect.py
================================================
"""
Data collection module (saves data in H5 format).
Saves screen captures and pressed keys into a file
for further trainings of NN.
"""

import os
import threading
import time
import winsound

import h5py

from data_collection.gamepad_cap import Gamepad
from data_collection.img_process import img_process
from data_collection.key_cap import key_check

# serializes access to the data file between the recording loop and save threads
lock = threading.Lock()

# open the data file
# Raw string: "\G" is not a valid escape sequence, so the plain literal only
# produced the intended path by accident (and warns on newer Python versions).
path = r"F:\Graduation_Project\training_data.h5"
if os.path.isfile(path):
    # keep appending to an existing recording
    data_file = h5py.File(path, 'a')
else:
    data_file = h5py.File(path, 'w')
    # Write data in chunks for faster writing and reading by NN
    data_file.create_dataset('img', (0, 240, 320, 3), dtype='u1',
                             maxshape=(None, 240, 320, 3), chunks=(30, 240, 320, 3))
    data_file.create_dataset('controls', (0, 2), dtype='i1', maxshape=(None, 2), chunks=(30, 2))
    data_file.create_dataset('metrics', (0, 2), dtype='u1', maxshape=(None, 2), chunks=(30, 2))


def save(data_img, controls, metrics):
    """
    Append one batch of rows to the 'img', 'controls' and 'metrics' datasets
    of the data file while holding the module lock. Does nothing when
    `data_img` is empty.
    """
    with lock:  # keep the three datasets consistent with each other
        if not data_img:  # nothing buffered
            return

        batches = (("img", data_img), ("controls", controls), ("metrics", metrics))
        for name, batch in batches:
            dataset = data_file[name]
            count = len(batch)
            # grow the dataset by `count` rows, then fill the new tail
            dataset.resize((dataset.shape[0] + count), axis=0)
            dataset[-count:] = batch


def delete(session):
    """
    Drop the most recent rows from all three datasets of the data file.

    At most 500 rows are removed, and never more than a dataset currently
    holds (a negative `session` removes nothing).

    :param session: number of rows recorded in the session to discard
    """
    requested = max(0, min(session, 500))
    # save() runs in background threads; take the same lock so a shrink can
    # never interleave with an append
    with lock:
        for name in ("img", "controls", "metrics"):
            dataset = data_file[name]
            # never shrink below zero rows
            frames = min(requested, dataset.shape[0])
            dataset.resize((dataset.shape[0] - frames), axis=0)


def main():
    """
    Record screen captures together with gamepad input until the user quits.

    RB on the gamepad starts and pauses recording. After each pause the user
    answers with the Y or N key whether the session just recorded is kept.
    While paused, LB flushes the remaining buffers and exits.
    """
    # initialize gamepad
    gamepad = Gamepad()
    gamepad.open()

    alert_time = time.time()  # to signal about exceeding speed limit
    close = False  # to exit execution
    pause = True  # to pause execution
    session = 0  # number of frames recorded in one session
    training_img = []  # lists for storing training data
    controls = []
    metrics = []

    print("Press RB on your gamepad to start recording")
    while not close:
        while not pause:
            # read throttle and steering values from the gamepad
            throttle, steering = gamepad.get_state()
            # get screen, speed and direction
            ignore, screen, speed, direction = img_process("Grand Theft Auto V")

            training_img.append(screen)
            controls.append([throttle, steering])
            metrics.append([speed, direction])
            session += 1

            # audible warning (at most once per second) when driving too fast
            if speed > 60 and time.time() - alert_time > 1:
                winsound.PlaySound('.\\resources\\alert.wav', winsound.SND_ASYNC)
                alert_time = time.time()

            # save the data every 30 iterations; the thread keeps the old
            # lists while the loop continues with fresh ones
            if len(training_img) % 30 == 0:
                threading.Thread(target=save, args=(training_img, controls, metrics)).start()
                training_img = []
                controls = []
                metrics = []

            time.sleep(0.015)  # in order to slow down fps

            if gamepad.get_RB():
                pause = True
                print('Paused. Save the last 15 seconds?')

                # wait for an explicit yes/no answer
                keys = key_check()
                while ('Y' not in keys) and ('N' not in keys):
                    keys = key_check()

                if 'N' in keys:
                    # Frames of this session that are still buffered were never
                    # written to the file, so only the remainder must be removed
                    # from it. Removing `session` rows would cut into rows of an
                    # earlier, accepted session.
                    unsaved = min(session, len(training_img))
                    delete(session - unsaved)
                    if unsaved:
                        # drop only this session's tail; anything older in the
                        # buffers belongs to a session the user chose to keep
                        del training_img[-unsaved:]
                        del controls[-unsaved:]
                        del metrics[-unsaved:]
                    print('Deleted.')
                else:
                    print('Saved.')

                print('To exit the program press LB.')
                session = 0
                time.sleep(0.5)

        if gamepad.get_RB():
            pause = False
            print('Unpaused')
            time.sleep(1)
        elif gamepad.get_LB():
            gamepad.close()
            close = True
            print('Saving data and closing the program.')
            save(training_img, controls, metrics)

    data_file.close()


if __name__ == '__main__':
    main()


================================================
FILE: data_collection/gamepad_cap.py
================================================
"""
Module for reading information from an Xbox gamepad
"""

import threading

from inputs import get_gamepad

# Gamepad part: raw value limits for the stick axis and the triggers
AXIS_MIN, AXIS_MAX = -32768, 32767
TRIGGER_MIN, TRIGGER_MAX = -255, 255

# Scale factors that map raw readings onto the range -10..10
AXIS_MAX_NORM = 10 / AXIS_MAX
AXIS_MIN_NORM = -10 / AXIS_MIN
TRIGGER_MAX_NORM = 10 / TRIGGER_MAX
TRIGGER_MIN_NORM = -10 / TRIGGER_MIN

# scaled readings strictly inside (-DEADZONE, DEADZONE) are reported as 0
DEADZONE = 3


class Gamepad:
    """
    Background reader for gamepad events.

    A worker thread started by `open()` stores the latest raw value of the
    events ABS_X (steering axis), ABS_RZ / ABS_Z (positive / negative
    throttle) and BTN_TR / BTN_TL (RB / LB buttons). `get_state()` converts
    the axis readings into small integers.
    """

    def __init__(self):
        self.x_axis = 0    # latest ABS_X reading
        self.y_axisP = 0   # latest ABS_RZ reading
        self.y_axisN = 0   # negated latest ABS_Z reading
        self.RB = 0
        self.LB = 0
        self.stop = False  # set by close() to end the reader loop
        self._thread = None

    def open(self):
        """Start the background reader thread."""
        self.stop = False
        # Daemon thread: get_gamepad() blocks until the next event arrives, so
        # after close() a non-daemon reader would keep the process alive until
        # the controller is touched again.
        self._thread = threading.Thread(target=self.run, daemon=True)
        self._thread.start()

    def run(self):
        """Reader loop: store the events of interest until `stop` is set."""
        while not self.stop:
            events = get_gamepad()
            for event in events:
                if event.code == "ABS_X":
                    self.x_axis = event.state
                elif event.code == "ABS_RZ":
                    self.y_axisP = event.state
                elif event.code == "ABS_Z":
                    self.y_axisN = -event.state
                elif event.code == "BTN_TR":
                    self.RB = event.state
                elif event.code == "BTN_TL":
                    self.LB = event.state
                # all remaining signals are ignored

    def get_state(self):
        """
        Return `(throttle, steering)` as integers scaled with the module's
        *_NORM factors; values strictly inside the dead zone become 0.
        """
        xAxis = self.x_axis
        # the positive throttle reading wins once it exceeds 60,
        # otherwise the (negated) ABS_Z reading is used
        yAxis = self.y_axisP if self.y_axisP > 60 else self.y_axisN

        # normalize x axis
        if xAxis > 0:
            xAxis = int(round(xAxis * AXIS_MAX_NORM))
        else:
            xAxis = int(round(xAxis * AXIS_MIN_NORM))
        if -DEADZONE < xAxis < DEADZONE:
            xAxis = 0
        # normalize y axis
        if yAxis > 0:
            yAxis = int(round(yAxis * TRIGGER_MAX_NORM))
        else:
            yAxis = int(round(yAxis * TRIGGER_MIN_NORM))
        if -DEADZONE < yAxis < DEADZONE:
            yAxis = 0

        # return throttle and then steering
        return yAxis, xAxis

    def get_RB(self):
        """Return the latest BTN_TR (RB) state."""
        return self.RB

    def get_LB(self):
        """Return the latest BTN_TL (LB) state."""
        return self.LB

    def close(self):
        """Ask the reader loop to stop after the event it is waiting for."""
        self.stop = True


================================================
FILE: data_collection/histogram.py
================================================
"""
Histogram of turns (for future balancing of data)
"""

import h5py
import matplotlib.pyplot as plt
import numpy as np

from data_collection.data_collect import path

# bin edges centred on the integer steering values -10..10
n_bins = [x - 0.5 for x in range(-10, 12)]

# Read only the second column of 'controls' (the values being histogrammed);
# the context manager closes the file even if reading fails.
with h5py.File(path, 'r') as data:
    steering = data['controls'][:, 1]

fig, axs = plt.subplots()
axs.hist(steering, bins=n_bins)

plt.xticks(np.arange(-10, 11, step=1))
plt.show()


================================================
FILE: data_collection/img_process.py
================================================
"""
Module for preprocessing screen captures
"""

import win32gui
import win32ui

import cv2
import numpy as np
import win32con


def initKNN(data, labels, shape):
    """
    Create an OpenCV k-nearest-neighbours model and train it on samples
    loaded from .npy files.

    :param data: path of the .npy file with the training samples
    :param labels: path of the .npy file with the matching labels
    :param shape: number of features per sample (samples are reshaped to rows of this length)
    :return: the trained model
    """
    samples = np.load(data).reshape(-1, shape).astype(np.float32)
    responses = np.load(labels)

    model = cv2.ml.KNearest_create()
    model.train(samples, cv2.ml.ROW_SAMPLE, responses)
    return model


# Raw strings: "\d" is not a valid escape sequence, so the plain literals only
# produced the intended paths by accident (and warn on newer Python versions).
# The paths are relative to the current working directory.
knnDigits = initKNN(r'..\data_collection\resources\digits.npy',
                    r'..\data_collection\resources\digits_labels.npy', 40)
knnArrows = initKNN(r'..\data_collection\resources\arrows.npy',
                    r'..\data_collection\resources\arrows_labels.npy', 90)


# Done by Frannecklp
def grab_screen(winName: str = "Grand Theft Auto V"):
    """
    Capture the screen area covered by the window titled `winName`.

    The window rectangle is shrunk by fixed margins to cut off the title bar
    and borders, then that region is copied from the desktop device context.

    :param winName: exact title of the window to capture
    :return: array of shape (height, width, 3) with the alpha channel dropped
             NOTE(review): GDI bitmaps are usually stored as BGRA, so the
             channel order may really be BGR — confirm before changing it.
    """
    desktop = win32gui.GetDesktopWindow()

    # get area by a window name
    gtawin = win32gui.FindWindow(None, winName)
    # get the bounding box of the window
    left, top, x2, y2 = win32gui.GetWindowRect(gtawin)
    # cut window borders
    top += 32
    left += 3
    y2 -= 4
    x2 -= 4
    width = x2 - left + 1
    height = y2 - top + 1

    # the device context(DC) for the entire window (title bar, menus, scroll bars, etc.)
    hwindc = win32gui.GetWindowDC(desktop)
    # Create a DC object from an integer handle
    srcdc = win32ui.CreateDCFromHandle(hwindc)
    # Create a memory device context that is compatible with the source DC
    memdc = srcdc.CreateCompatibleDC()
    # Create a bitmap object
    bmp = win32ui.CreateBitmap()
    # Create a bitmap compatible with the specified device context
    bmp.CreateCompatibleBitmap(srcdc, width, height)
    # Select an object into the device context.
    memdc.SelectObject(bmp)
    # Copy a bitmap from the source device context to this device context
    # parameters: destPos, size, dc, srcPos, rop(the raster operation))
    memdc.BitBlt((0, 0), (width, height), srcdc, (left, top), win32con.SRCCOPY)

    # the bitmap bits
    signedIntsArray = bmp.GetBitmapBits(True)
    # np.frombuffer is the supported way to view raw bytes as an array
    # (binary-mode np.fromstring is deprecated); 4 bytes per pixel
    img = np.frombuffer(signedIntsArray, dtype='uint8').reshape((height, width, 4))

    # Delete all resources associated with the device context
    srcdc.DeleteDC()
    memdc.DeleteDC()
    # Releases the device context
    win32gui.ReleaseDC(desktop, hwindc)
    # Delete the bitmap and freeing all system resources associated with the object.
    # After the object is deleted, the specified handle is no longer valid.
    win32gui.DeleteObject(bmp.GetHandle())

    # cvtColor returns a new array, so the read-only buffer view is never written to
    return cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)


def predict(img, knn):
    """Classify `img` with the single nearest neighbour of `knn` and return the result array."""
    _, nearest, _, _ = knn.findNearest(img, k=1)
    return nearest


def preprocess(img):
    """
    Convert an RGB image to grayscale and binarize it with a mean adaptive
    threshold (block size 7, constant -5).
    """
    grayscale_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    return cv2.adaptiveThreshold(grayscale_img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY, 7, -5)


def convert_speed(num1, num2, num3):
    """
    Combine up to three recognized digit fields into one integer speed.

    Each argument is a nested result whose `[0][0]` element is the recognized
    class: a digit, or 10 for an empty field. The non-empty digits are read
    from left (num1) to right (num3) as one decimal number; 0 is returned
    when all fields are empty.
    """
    empty = 10
    speed = 0
    for field in (num1, num2, num3):
        digit = field[0][0]
        if digit != empty:
            # shift what we have one decimal place and append the digit
            speed = speed * 10 + int(digit)
    return speed


def img_process(winName: str = "Grand Theft Auto V"):
    """
    Grab the game window and read speed and direction from fixed screen regions.

    :param winName: title of the window to capture
    :return: (full capture, capture resized to 320x240, speed, direction class)
    """
    frame = grab_screen(winName)

    # region with the speed digits
    # Ji Hyun's computer
    speed_roi = preprocess(frame[567:575, 683:702, :])
    # Rustam's computer
    # speed_roi = preprocess(frame[573:581, 683:702, :])

    # three fields for numbers, each flattened to 40 features
    digit_fields = (speed_roi[:, :5], speed_roi[:, 7:12], speed_roi[:, -5:])
    digits = [predict(field.reshape(-1, 40).astype(np.float32), knnDigits)
              for field in digit_fields]

    # one field for direction arrows, flattened to 90 features
    # Ji Hyun's computer
    arrow_roi = preprocess(frame[561:570, 18:28, :])
    # Rustam's computer
    # arrow_roi = preprocess(frame[567:576, 18:28, :])
    arrow_features = arrow_roi.reshape(-1, 90).astype(np.float32)
    direction = int(predict(arrow_features, knnArrows)[0][0])

    speed = convert_speed(*digits)
    small = cv2.resize(frame, (320, 240))

    return frame, small, speed, direction


================================================
FILE: data_collection/key_cap.py
================================================
# Citation: Box Of Hats (https://github.com/Box-Of-Hats)

"""
Module for reading keys from a keyboard
"""

import win32api as wapi

# characters whose key state is polled by key_check(): backspace first,
# then letters, space, digits and a few punctuation marks
keyList = ["\b"] + list("ABCDEFGHIJKLMNOPQRSTUVWXYZ 0123456789,.'£$/\\")


def key_check():
    """Return the entries of keyList for which GetAsyncKeyState reports a non-zero state."""
    return [key for key in keyList if wapi.GetAsyncKeyState(ord(key))]


================================================
FILE: drivers.txt
================================================
# To test the AI, an Xbox controller emulator is needed
# https://github.com/shauleiz/ScpVBus/releases

ScpVBus

# Installation:
# In CMD (administrator): devcon.exe install ScpVBus.inf Root\ScpVBus
# Removal:
# In CMD (administrator): devcon.exe remove Root\ScpVBus


================================================
FILE: driving/drive.py
================================================
"""
Car driving module.
"""

# reading and writing files
import os
import time

import cv2
import numpy as np
# load our saved model
from keras.models import load_model

# helper classes
from data_collection.img_process import img_process
from data_collection.key_cap import key_check
# gamepad axes limits and gamepad module
from driving.gamepad import AXIS_MIN, AXIS_MAX, TRIGGER_MAX, XInputDevice
from object_detection.direction import Direct
# YOLO algorithm
from object_detection.object_detect import yolo_detection
# lane detection algorithm
from object_detection.lane_detect import detect_lane, draw_lane
from training.utils import preprocess

# virtual controller; assigned by drive() before it is used
gamepad = None
# directory holding the trained model file
model_path = r"..\training"


def set_gamepad(controls):
    """
    Apply steering and throttle to the virtual gamepad.

    :param controls: nested sequence `[[steering, throttle]]`. Both values
        are expected in [-1, 1] and are clamped to that range, because larger
        magnitudes would exceed the controller's axis / trigger limits.
        Positive throttle drives the right trigger, negative throttle the
        left trigger; steering drives the X axis.
    """
    steering = max(-1.0, min(1.0, controls[0][0]))
    throttle = max(-1.0, min(1.0, controls[0][1]))

    # trigger value
    trigger = int(round(throttle * TRIGGER_MAX))
    if trigger >= 0:
        # set left trigger to zero
        gamepad.SetTrigger('L', 0)
        gamepad.SetTrigger('R', trigger)
    else:
        # inverse value, and set right trigger to zero
        gamepad.SetTrigger('L', -trigger)
        gamepad.SetTrigger('R', 0)

    # axis value: the negative half of the axis is one step longer than the positive half
    scale = AXIS_MAX if steering >= 0 else -AXIS_MIN
    gamepad.SetAxis('X', int(round(steering * scale)))


def drive(model):
    """
    Interactive driving loop.

    Plugs in a virtual gamepad on port 1, then alternates between a paused
    state (only the capture is shown) and a driving state. While driving,
    each iteration captures the game window, asks `model` for a steering
    value, picks a throttle from speed / detected objects / stop lines,
    nudges the steering when a lane line gets too close, and sends the
    result to the virtual gamepad.

    Keys: T toggles between paused and driving, Z (while paused) unplugs
    the gamepad and exits.

    :param model: object with a `predict` method taking
                  [image batch, radar batch, speed batch]
    """
    global gamepad
    gamepad = XInputDevice(1)
    gamepad.PlugIn()

    # last_time = time.time()  # to measure the number of frames
    close = False  # to exit execution
    pause = True  # to pause execution
    stop = False    # to stop the car
    throttle = 0
    # lane line x positions beyond which the steering gets corrected
    left_line_max = 75
    right_line_max = 670

    print("Press T to start driving")

    while not close:
        # while paused only show the current capture
        yolo_screen, resized, speed, direct = img_process("Grand Theft Auto V")
        cv2.imshow("Driving-mode", yolo_screen)
        cv2.waitKey(1)

        while not pause:
            # apply the preprocessing
            screen, resized, speed, direct = img_process("Grand Theft Auto V")
            # 20x20 patch of the resized frame, reduced to a single channel
            # (index 2 after the RGB->BGR swap, i.e. channel 0 of the patch)
            radar = cv2.cvtColor(resized[206:226, 25:45, :], cv2.COLOR_RGB2BGR)[:, :, 2:3]
            resized = preprocess(resized)
            # both lane lines are drawn green unless a correction happens below
            left_line_color = [0, 255, 0]
            right_line_color = [0, 255, 0]

            # predict steering angle for the image
            # original + radar (small) + speed
            controls = model.predict([np.array([resized]), np.array([radar]), np.array([speed])], batch_size=1)
            # check that the car is following lane
            lane, stop_line = detect_lane(screen)
            # detect objects
            yolo_screen, color_detected, obj_distance = yolo_detection(screen, direct)

            if not stop:
                # adjusting speed: accelerate below 45, coast above 50,
                # keep the previous throttle in between
                if speed < 45:
                    throttle = 0.4
                elif speed > 50:
                    throttle = 0.0

                # an object within distance 0.6: brake (harder when closer)
                # until the car has almost stopped
                if 0 <= obj_distance <= 0.6:
                    if speed < 5:
                        throttle = 0
                    else:
                        throttle = -0.7 if obj_distance <= 0.4 else -0.3

                # red light: brake according to the stop line's first y value
                elif color_detected == "Red":
                    if stop_line:
                        if speed < 5:
                            throttle = 0
                        elif 0 <= stop_line[0][1] <= 50:
                            throttle = -0.5
                        elif 50 < stop_line[0][1] <= 120:
                            throttle = -1
                    # else:
                    #     throttle = -0.5
            # destination reached: brake fully, then pause once nearly stopped
            elif speed > 5:
                throttle = -1
            else:
                throttle = 0
                cv2.destroyAllWindows()
                pause = True

            # adjusting steering angle: when a lane line crosses its limit and
            # the model steers only weakly, force a fixed correction of 0.27
            # and draw that line in a different color
            if lane[0] and lane[0][0] > left_line_max:
                if abs(controls[0][0]) < 0.27:
                    controls[0][0] = 0.27
                    left_line_color = [0, 0, 255]
            elif lane[1] and lane[1][0] < right_line_max:
                if abs(controls[0][0]) < 0.27:
                    controls[0][0] = -0.27
                    right_line_color = [0, 0, 255]

            # set the gamepad values
            set_gamepad([[controls[0][0], throttle]])

            # print('Main loop took {} seconds'.format(time.time() - last_time))
            # last_time = time.time()

            # draw the lane overlay into the same band of rows (280:-130) of the capture
            screen[280:-130, :, :] = draw_lane(screen[280:-130, :, :], lane, stop_line,
                                               left_line_color, right_line_color)
            cv2.imshow("Driving-mode", yolo_screen)
            cv2.waitKey(1)

            # direction class 6 means the destination has been reached
            if direct == 6:
                print("Arrived at destination.")
                stop = True

            # print('Main loop took {} seconds'.format(time.time() - last_time))
            # last_time = time.time()

            keys = key_check()
            if 'T' in keys:
                cv2.destroyAllWindows()
                pause = True
                # release gamepad keys
                set_gamepad([[0, 0]])
                print('Paused. To exit the program press Z.')
                time.sleep(0.5)

        keys = key_check()
        if 'T' in keys:
            pause = False
            stop = False
            print('Unpaused')
            time.sleep(1)
        elif 'Z' in keys:
            cv2.destroyAllWindows()
            close = True
            print('Closing the program.')
            gamepad.UnPlug()


def main():
    """Load 'base_model.h5' from the model directory and start driving with it."""
    trained_model = load_model(os.path.join(model_path, 'base_model.h5'))
    drive(trained_model)


if __name__ == '__main__':
    main()


================================================
FILE: driving/gamepad.py
================================================
# This code based on Musi13's code (https://github.com/Musi13/pyvxbox)

"""
Gamepad emulating module.
"""

import sys
from ctypes import *

# value limits and button states used with the virtual controller
AXIS_MIN, AXIS_MAX = -32768, 32767
TRIGGER_MAX = 255
BTN_ON, BTN_OFF = True, False

dll_path = "vXboxInterface.dll"

# load the vXbox interface library; without it the module cannot work
try:
    _vx = cdll.LoadLibrary(dll_path)
except OSError as load_error:
    print(load_error)
    sys.exit("Unable to load vXbox SDK DLL. Ensure that %s is present" % dll_path)

# the virtual bus must be present before any device can be used
if not _vx.isVBusExists():
    raise Exception('Xbox VBus does not exist')


class XInputDevice:
    """Thin wrapper that forwards calls for one controller port to the vXbox library."""

    def __init__(self, port):
        """Bind to `port`; raises if the library reports a controller on it already."""
        if _vx.isControllerExists(port):
            raise Exception('Port %d is already used' % port)
        self.UserIndex = port

    def PlugIn(self):
        """Plug the virtual controller in."""
        _vx.PlugIn(self.UserIndex)

    def UnPlug(self, force=False):
        """Unplug the virtual controller, using the forced variant when `force` is set."""
        if force:
            _vx.UnPlugForce(self.UserIndex)
        else:
            _vx.UnPlug(self.UserIndex)

    def SetBtn(self, button, value):
        """Set the state of the button named `button`; unknown names raise."""
        setters = {
            'A': _vx.SetBtnA,
            'B': _vx.SetBtnB,
            'X': _vx.SetBtnX,
            'Y': _vx.SetBtnY,
            'Start': _vx.SetBtnStart,
            'Back': _vx.SetBtnBack,
            'LT': _vx.SetBtnLT,
            'RT': _vx.SetBtnRT,
            'LB': _vx.SetBtnLB,
            'RB': _vx.SetBtnRB,
            'GD': _vx.SetBtnGD
        }
        setter = setters.get(button)
        if setter is None:
            raise Exception('Unknown button %s' % str(button))
        setter(self.UserIndex, value)

    def SetTrigger(self, trigger, value):
        """Set trigger 'L' or 'R' to `value`; unknown names raise."""
        setters = {
            'L': _vx.SetTriggerL,
            'R': _vx.SetTriggerR
        }
        setter = setters.get(trigger)
        if setter is None:
            raise Exception('Unknown trigger %s' % str(trigger))
        setter(self.UserIndex, value)

    def SetAxis(self, axis, value):
        """Set axis 'X', 'Y', 'Rx' or 'Ry' to `value`; unknown names raise."""
        setters = {
            'X': _vx.SetAxisX,
            'Y': _vx.SetAxisY,
            'Rx': _vx.SetAxisRx,
            'Ry': _vx.SetAxisRy
        }
        setter = setters.get(axis)
        if setter is None:
            raise Exception('Unknown axis %s' % str(axis))
        setter(self.UserIndex, value)

    def SetDpad(self, direction, value=0):
        """
        Press a D-pad direction ('Up', 'Right', 'Down', 'Left'), or pass the
        raw `value` to the library when `direction` is the empty string.
        """
        setters = {
            'Up': _vx.SetDpadUp,
            'Right': _vx.SetDpadRight,
            'Down': _vx.SetDpadDown,
            'Left': _vx.SetDpadLeft,
            '': _vx.SetDpad
        }
        setter = setters.get(direction)
        if setter is None:
            raise Exception('Unknown direction %s' % str(direction))
        # only the generic setter takes an explicit value
        arguments = (self.UserIndex, value) if direction == '' else (self.UserIndex,)
        setter(*arguments)

    def GetLedNumber(self, pLed):
        """Forward `pLed` to the library's GetLedNumber for this port."""
        _vx.GetLedNumber(self.UserIndex, pLed)

    def GetVibration(self, pVib):
        """Forward `pVib` to the library's GetVibration for this port."""
        _vx.GetVibration(self.UserIndex, pVib)


================================================
FILE: game_plugins.txt
================================================
### List of plugins used in GTA V
### for generating better conditions for AI

# allows installation of plugins
Script Hook V
# for adjusting weather conditions, time, the number of cars and pedestrians, etc.
Simple Trainer for GTA V


================================================
FILE: object_detection/direction.py
================================================
from enum import Enum


class Direct(Enum):
    """
    Driving direction codes.

    NOTE(review): presumably these values match the labels produced by the
    direction-arrow classifier in data_collection/img_process.py — confirm.
    Elsewhere in the project the raw value 6 is compared against the detected
    direction to mean that the destination has been reached.
    """
    STRAIGHT = 0
    LEFT = 1
    RIGHT = 2
    SLIGHTLY_LEFT = 3
    SLIGHTLY_RIGHT = 4
    U_TURN = 5
    ARRIVED = 6


================================================
FILE: object_detection/lane_detect.py
================================================
import math

import cv2
import numpy as np

from data_collection.img_process import grab_screen

# Smoothing state kept between calls to construct_lane():
#   [0] -> [x at the bottom, x at the top] of the last left lane line
#   [1] -> the same for the last right lane line
#   [2] -> [y at x=50, y at x=750] of the last stop line
# An empty list means there is no previous value for that line.
prev_lines = [[], [], []]


def crop(image):
    """
    Cut away the top 280 rows (sky) and the bottom 130 rows (car front),
    keeping all columns and channels.
    """
    first_row, last_row = 280, -130
    return image[first_row:last_row, :, :]


def grayscale(img):
    """
    Convert an RGB image to grayscale.
    The result has a single color channel.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    return gray


def canny(img, low_threshold=100, high_threshold=300):
    """
    Run the Canny edge detector on `img` with the given thresholds
    and return its output.
    """
    edges = cv2.Canny(img, low_threshold, high_threshold)
    return edges


def gaussian_blur(img, kernel_size):
    """
    Blur `img` with a square Gaussian kernel of side `kernel_size`
    (sigma 30 in both directions).
    """
    kernel = (kernel_size, kernel_size)
    return cv2.GaussianBlur(img, kernel, sigmaX=30, sigmaY=30)


def region_of_interest(img, vertices):
    """
    Mask an image with a polygon.

    Pixels inside the polygon(s) given by `vertices` are kept, everything
    else becomes black. `vertices` should be a numpy array of integer points.
    """
    # fill value: one 255 per channel for multi-channel images, a plain 255 otherwise
    fill_color = (255,) * img.shape[2] if len(img.shape) > 2 else 255

    # start from an all-black mask and paint the polygon into it
    mask = np.zeros_like(img)
    cv2.fillPoly(mask, vertices, fill_color)

    # keep the image only where the mask is non-zero
    return cv2.bitwise_and(img, mask)


def _limit_step(value, previous, max_step):
    """Return `value`, but no further than `max_step` away from `previous`."""
    if abs(value - previous) > max_step:
        return previous - max_step if previous > value else previous + max_step
    return value


def _track_lane_line(index, xs, ys, min_y, max_y, max_step):
    """
    Fit one lane line through the collected points and smooth it against
    the previous result stored in `prev_lines[index]`.

    Without points the previous line is reused once and then forgotten.
    Returns `[x_bottom, max_y, x_top, min_y]`, or `[]` when nothing is known.
    """
    if xs:
        # fit x as a function of y so that steep lines stay well-conditioned
        fit = np.poly1d(np.polyfit(ys, xs, deg=1))
        x1 = int(fit(max_y))
        x2 = int(fit(min_y))
        if prev_lines[index]:
            x1 = _limit_step(x1, prev_lines[index][0], max_step)
            x2 = _limit_step(x2, prev_lines[index][1], max_step)
        prev_lines[index] = [x1, x2]
        return [x1, max_y, x2, min_y]

    if prev_lines[index]:
        held = [prev_lines[index][0], max_y, prev_lines[index][1], min_y]
        prev_lines[index] = []
        return held

    return []


def construct_lane(lines):
    """
    Turn Hough line segments into a left lane line, a right lane line and
    an optional stop line.

    Segments are grouped by slope: |slope| <= 0.05 is a stop line candidate
    (only when both end points lie below y = 20), |slope| >= 0.3 belongs to
    the left line when the slope is negative and to the right line
    otherwise; everything in between and all vertical segments are ignored.
    Each group is fitted with a straight line, and every end point may move
    at most 7 pixels compared to the previous call (state in `prev_lines`).
    A stop line is only reported when segments at two clearly different
    heights were found (its top and bottom edge).

    :param lines: segments shaped like the output of cv2.HoughLinesP
                  (iterable of [[x1, y1, x2, y2]]), or None
    :return: `(lane, stop_line)` where `lane` is `[left, right]`, each
             `[x1, y1, x2, y2]` or `[]`, and `stop_line` is a list with at
             most one `[x1, y1, x2, y2]` entry
    """
    left_line_x = []
    left_line_y = []
    right_line_x = []
    right_line_y = []
    stop_line_x_first = []
    stop_line_y_first = []
    second_stop_edge_found = False

    lane = [[], []]
    stop_line = []

    min_y = 0
    max_y = 190

    if lines is not None:
        for line in lines:
            for x1, y1, x2, y2 in line:
                if x1 == x2:
                    # A vertical segment has no finite slope; treating it as
                    # slope 0 would misclassify it as a horizontal stop line.
                    continue
                slope = (y2 - y1) / (x2 - x1)
                steepness = math.fabs(slope)
                if 0.05 < steepness < 0.3:  # not interested
                    continue
                if steepness <= 0.05:  # stop line
                    if (y1 > 20) and (y2 > 20):
                        # we need to detect two stop lines (top and bottom)
                        if not stop_line_x_first or abs(stop_line_y_first[0] - y1) < 15:
                            stop_line_x_first.extend([x1, x2])
                            stop_line_y_first.extend([y1, y2])
                        else:
                            second_stop_edge_found = True
                elif slope <= 0:  # negative slope: left group
                    left_line_x.extend([x1, x2])
                    left_line_y.extend([y1, y2])
                else:  # otherwise: right group
                    right_line_x.extend([x1, x2])
                    right_line_y.extend([y1, y2])

        offset = 7  # largest allowed movement of an end point between calls
        lane[0] = _track_lane_line(0, left_line_x, left_line_y, min_y, max_y, offset)
        lane[1] = _track_lane_line(1, right_line_x, right_line_y, min_y, max_y, offset)

        if second_stop_edge_found:
            # the stop line is fitted through the first edge only
            poly_stop = np.poly1d(np.polyfit(
                stop_line_x_first,
                stop_line_y_first,
                deg=1
            ))

            y1 = int(poly_stop(50))
            y2 = int(poly_stop(750))
            if prev_lines[2]:
                y1 = _limit_step(y1, prev_lines[2][0], offset)
                y2 = _limit_step(y2, prev_lines[2][1], offset)

            prev_lines[2] = [y1, y2]
            stop_line.append([50, y1, 750, y2])
        elif prev_lines[2]:
            # reuse the previous stop line once, then forget it
            stop_line.append([50, prev_lines[2][0], 750, prev_lines[2][1]])
            prev_lines[2] = []

    return lane, stop_line


def hough_lines(img, rho=6, theta=np.pi / 120, threshold=160, min_line_len=60, max_line_gap=10):
    """
    Detect line segments with the probabilistic Hough transform.

    `img` should be the output of a Canny transform.

    `rho`, `theta` and `threshold` are forwarded positionally to
    `cv2.HoughLinesP`; `min_line_len` and `max_line_gap` are forwarded as
    its `minLineLength` and `maxLineGap` arguments.

    Returns the value produced by `cv2.HoughLinesP` (the detected segments).
    Nothing is drawn on an image here.
    """
    lines = cv2.HoughLinesP(img, rho, theta, threshold, np.array([]), minLineLength=min_line_len,
                            maxLineGap=max_line_gap)
    return lines


# Python 3 has support for cool math symbols.
def add_images(img, initial_img):
    """
    Overlay `img` on top of `initial_img`.

    `img` is expected to be a mostly black image with the lane drawn on it
    (see draw_lane()); `initial_img` is the image the overlay is placed on.

    The result is computed with `cv2.add(initial_img, img)`, i.e. a plain
    per-element sum of the two images. It is NOT the weighted blend
    initial_img * α + img * β + γ that `cv2.addWeighted` would produce.

    NOTE: initial_img and img must be the same shape!
    """
    return cv2.add(initial_img, img)


def draw_lane(original_img, lane, stop_line, left_color, right_color, thickness=5):
    """
    Render the detected lane (and stop line, if any) on top of `original_img`.

    `lane` is a two-item sequence holding the left and right border, each
    either empty or `[x1, y1, x2, y2]`. `stop_line` is a list of
    `[x1, y1, x2, y2]` segments. The drawing is done on a black canvas of the
    same height/width as `original_img`, which is then merged with it through
    add_images().
    """
    canvas_height, canvas_width = original_img.shape[0], original_img.shape[1]
    overlay = np.zeros((canvas_height, canvas_width, 3), dtype=np.uint8)
    inset = 8
    left_border, right_border = lane[0], lane[1]
    fill_polygon = None

    # lane borders
    for border, border_color in ((left_border, left_color), (right_border, right_color)):
        if border:
            bx1, by1, bx2, by2 = border
            cv2.line(overlay, (int(bx1), int(by1)), (int(bx2), int(by2)), border_color, thickness)

    # area between the borders, shrunk by `inset` pixels on each side
    if left_border and right_border:
        lx1, ly1, lx2, ly2 = left_border
        rx1, ry1, rx2, ry2 = right_border
        fill_polygon = np.array([[(lx1 + inset, ly1), (lx2 + inset, ly2),
                                  (rx2 - inset, ry2), (rx1 - inset, ry1)]], np.int32)
        cv2.fillPoly(overlay, fill_polygon, [40, 60, 0])

    # stop line, clipped to the lane widened by `inset` pixels on each side
    if stop_line:
        clip_polygon = None
        if fill_polygon is not None:
            clip_polygon = np.array([[(lx1 - inset, ly1), (lx2 - inset, ly2),
                                      (rx2 + inset, ry2), (rx1 + inset, ry1)]], np.int32)

        for sx1, sy1, sx2, sy2 in stop_line:
            cv2.line(overlay, (int(sx1), int(sy1)), (int(sx2), int(sy2)), [0, 0, 255], thickness * 3)
            if clip_polygon is not None:
                overlay = region_of_interest(overlay, clip_polygon)

    return add_images(overlay, original_img)


def detect_lane(screen):
    """
    Run the lane detection pipeline on a captured frame.

    Steps: crop -> grayscale -> gaussian blur (kernel 7) -> Canny (50, 100)
    -> region-of-interest mask -> Hough lines -> construct_lane().

    Returns whatever construct_lane() returns (the lane and the stop line).
    """
    # polygon that keeps only the part of the cropped frame where the road is expected
    roi_vertices = np.array([[(0, 190), (0, 70), (187, 0),
                              (613, 0), (800, 70), (800, 190)]], np.int32)

    blurred = gaussian_blur(grayscale(crop(screen)), 7)
    edges = canny(blurred, 50, 100)
    masked_edges = region_of_interest(edges, roi_vertices)

    return construct_lane(hough_lines(masked_edges))


def main():
    """
    Grab frames in a loop, detect the lane and show the annotated frame.

    Press "q" in the preview window to quit.
    """
    while True:
        original_img = grab_screen()
        # crop -> gray -> blur -> canny -> ROI -> Hough -> construct lane;
        # detect_lane() runs exactly this pipeline, so it is not repeated here
        lane, stop_line = detect_lane(original_img)
        # Place lane detection output on the original image.
        # NOTE(review): rows 280:-130 are presumably the same band crop() selects - confirm
        original_img[280:-130, :, :] = draw_lane(original_img[280:-130, :, :], lane, stop_line, [0, 255, 0],
                                                 [0, 255, 0])

        cv2.imshow("Frame", original_img)
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            cv2.destroyAllWindows()
            break


if __name__ == '__main__':
    main()


================================================
FILE: object_detection/object_detect.py
================================================
import cv2
import numpy as np
from darkflow.net.build import TFNet
from shapely.geometry import box, Polygon

from data_collection.img_process import grab_screen
from object_detection.direction import Direct

# set YOLO options
# NOTE(review): the dict is handed to darkflow's TFNet as-is; 'threshold' is presumably the
# minimum detection confidence and 'gpu' the share of GPU memory to use - confirm in darkflow docs
options = {
    'model': 'cfg/yolo.cfg',
    'load': 'yolov2.weights',
    'threshold': 0.3,
    'gpu': 0.5
}
# the network object is constructed at import time of this module
tfnet = TFNet(options)

# capture = cv2.VideoCapture('gta2.mp4')
# black, used for traffic light boxes
t = (0, 0, 0)
# 5 random colors used for the boxes of the other detected objects
colors = [tuple(255 * np.random.rand(3)) for i in range(5)]
# 15 copies of the black color above
colors2 = [tuple(t) for j in range(15)]


def light_recog(frame, direct, traffic_lights):
    """
    Choose the traffic light to obey and recognize the color it shows.

    :param frame: image the detections refer to; the recognized color is written onto it
    :param direct: driving direction, compared against Direct.RIGHT / Direct.SLIGHTLY_RIGHT
    :param traffic_lights: non-empty list of detections, each a dict with
        'topleft' and 'bottomright' entries holding 'x' and 'y'
    :return: (frame, color_detected) where color_detected is "Red", "Yellow",
        "Green" or '' when no color was recognized
    """
    traffic_light = traffic_lights[0]

    # find out which traffic light to follow, if there are several:
    # the rightmost one when turning right, otherwise (straight or left) the leftmost one
    if len(traffic_lights) > 1:
        if direct == Direct.RIGHT or direct == Direct.SLIGHTLY_RIGHT:
            traffic_light = max(traffic_lights, key=lambda tl: tl['topleft']['x'])
        else:
            traffic_light = min(traffic_lights, key=lambda tl: tl['topleft']['x'])

    # coordinates of the traffic light
    top_left = (traffic_light['topleft']['x'], traffic_light['topleft']['y'])
    bottom_right = (traffic_light['bottomright']['x'], traffic_light['bottomright']['y'])
    # crop the frame to the traffic light
    roi = frame[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]]
    if roi.size == 0:
        # degenerate bounding box: nothing to analyze (cvtColor would raise on an empty image)
        return frame, ''
    hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

    # possible color ranges for traffic lights
    red_lower = np.array([136, 87, 111], dtype=np.uint8)
    red_upper = np.array([180, 255, 255], dtype=np.uint8)

    yellow_lower = np.array([22, 60, 200], dtype=np.uint8)
    yellow_upper = np.array([60, 255, 255], dtype=np.uint8)

    green_lower = np.array([50, 100, 100], dtype=np.uint8)
    green_upper = np.array([70, 255, 255], dtype=np.uint8)

    # find what color the traffic light is showing (red and green masks are dilated, yellow is not)
    kernel = np.ones((5, 5), np.uint8)
    red = cv2.dilate(cv2.inRange(hsv, red_lower, red_upper), kernel)
    yellow = cv2.inRange(hsv, yellow_lower, yellow_upper)
    green = cv2.dilate(cv2.inRange(hsv, green_lower, green_upper), kernel)

    # later entries override earlier ones, i.e. priority is Green > Yellow > Red
    color_detected = ''
    for color_name, mask in (("Red", red), ("Yellow", yellow), ("Green", green)):
        # findContours returns (image, contours, hierarchy) in OpenCV 3 and
        # (contours, hierarchy) in OpenCV 4; the contours are second-to-last in both
        contours = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(contours) > 0:
            color_detected = color_name

    # NOTE(review): the white text below is always drawn over the black one at the same
    # position; an `else` may have been intended here - confirm before changing
    if (0 <= top_left[1] and bottom_right[1] <= 437) and (244 <= top_left[0] and bottom_right[0] <= 630):
        frame = cv2.putText(frame, color_detected, bottom_right, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

    frame = cv2.putText(frame, color_detected, bottom_right, cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

    return frame, color_detected


# Regions (in frame pixel coordinates) in front of the car inside which an object counts as an obstacle.
# They are constant, so they are built once instead of on every call.
_CAR_ROI = Polygon([(100, 470), (350, 280), (450, 280), (700, 470)])
_HUMAN_ROI = Polygon([(90, 470), (350, 280), (450, 280), (700, 470)])


def _distance_to_object(frame, top_left, bottom_right, roi, exponent):
    """
    Shared implementation of distance_to_car() and distance_to_human().

    If the bounding box intersects `roi`, a relative distance is estimated from
    the box width, written at the box center, and a warning is drawn inside the box.

    :param frame: image to annotate
    :param top_left: (x, y) of the bounding box top-left corner
    :param bottom_right: (x, y) of the bounding box bottom-right corner
    :param roi: shapely Polygon the box has to intersect
    :param exponent: power applied to (1 - box width / 800); larger values
        make the estimate drop faster as the box gets wider
    :return: (frame, distance) where distance is None when the box is outside `roi`
    """
    distance = None
    obstacle = box(top_left[0], top_left[1], bottom_right[0], bottom_right[1])

    if roi.intersects(obstacle):
        mid_x = (bottom_right[0] + top_left[0]) / 2
        mid_y = (top_left[1] + bottom_right[1]) / 2
        # the wider the box relative to the 800 px reference width, the closer the object
        distance = round((1 - ((bottom_right[0] / 800) - (top_left[0] / 800))) ** exponent, 1)
        frame = cv2.putText(frame, '{}'.format(distance), (int(mid_x), int(mid_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                            (255, 255, 255), 2)
        # the warning is drawn on a slice of the frame at (50, 50) relative to the box corner
        cv2.putText(frame[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]],
                    'WARNING!', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3)

    return frame, distance


def distance_to_car(frame, top_left, bottom_right):
    """
    Estimate the relative distance to a vehicle in front of the car.

    :return: (frame, distance); distance is None when the vehicle is outside the watched region
    """
    return _distance_to_object(frame, top_left, bottom_right, _CAR_ROI, 4)


def distance_to_human(frame, top_left, bottom_right):
    """
    Estimate the relative distance to a person in front of the car.

    :return: (frame, distance); distance is None when the person is outside the watched region
    """
    return _distance_to_object(frame, top_left, bottom_right, _HUMAN_ROI, 15)


def yolo_detection(screen, direct):
    """
    Detect objects on a frame with YOLO and annotate the frame.

    :param screen: captured frame; the bottom 130 rows are not passed to the detector
    :param direct: driving direction, forwarded to light_recog()
    :return: (screen, color_detected, distance) where color_detected is the
        traffic light color reported by light_recog() (None if no relevant
        traffic light was found) and distance is the smallest distance
        reported for a vehicle or a person (1 if there is none)
    """
    # find objects on a frame by using YOLO
    results = tfnet.return_predict(screen[:-130, :, :])
    # create a list of detected traffic lights (might be several on a frame)
    traffic_lights = []
    color_detected = None
    distance = 1

    # The palettes are indexed cyclically: zipping them with the results would
    # silently drop every detection beyond the length of the shortest palette.
    for i, result in enumerate(results):
        color = colors[i % len(colors)]
        top_left = (result['topleft']['x'], result['topleft']['y'])
        bottom_right = (result['bottomright']['x'], result['bottomright']['y'])
        label = result['label']
        confidence = result['confidence']
        text = '{}: {:.0f}%'.format(label, confidence * 100)

        if label == 'traffic light' and confidence > 0.3:
            # only traffic lights in the middle part of the frame are candidates to follow
            if 220 <= result['topleft']['x'] <= 630:
                traffic_lights.append(result)

            color = colors2[i % len(colors2)]
            screen = cv2.rectangle(screen, top_left, bottom_right, color, 6)
            screen = cv2.putText(screen, text, top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

        if label in ('car', 'bus', 'truck', 'train'):
            screen, car_distance = distance_to_car(screen, top_left, bottom_right)

            # compare against None explicitly: 0.0 is a valid (and the most critical) distance
            if car_distance is not None and 0 <= car_distance < distance:
                distance = car_distance

            screen = cv2.rectangle(screen, top_left, bottom_right, color, 6)
            screen = cv2.putText(screen, text, top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

        if label == 'person':
            screen, person_distance = distance_to_human(screen, top_left, bottom_right)

            if person_distance is not None and 0 <= person_distance < distance:
                distance = person_distance

            screen = cv2.rectangle(screen, top_left, bottom_right, color, 6)
            screen = cv2.putText(screen, text, top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

    if traffic_lights:
        screen, color_detected = light_recog(screen, direct, traffic_lights)

    return screen, color_detected, distance


def main():
    """
    Run object detection on captured frames until "q" is pressed in the preview window.
    """
    quit_key = ord("q")

    while True:
        frame, light_color, nearest = yolo_detection(grab_screen(), 0)

        if light_color:
            print("Color detected: " + light_color)
        # 1 is the value yolo_detection() starts from, i.e. no obstacle was reported
        if nearest != 1:
            print("Distance to obstacle: {}".format(nearest))

        cv2.imshow("Frame", frame)
        if (cv2.waitKey(1) & 0xFF) != quit_key:
            continue

        cv2.destroyAllWindows()
        break


if __name__ == '__main__':
    main()


================================================
FILE: requirements.txt
================================================
### To install the packages type in the console:
### pip install -r requirements.txt

numpy
opencv-python
# tensorflow
tensorflow-gpu

# Python for Windows Extensions
pywin32
# For data management
h5py
# A high-level neural networks API capable of running on top of TensorFlow
Keras
# Tools for data mining and data analysis
scikit-learn
# To read information from a gamepad
inputs
# for object detection module
Shapely
# for YOLO
Cython


================================================
FILE: training/model.py
================================================
"""
NN model
"""

from keras.layers import Lambda, Conv2D, Dropout, Dense, Flatten, Concatenate, Input, MaxPooling2D
from keras.models import Model

from training.utils import INPUT_SHAPE, RADAR_SHAPE


# original Nvidia model
# def build_model(args):
#     """
#     NVIDIA model used
#     Image normalization to avoid saturation and make gradients work better.
#     Convolution: 5x5, filter: 24, strides: 2x2, activation: ELU
#     Convolution: 5x5, filter: 36, strides: 2x2, activation: ELU
#     Convolution: 5x5, filter: 48, strides: 2x2, activation: ELU
#     Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
#     Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
#     Drop out (0.5)
#     Fully connected: neurons: 100, activation: ELU
#     Fully connected: neurons: 50, activation: ELU
#     Fully connected: neurons: 10, activation: ELU
#     Fully connected: neurons: 1 (output)
#     # the convolution layers are meant to handle feature engineering
#     the fully connected layer for predicting the steering angle.
#     dropout avoids overfitting
#     ELU(Exponential linear unit) function takes care of the Vanishing gradient problem.
#     """
#     model = Sequential()
#     model.add(Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE))
#     model.add(Conv2D(24, (5, 5), activation='elu', strides=(2, 2)))
#     model.add(Conv2D(36, (5, 5), activation='elu', strides=(2, 2)))
#     model.add(Conv2D(48, (5, 5), activation='elu', strides=(2, 2)))
#     model.add(Conv2D(64, (3, 3), activation='elu'))
#     model.add(Conv2D(64, (3, 3), activation='elu'))
#     model.add(Dropout(args.keep_prob))
#     model.add(Flatten())
#     model.add(Dense(100, activation='elu'))
#     model.add(Dense(50, activation='elu'))
#     model.add(Dense(10, activation='elu'))
#     model.add(Dense(1))
#     model.summary()
#
#     return model


# original + radar added
# def build_model(args):
#     # image model
#     img_input = Input(shape=INPUT_SHAPE)
#     img_model = (Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE))(img_input)
#     img_model = (Conv2D(24, (5, 5), activation='elu', strides=(2, 2)))(img_model)
#     img_model = (Conv2D(36, (5, 5), activation='elu', strides=(2, 2)))(img_model)
#     img_model = (Conv2D(48, (5, 5), activation='elu', strides=(2, 2)))(img_model)
#     img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model)
#     img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model)
#     img_model = (Dropout(args.keep_prob))(img_model)
#     img_model = (Flatten())(img_model)
#     img_model = (Dense(100, activation='elu'))(img_model)
#
#     # radar model
#     radar_input = Input(shape=RADAR_SHAPE)
#     radar_model = (Conv2D(10, (5, 5), activation='elu'))(radar_input)
#     radar_model = (MaxPooling2D((2, 2)))(radar_model)
#     radar_model = (Conv2D(20, (5, 5), activation='elu'))(radar_model)
#     radar_model = (MaxPooling2D((2, 2)))(radar_model)
#     radar_model = (Dropout(args.keep_prob / 2))(radar_model)
#     radar_model = (Flatten())(radar_model)
#     radar_model = (Dense(30, activation='elu'))(radar_model)
#
#     # combined model
#     out = Concatenate()([img_model, radar_model])
#     out = (Dense(50, activation='elu'))(out)
#     out = (Dense(10, activation='elu'))(out)
#     out = (Dense(1))(out)
#
#     final_model = Model(inputs=[img_input, radar_input], outputs=out)
#     final_model.summary()
#
#     return final_model


# original + radar and speed info added
def build_model(args):
    """
    Build the three-input network: camera image, radar patch and speed.

    The camera branch follows the NVIDIA layout (normalization, five ELU
    convolutions, dropout, dense 100). The radar branch is a small
    conv/max-pool stack ending in dense 10. Both are concatenated, passed
    through dense 50, joined with the raw speed input and reduced through
    dense 10 to a single output value.

    `args.keep_prob` is used as the Dropout argument of the camera branch and
    half of it for the radar branch.
    """
    # camera branch
    img_input = Input(shape=INPUT_SHAPE)
    camera_features = Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE)(img_input)
    camera_convs = (
        (24, (5, 5), (2, 2)),
        (36, (5, 5), (2, 2)),
        (48, (5, 5), (2, 2)),
        (64, (3, 3), (1, 1)),
        (64, (3, 3), (1, 1)),
    )
    for filters, kernel_size, strides in camera_convs:
        camera_features = Conv2D(filters, kernel_size, activation='elu', strides=strides)(camera_features)
    camera_features = Dropout(args.keep_prob)(camera_features)
    camera_features = Flatten()(camera_features)
    camera_features = Dense(100, activation='elu')(camera_features)

    # radar branch
    radar_input = Input(shape=RADAR_SHAPE)
    radar_features = radar_input
    for filters in (32, 64):
        radar_features = Conv2D(filters, (5, 5), activation='elu')(radar_features)
        radar_features = MaxPooling2D((2, 2), strides=(2, 2))(radar_features)
    radar_features = Dropout(args.keep_prob / 2)(radar_features)
    radar_features = Flatten()(radar_features)
    radar_features = Dense(10, activation='elu')(radar_features)

    # speed is fed in as a single raw value
    speed_input = Input(shape=(1,))

    # merge the branches
    merged = Concatenate()([camera_features, radar_features])
    merged = Dense(50, activation='elu')(merged)
    merged = Concatenate()([merged, speed_input])
    merged = Dense(10, activation='elu')(merged)
    prediction = Dense(1)(merged)

    final_model = Model(inputs=[img_input, radar_input, speed_input], outputs=prediction)
    final_model.summary()

    return final_model

# original + throttle control
# def build_model(args):
#     """
#     NVIDIA model used
#     Image normalization to avoid saturation and make gradients work better.
#     Convolution: 5x5, filter: 24, strides: 2x2, activation: ELU
#     Convolution: 5x5, filter: 36, strides: 2x2, activation: ELU
#     Convolution: 5x5, filter: 48, strides: 2x2, activation: ELU
#     Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
#     Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
#     Drop out (0.5)
#     Fully connected: neurons: 100, activation: ELU
#     Fully connected: neurons: 50, activation: ELU
#     Fully connected: neurons: 10, activation: ELU
#     Fully connected: neurons: 1 (output)
#     # the convolution layers are meant to handle feature engineering
#     the fully connected layer for predicting the steering angle.
#     dropout avoids overfitting
#     ELU(Exponential linear unit) function takes care of the Vanishing gradient problem.
#     """
#     # image model
#     img_input = Input(shape=INPUT_SHAPE)
#     img_model = (Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE))(img_input)
#     img_model = (Conv2D(24, (5, 5), activation='elu', strides=(2, 2)))(img_model)
#     img_model = (Conv2D(36, (5, 5), activation='elu', strides=(2, 2)))(img_model)
#     img_model = (Conv2D(48, (5, 5), activation='elu', strides=(2, 2)))(img_model)
#     img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model)
#     img_model = (Conv2D(64, (3, 3), activation='elu'))(img_model)
#     img_model = (Dropout(args.keep_prob))(img_model)
#     img_model = (Flatten())(img_model)
#     img_model = (Dense(100, activation='elu'))(img_model)
#
#     # speed and direction model
#     metrics_input = Input(shape=(2,))
#     metrics_model = Dense(2, activation='elu')(metrics_input)
#
#     # combined model
#     out = Concatenate()([img_model, metrics_model])
#     out = (Dense(50, activation='elu'))(out)
#     out = (Dense(10, activation='elu'))(out)
#     out = (Dense(2))(out)
#
#     final_model = Model(inputs=[img_input, metrics_input], outputs=out)
#     final_model.summary()
#
#     return final_model


================================================
FILE: training/train.py
================================================
# This code based on Siraj Raval's code (https://github.com/llSourcell/How_to_simulate_a_self_driving_car)

"""
Training module. Based on "End to End Learning for Self-Driving Cars" research paper by Nvidia.
"""

import argparse

import h5py
import numpy as np
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split  # to split out training and testing data

# path with training files
from data_collection.data_collect import path
from training.model import build_model
# helper class
from training.utils import batch_generator

# for debugging, allows for reproducible (deterministic) results
np.random.seed(0)


def load_data(args):
    """
    Open the training data file and split its sample indexes into a training and a validation set.

    The file given by `args.data_dir` (the `-d` command line option) is used;
    if the attribute is missing or empty, the default `path` is used instead.
    `args.test_size` is the fraction of samples put into the validation set.

    Returns (data, indexes_train, indexes_valid). `data` is the open h5py
    file; it is left open because the batch generator reads from it lazily.
    """
    # honor the -d option instead of always reading the default file
    data = h5py.File(getattr(args, 'data_dir', None) or path, 'r')
    # list of all possible indexes
    indexes = list(range(data['img'].shape[0]))
    # split the indexes into a training and a validation set (fixed seed for reproducibility)
    indexes_train, indexes_valid = train_test_split(indexes, test_size=args.test_size, random_state=0)

    return data, indexes_train, indexes_valid


def load_weights(model, weights_path="..\\training\\base_model.h5"):
    """
    Load weights from a previously trained model.

    :param model: model to initialize; it must have the same weight layout as the saved one
    :param weights_path: path of the saved model the weights are copied from
        (defaults to the base model, relative to the working directory)
    :return: the same `model` with the weights set
    """
    prev_model = load_model(weights_path)
    model.set_weights(prev_model.get_weights())

    return model


def train_model(model, args, data, indexes_train, indexes_valid):
    """
    Train the model.

    :param model: Keras model to train (compiled here with MSE loss and Adam)
    :param args: parsed command line arguments; `save_best_only`,
        `learning_rate`, `batch_size` and `nb_epoch` are used
    :param data: open data file passed on to batch_generator()
    :param indexes_train: sample indexes used for training
    :param indexes_valid: sample indexes used for validation
    """
    # Saves the model after every epoch.
    # quantity to monitor, verbosity i.e logging mode (0 or 1),
    # if save_best_only is true the latest best model according to the quantity monitored will not be overwritten.
    # mode: one of {auto, min, max}. If save_best_only=True, the decision to overwrite the current save file is
    # made based on either the maximization or the minimization of the monitored quantity. For val_acc,
    # this should be max, for val_loss this should be min, etc. In auto mode, the direction is automatically
    # inferred from the name of the monitored quantity.
    checkpoint = ModelCheckpoint('model-{epoch:03d}.h5',
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=args.save_best_only,
                                 mode='auto')

    # mean squared error between the expected and the predicted steering value
    # is what we want to minimize via gradient descent
    model.compile(loss='mean_squared_error', optimizer=Adam(lr=args.learning_rate))

    # Step counts must be integers: use the ceiling of samples / batch size so
    # that every sample can be covered, instead of passing a float to Keras.
    batch_size = args.batch_size
    train_steps = (len(indexes_train) + batch_size - 1) // batch_size
    valid_steps = (len(indexes_valid) + batch_size - 1) // batch_size

    # Fits the model on data generated batch-by-batch by a Python generator.
    # The generator is run in parallel to the model, for efficiency, e.g. to do
    # the augmentation on CPU while the model is trained on GPU.
    model.fit_generator(batch_generator(data, indexes_train, batch_size, True),
                        steps_per_epoch=train_steps,
                        epochs=args.nb_epoch,
                        max_queue_size=1,
                        validation_data=batch_generator(data, indexes_valid, batch_size, False),
                        validation_steps=valid_steps,
                        callbacks=[checkpoint],
                        verbose=1)


# for command line args
def s2b(s):
    """
    Interpret a command line string as a boolean.

    'true', 'yes', 'y' and '1' (in any letter case) give True, anything else gives False.
    """
    truthy_words = ('true', 'yes', 'y', '1')
    return s.lower() in truthy_words


def main():
    """
    Parse the command line, load the train/validation data and train the model.
    """
    # (flag, help text, destination, converter, default) of every supported option
    cli_options = (
        ('-d', 'data directory', 'data_dir', str, path),
        ('-t', 'test size fraction', 'test_size', float, 0.2),
        ('-k', 'drop out probability', 'keep_prob', float, 0.5),
        ('-n', 'number of epochs', 'nb_epoch', int, 200),
        ('-b', 'batch size', 'batch_size', int, 500),
        ('-o', 'save best models only', 'save_best_only', s2b, 'true'),
        ('-l', 'learning rate', 'learning_rate', float, 1.0e-4),
    )
    parser = argparse.ArgumentParser(description='Behavioral Cloning Training Program')
    for flag, help_text, dest, converter, default in cli_options:
        parser.add_argument(flag, help=help_text, dest=dest, type=converter, default=default)
    args = parser.parse_args()

    # show the effective parameters
    separator = '-' * 30
    print(separator)
    print('Parameters')
    print(separator)
    for name, setting in vars(args).items():
        print('{:<20} := {}'.format(name, setting))
    print(separator)

    # open the data set and split it
    data, indexes_train, indexes_valid = load_data(args)
    # build the network and start from the previously trained weights
    model = load_weights(build_model(args))
    # train; checkpoints are written by train_model()
    train_model(model, args, data, indexes_train, indexes_valid)


if __name__ == '__main__':
    main()


================================================
FILE: training/utils.py
================================================
# This code based on Siraj Raval's code (https://github.com/llSourcell/How_to_simulate_a_self_driving_car)

import math

import cv2
import numpy as np
import tensorflow as tf

# shape (height, width, channels) of the camera image fed to the network; resize() scales images to this size
IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS = 66, 200, 3
INPUT_SHAPE = (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)
# shape (height, width, channels) of the radar patch fed to the network; batch_generator() cuts a
# 20x20 single-channel patch out of the camera image to fill it
RADAR_HEIGHT, RADAR_WIDTH, RADAR_CHANNELS = 20, 20, 1
RADAR_SHAPE = (RADAR_HEIGHT, RADAR_WIDTH, RADAR_CHANNELS)


def crop(image):
    """
    Cut off the top 90 and the bottom 50 rows of the image
    (the sky at the top and the car front at the bottom).
    """
    first_row, end_row = 90, -50
    return image[first_row:end_row, :, :]


def resize(image):
    """
    Resize the image to the input shape used by the network model.

    The interpolation flag is passed by keyword: the third positional
    parameter of cv2.resize is `dst`, so a positional cv2.INTER_AREA is not
    treated as the interpolation method.

    NOTE(review): models trained before this change did not see INTER_AREA
    interpolation; retrain or re-validate existing weights.
    """
    return cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_AREA)


def rgb2yuv(image):
    """
    Convert the image from RGB to YUV (This is what the NVIDIA model does)

    Uses cv2.COLOR_RGB2YUV, so the input is assumed to be in RGB channel
    order - TODO confirm against the capture code.
    """
    return cv2.cvtColor(image, cv2.COLOR_RGB2YUV)


def preprocess(image):
    """
    Prepare a raw frame for the network: crop it, resize it and convert it to YUV, in that order.
    """
    return rgb2yuv(resize(crop(image)))


# def choose_image(data_dir, center, left, right, steering_angle):
#     """
#     Randomly choose an image from the center, left or right, and adjust
#     the steering angle.
#     """
#     choice = np.random.choice(3)
#     if choice == 0:
#         return load_image(data_dir, left), steering_angle + 0.2
#     elif choice == 1:
#         return load_image(data_dir, right), steering_angle - 0.2
#     return load_image(data_dir, center), steering_angle


# flip image causes car riding on the opposite direction lane
# def random_flip(image, steering_angle):
#     """
#     Randomly flip the image left <-> right, and adjust the steering angle.
#     """
#     if np.random.rand() < 0.5:
#         image = cv2.flip(image, 1)
#         steering_angle = -steering_angle
#     return image, steering_angle


def random_translate(image, steering_angle, range_x, range_y):
    """
    Shift the image by a random horizontal and vertical offset.

    The offsets are drawn uniformly from +-range_x / 2 and +-range_y / 2.
    The horizontal offset is converted to steps of 25 pixels (rounded away
    from zero); every step beyond the second one changes the steering angle
    by one unit in the direction of the shift, limited to the range [-10, 10].

    Returns (shifted image, adjusted steering angle).
    """
    shift_x = range_x * (np.random.rand() - 0.5)
    shift_y = range_y * (np.random.rand() - 0.5)

    # steering correction, only for shifts of more than two 25-pixel steps
    steps = shift_x / 25
    if steps > 0:
        steps = math.ceil(steps)
        if steps > 2:
            steering_angle = min(steering_angle + (steps - 2), 10)
    else:
        steps = math.floor(steps)
        if steps < -2:
            steering_angle = max(steering_angle + (steps + 2), -10)

    # pure translation expressed as a 2x3 affine matrix
    rows, cols = image.shape[:2]
    translation = np.float32([[1, 0, shift_x], [0, 1, shift_y]])
    shifted = cv2.warpAffine(image, translation, (cols, rows))
    return shifted, steering_angle


def random_shadow(image):
    """
    Generate and add a random shadow.

    A random line from the top edge to the bottom edge splits the image in
    two; one randomly chosen side is darkened by scaling its lightness
    (channel 1 of HLS) with a random factor between 0.2 and 0.5.

    The split is computed on the actual image size, so the shadow covers the
    whole image regardless of its resolution.
    """
    height, width = image.shape[:2]

    # (x1, y1) and (x2, y2) form a line from the top edge to the bottom edge
    x1, y1 = width * np.random.rand(), 0
    x2, y2 = width * np.random.rand(), height
    # np.mgrid yields the row (y) coordinates first, then the column (x) coordinates
    ym, xm = np.mgrid[0:height, 0:width]

    # set 1 on one side of the line and leave zero on the other.
    # (ym-y1)/(xm-x1) > (y2-y1)/(x2-x1) would divide by zero for x2 == x1,
    # so the cross-multiplied form is used:
    # (ym-y1)*(x2-x1) - (y2-y1)*(xm-x1) > 0
    mask = np.zeros_like(image[:, :, 1])
    mask[(ym - y1) * (x2 - x1) - (y2 - y1) * (xm - x1) > 0] = 1

    # choose which side should have the shadow and how dark it is
    cond = mask == np.random.randint(2)
    s_ratio = np.random.uniform(low=0.2, high=0.5)

    # darken by scaling Lightness in HLS (Hue, Lightness, Saturation)
    hls = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
    hls[:, :, 1][cond] = hls[:, :, 1][cond] * s_ratio
    return cv2.cvtColor(hls, cv2.COLOR_HLS2RGB)


def random_brightness(image):
    """
    Randomly adjust brightness of the image.

    The V channel of HSV is scaled by a random factor between 0.8 and 1.2.
    The result is clipped to [0, 255] before it is stored back, so bright
    pixels saturate instead of wrapping around to dark values.
    """
    # HSV (Hue, Saturation, Value) is also called HSB ('B' for Brightness).
    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    ratio = 1.0 + 0.4 * (np.random.rand() - 0.5)
    hsv[:, :, 2] = np.clip(hsv[:, :, 2] * ratio, 0, 255)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)


def augment(image, steering_angle, range_x=250, range_y=20):
    """
    Generate an augmented image and adjust steering angle.

    Applies, in this order, a random translation (which may also change the
    steering angle), a random shadow and a random brightness change.
    `range_x` and `range_y` are forwarded to random_translate().

    Returns (augmented image, adjusted steering angle).
    """
    # the two steps below are disabled in this version
    # image, steering_angle = choose_image(data_dir, center, left, right, steering_angle)
    # image, steering_angle = random_flip(image, steering_angle)
    image, steering_angle = random_translate(image, steering_angle, range_x, range_y)
    image = random_shadow(image)
    image = random_brightness(image)
    return image, steering_angle


def batch_generator(data, indexes, batch_size, is_training):
    """
    Endlessly yield batches of model inputs and steering targets.

    For every batch a fresh random permutation of `indexes` is drawn and its
    first `batch_size` entries are used. For each selected index the
    generator reads data['img'][index] (camera frame),
    data['controls'][index][1] (steering) and data['metrics'][index][0]
    (stored in the `speeds` input).

    :param data: mapping-like object exposing 'img', 'controls' and 'metrics'
                 datasets that can be indexed by sample index
    :param indexes: sample indexes to draw from (anything accepted by
                    np.random.permutation)
    :param batch_size: maximum number of samples per batch
    :param is_training: when true, samples are randomly passed through augment
    :return: generator yielding ([images, radars, speeds], controls); the
             leading dimension is min(batch_size, number of indexes)
    :raises ValueError: if no sample can be drawn (no indexes or a
                        non-positive batch_size)
    """
    # preprocessing on the CPU
    with tf.device('/cpu:0'):
        while True:
            chosen = np.random.permutation(indexes)[:batch_size]
            count = len(chosen)
            if count == 0:
                raise ValueError("batch_generator needs at least one index and a positive batch_size")

            # Allocate new buffers for every batch: a consumer that queues
            # several batches before using them would otherwise see all of
            # them share (and overwrite) the same memory. Sizing them by
            # `count` also avoids yielding uninitialized rows when fewer than
            # batch_size indexes are available.
            images = np.empty([count, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS])
            radars = np.empty([count, RADAR_HEIGHT, RADAR_WIDTH, RADAR_CHANNELS])
            # metrics = np.empty([count, 2])
            # controls = np.empty([count, 2])
            speeds = np.empty(count)
            controls = np.empty(count)

            for i, index in enumerate(chosen):
                camera = data['img'][index]
                # the radar crop is taken from the frame before augmentation
                radar = cv2.cvtColor(camera[206:226, 25:45, :], cv2.COLOR_RGB2BGR)
                steer = data['controls'][index][1]

                # augmentation
                if is_training:
                    prob = np.random.rand()
                    if (abs(steer) < 0.4 and prob > 0.2) or (prob < 0.6):
                        camera, steer = augment(camera, steer)

                # add the image and steering angle to the batch
                images[i] = preprocess(camera)
                # channel 2 of the BGR crop, i.e. the red channel of the RGB frame
                radars[i] = radar[:, :, 2:3]
                # controls[i] = [data['controls'][index][0] / 10, steer / 10]  # normalized throttle and steering
                controls[i] = steer / 10
                speeds[i] = data['metrics'][index][0]
                # metrics[i] = data['metrics'][index]

            # yield [images, metrics], controls
            yield [images, radars, speeds], controls
Download .txt
gitextract_4lc496_7/

├── README.md
├── data_collection/
│   ├── data_balancing.py
│   ├── data_collect.py
│   ├── gamepad_cap.py
│   ├── histogram.py
│   ├── img_process.py
│   ├── key_cap.py
│   └── resources/
│       ├── arrows.npy
│       ├── arrows_labels.npy
│       ├── digits.npy
│       └── digits_labels.npy
├── drivers.txt
├── driving/
│   ├── drive.py
│   └── gamepad.py
├── game_plugins.txt
├── object_detection/
│   ├── direction.py
│   ├── lane_detect.py
│   └── object_detect.py
├── requirements.txt
└── training/
    ├── base_model.h5
    ├── model.py
    ├── models/
    │   └── original + radar/
    │       └── base_model.h5
    ├── train.py
    └── utils.py
Download .txt
SYMBOL INDEX (65 symbols across 13 files)

FILE: data_collection/data_balancing.py
  function save (line 13) | def save(data_img, controls, metrics):
  function main (line 23) | def main():

FILE: data_collection/data_collect.py
  function save (line 34) | def save(data_img, controls, metrics):
  function delete (line 47) | def delete(session):
  function main (line 54) | def main():

FILE: data_collection/gamepad_cap.py
  class Gamepad (line 23) | class Gamepad:
    method __init__ (line 24) | def __init__(self):
    method open (line 32) | def open(self):
    method run (line 36) | def run(self):
    method get_state (line 53) | def get_state(self):
    method get_RB (line 75) | def get_RB(self):
    method get_LB (line 78) | def get_LB(self):
    method close (line 81) | def close(self):

FILE: data_collection/img_process.py
  function initKNN (line 13) | def initKNN(data, labels, shape):
  function grab_screen (line 28) | def grab_screen(winName: str = "Grand Theft Auto V"):
  function predict (line 77) | def predict(img, knn):
  function preprocess (line 82) | def preprocess(img):
  function convert_speed (line 88) | def convert_speed(num1, num2, num3):
  function img_process (line 106) | def img_process(winName: str = "Grand Theft Auto V"):

FILE: data_collection/key_cap.py
  function key_check (line 14) | def key_check():

FILE: driving/drive.py
  function set_gamepad (line 30) | def set_gamepad(controls):
  function drive (line 53) | def drive(model):
  function main (line 169) | def main():

FILE: driving/gamepad.py
  class XInputDevice (line 28) | class XInputDevice:
    method __init__ (line 29) | def __init__(self, port):
    method PlugIn (line 34) | def PlugIn(self):
    method UnPlug (line 37) | def UnPlug(self, force=False):
    method SetBtn (line 43) | def SetBtn(self, button, value):
    method SetTrigger (line 61) | def SetTrigger(self, trigger, value):
    method SetAxis (line 70) | def SetAxis(self, axis, value):
    method SetDpad (line 81) | def SetDpad(self, direction, value=0):
    method GetLedNumber (line 96) | def GetLedNumber(self, pLed):
    method GetVibration (line 99) | def GetVibration(self, pVib):

FILE: object_detection/direction.py
  class Direct (line 4) | class Direct(Enum):

FILE: object_detection/lane_detect.py
  function crop (line 11) | def crop(image):
  function grayscale (line 18) | def grayscale(img):
  function canny (line 26) | def canny(img, low_threshold=100, high_threshold=300):
  function gaussian_blur (line 33) | def gaussian_blur(img, kernel_size):
  function region_of_interest (line 40) | def region_of_interest(img, vertices):
  function construct_lane (line 66) | def construct_lane(lines):
  function hough_lines (line 193) | def hough_lines(img, rho=6, theta=np.pi / 120, threshold=160, min_line_l...
  function add_images (line 205) | def add_images(img, initial_img):
  function draw_lane (line 220) | def draw_lane(original_img, lane, stop_line, left_color, right_color, th...
  function detect_lane (line 267) | def detect_lane(screen):
  function main (line 285) | def main():

FILE: object_detection/object_detect.py
  function light_recog (line 24) | def light_recog(frame, direct, traffic_lights):
  function distance_to_car (line 90) | def distance_to_car(frame, top_left, bottom_right):
  function distance_to_human (line 113) | def distance_to_human(frame, top_left, bottom_right):
  function yolo_detection (line 131) | def yolo_detection(screen, direct):
  function main (line 178) | def main():

FILE: training/model.py
  function build_model (line 86) | def build_model(args):

FILE: training/train.py
  function load_data (line 26) | def load_data(args):
  function load_weights (line 39) | def load_weights(model):
  function train_model (line 49) | def train_model(model, args, data, indexes_train, indexes_valid):
  function s2b (line 91) | def s2b(s):
  function main (line 99) | def main():

FILE: training/utils.py
  function crop (line 15) | def crop(image):
  function resize (line 22) | def resize(image):
  function rgb2yuv (line 29) | def rgb2yuv(image):
  function preprocess (line 36) | def preprocess(image):
  function random_translate (line 70) | def random_translate(image, steering_angle, range_x, range_y):
  function random_shadow (line 99) | def random_shadow(image):
  function random_brightness (line 127) | def random_brightness(image):
  function augment (line 138) | def augment(image, steering_angle, range_x=250, range_y=20):
  function batch_generator (line 151) | def batch_generator(data, indexes, batch_size, is_training):
Condensed preview — 24 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (64K chars).
[
  {
    "path": "README.md",
    "chars": 306,
    "preview": "# Self-Driving Car for GTA V\n### Overview\nThe aim of this project is to create a self-driving car using a virtual simila"
  },
  {
    "path": "data_collection/data_balancing.py",
    "chars": 2236,
    "preview": "import h5py\n\nfrom data_collection.data_collect import path as source_path\n\ndest_path = \"F:\\Graduation_Project\\\\training_"
  },
  {
    "path": "data_collection/data_collect.py",
    "chars": 4683,
    "preview": "\"\"\"\nData collection module (saves data in H5 format).\nSaves screen captures and pressed keys into a file\nfor further tra"
  },
  {
    "path": "data_collection/gamepad_cap.py",
    "chars": 2090,
    "preview": "\"\"\"\nModule for reading information from an Xbox gamepad\n\"\"\"\n\nimport threading\n\nfrom inputs import get_gamepad\n\n# Gamepad"
  },
  {
    "path": "data_collection/histogram.py",
    "chars": 392,
    "preview": "\"\"\"\nHistogram of turns (for future balancing of data)\n\"\"\"\n\nimport h5py\nimport matplotlib.pyplot as plt\nimport numpy as n"
  },
  {
    "path": "data_collection/img_process.py",
    "chars": 4182,
    "preview": "\"\"\"\nModule for preprocessing screen captures\n\"\"\"\n\nimport win32gui\nimport win32ui\n\nimport cv2\nimport numpy as np\nimport w"
  },
  {
    "path": "data_collection/key_cap.py",
    "chars": 381,
    "preview": "# Citation: Box Of Hats (https://github.com/Box-Of-Hats)\n\n\"\"\"\nModule for reading keys from a keyboard\n\"\"\"\n\nimport win32a"
  },
  {
    "path": "drivers.txt",
    "chars": 266,
    "preview": "# For testing AI an XBox controller emulator is needed\n# https://github.com/shauleiz/ScpVBus/releases\n\nScpVBus\n\n# Instal"
  },
  {
    "path": "driving/drive.py",
    "chars": 5601,
    "preview": "\"\"\"\nCar driving module.\n\"\"\"\n\n# reading and writing files\nimport os\nimport time\n\nimport cv2\nimport numpy as np\n# load our"
  },
  {
    "path": "driving/gamepad.py",
    "chars": 2750,
    "preview": "# This code based on Musi13's code (https://github.com/Musi13/pyvxbox)\n\n\"\"\"\nGamepad emulating module.\n\"\"\"\n\nimport sys\nfr"
  },
  {
    "path": "game_plugins.txt",
    "chars": 226,
    "preview": "### List of plugins used in GTA V\n### for generating better conditions for AI\n\n# allows installation of plugins\nScript H"
  },
  {
    "path": "object_detection/direction.py",
    "chars": 164,
    "preview": "from enum import Enum\n\n\nclass Direct(Enum):\n    STRAIGHT = 0\n    LEFT = 1\n    RIGHT = 2\n    SLIGHTLY_LEFT = 3\n    SLIGHT"
  },
  {
    "path": "object_detection/lane_detect.py",
    "chars": 10918,
    "preview": "import math\n\nimport cv2\nimport numpy as np\n\nfrom data_collection.img_process import grab_screen\n\nprev_lines = [[], [], ["
  },
  {
    "path": "object_detection/object_detect.py",
    "chars": 7608,
    "preview": "import cv2\nimport numpy as np\nfrom darkflow.net.build import TFNet\nfrom shapely.geometry import box, Polygon\n\nfrom data_"
  },
  {
    "path": "requirements.txt",
    "chars": 437,
    "preview": "### To install the packages type in the console:\n### pip install -r requirements.txt\n\nnumpy\nopencv-python\n# tensorflow\nt"
  },
  {
    "path": "training/model.py",
    "chars": 7272,
    "preview": "\"\"\"\nNN model\n\"\"\"\n\nfrom keras.layers import Lambda, Conv2D, Dropout, Dense, Flatten, Concatenate, Input, MaxPooling2D\nfro"
  },
  {
    "path": "training/train.py",
    "chars": 5299,
    "preview": "# This code based on Siraj Raval's code (https://github.com/llSourcell/How_to_simulate_a_self_driving_car)\n\n\"\"\"\nTraining"
  },
  {
    "path": "training/utils.py",
    "chars": 6475,
    "preview": "# This code based on Siraj Raval's code (https://github.com/llSourcell/How_to_simulate_a_self_driving_car)\n\nimport math\n"
  }
]

// ... and 6 more files (download for full content)

About this extraction

This page contains the full source code of the hadipash/AI_GTA5 GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 24 files (59.8 KB), approximately 17.0k tokens, and a symbol index with 65 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!