Repository: DanielSlater/PyGamePlayer Branch: master Commit: b4889a7f9860 Files: 40 Total size: 74.2 KB Directory structure: gitextract_cxdjfk24/ ├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── examples/ │ ├── __init__.py │ ├── deep_q_half_pong_networks_40x40_6/ │ │ ├── checkpoint │ │ ├── network-1190000 │ │ ├── network-1190000.meta │ │ ├── network-1200000 │ │ ├── network-1200000.meta │ │ ├── network-1210000 │ │ ├── network-1210000.meta │ │ ├── network-1220000 │ │ ├── network-1220000.meta │ │ ├── network-1230000 │ │ └── network-1230000.meta │ ├── deep_q_half_pong_networks_40x40_8/ │ │ ├── checkpoint │ │ ├── network-1260000 │ │ ├── network-1260000.meta │ │ ├── network-1270000 │ │ ├── network-1270000.meta │ │ ├── network-1280000 │ │ ├── network-1280000.meta │ │ ├── network-1290000 │ │ ├── network-1290000.meta │ │ ├── network-1300000 │ │ └── network-1300000.meta │ ├── deep_q_half_pong_player.py │ ├── deep_q_pong_player.py │ ├── pong_player.py │ └── tetris_player.py ├── games/ │ ├── __init__.py │ ├── half_pong.py │ ├── mini_pong.py │ ├── pong.py │ └── tetris.py ├── pygame_player.py └── tests/ ├── __init__.py └── test_pygame_player.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ # Auto detect text files and perform LF normalization * text=auto # Custom for Visual Studio *.cs diff=csharp # Standard to msysgit *.doc diff=astextplain *.DOC diff=astextplain *.docx diff=astextplain *.DOCX diff=astextplain *.dot diff=astextplain *.DOT diff=astextplain *.pdf diff=astextplain *.PDF diff=astextplain *.rtf diff=astextplain *.RTF diff=astextplain ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover # Translations *.mo *.pot # Django stuff: *.log # Sphinx documentation docs/_build/ # PyBuilder target/ # ========================= # Operating System Files # ========================= # OSX # ========================= .DS_Store .AppleDouble .LSOverride # Thumbnails ._* # Files that might appear in the root of a volume .DocumentRevisions-V100 .fseventsd .Spotlight-V100 .TemporaryItems .Trashes .VolumeIcon.icns # Directories potentially created on remote AFP share .AppleDB .AppleDesktop Network Trash Folder Temporary Items .apdisk # Windows # ========================= # Windows image file caches Thumbs.db ehthumbs.db # Folder config file Desktop.ini # Recycle Bin used on file shares $RECYCLE.BIN/ # Windows Installer files *.cab *.msi *.msm *.msp # Windows shortcuts *.lnk ================================================ FILE: LICENSE ================================================ The MIT License (MIT) Copyright (c) 2016 Daniel Slater Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # PyGamePlayer Module to help with running learning agents against PyGame games. Hooks into the PyGame screen update and event.get methods so you can run PyGame games with zero touches to the underlying game file. Can even deal with games with no main() method. Project contains three examples of running this, 2 a minimal examles one with Pong and one with Tetris and a full example of Deep-Q learning against Pong with tensorflow. More information available here http://www.danielslater.net/2015/12/how-to-run-learning-agents-against.html Requirements ---------- - python 2 or 3 - pygame - numpy Getting started ----------- PyGame is probably the best supported library for games in Python it can be downloaded and installed from http://www.pygame.org/download.shtml [Numpy](http://www.scipy.org/scipylib/download.html) is also required Create a Python 2 or 3 environment with both of these in it. Import this project and whatever PyGame game you want to train against into your working area. A bunch of PyGame games can be found here http://www.pygame.org/projects/6 or alternatly just use Pong or Tetris that are included with this project. [exmples/deep_q_pong_player.py](https://github.com/DanielSlater/PyGamePlayer/blob/master/examples/deep_q_pong_player.py) also requires that [tensorflow](https://www.tensorflow.org/versions/r0.8/get_started/os_setup.html) and [matplotlib](http://matplotlib.org/users/installing.html) be installed Example usage for Pong game ----------- ``` from pygame_player import PyGamePlayer class PongPlayer(PyGamePlayer): def __init__(self): super(PongPlayer, self).__init__(force_game_fps=10) # force_game_fps fixes the game clock so that no matter how many real seconds it takes to run a fame # the game behaves as if each frame took the same amount of time # use run_real_time so the game will actually play at the force_game_fps frame rate self.last_bar1_score = 0.0 self.last_bar2_score = 0.0 def get_keys_pressed(self, screen_array, feedback): # TODO: put an actual learning agent here from pygame.constants import K_DOWN return [K_DOWN] # just returns the down key def get_feedback(self): # import must be done here because otherwise importing would cause the game to start playing from games.pong import bar1_score, bar2_score # get the difference in score between this and the last run score_change = (bar1_score - self.last_bar1_score) - (bar2_score - self.last_bar2_score) self.last_bar1_score = bar1_score self.last_bar2_score = bar2_score return score_change if __name__ == '__main__': player = PongPlayer() player.start() ``` Games -------- - [Pong](https://github.com/DanielSlater/PyGamePlayer/blob/master/games/pong.py) - [Tetris](https://github.com/DanielSlater/PyGamePlayer/blob/master/games/tetris.py) - [Mini Pong](https://github.com/DanielSlater/PyGamePlayer/blob/master/games/mini_pong.py) - modified version of pong to run in lower resolutions - [Half Pong](https://github.com/DanielSlater/PyGamePlayer/blob/master/games/half_pong.py) - simplified version of pong with just one bar ================================================ FILE: __init__.py ================================================ __author__ = 'Daniel' ================================================ FILE: examples/__init__.py ================================================ __author__ = 'Daniel' ================================================ FILE: examples/deep_q_half_pong_networks_40x40_6/checkpoint ================================================ model_checkpoint_path: "network-1230000" all_model_checkpoint_paths: "network-1190000" all_model_checkpoint_paths: "network-1200000" all_model_checkpoint_paths: "network-1210000" all_model_checkpoint_paths: "network-1220000" all_model_checkpoint_paths: "network-1230000" ================================================ FILE: examples/deep_q_half_pong_networks_40x40_8/checkpoint ================================================ model_checkpoint_path: "network-1300000" all_model_checkpoint_paths: "network-1260000" all_model_checkpoint_paths: "network-1270000" all_model_checkpoint_paths: "network-1280000" all_model_checkpoint_paths: "network-1290000" all_model_checkpoint_paths: "network-1300000" ================================================ FILE: examples/deep_q_half_pong_player.py ================================================ # This is heavily based off https://github.com/asrivat1/DeepLearningVideoGames # deep q learning agent that runs against Half-Pong. Runs on a much smaller screen and with fewer layers. # Performs significantly above random, but still has someway to go to match google deep mind performance... # To see a trained version of this network start it with the kwargs checkpoint_path="deep_q_half_pong_networks_40x40_8" # and playback_mode="True" import os import random from collections import deque import tensorflow as tf import numpy as np import cv2 from pygame.constants import K_DOWN, K_UP from pygame_player import PyGamePlayer class DeepQHalfPongPlayer(PyGamePlayer): ACTIONS_COUNT = 3 # number of valid actions. In this case up, still and down FUTURE_REWARD_DISCOUNT = 0.99 # decay rate of past observations OBSERVATION_STEPS = 50000. # time steps to observe before training EXPLORE_STEPS = 500000. # frames over which to anneal epsilon INITIAL_RANDOM_ACTION_PROB = 1.0 # starting chance of an action being random FINAL_RANDOM_ACTION_PROB = 0.05 # final chance of an action being random MEMORY_SIZE = 500000 # number of observations to remember MINI_BATCH_SIZE = 200 # size of mini batches STATE_FRAMES = 4 # number of frames to store in the state OBS_LAST_STATE_INDEX, OBS_ACTION_INDEX, OBS_REWARD_INDEX, OBS_CURRENT_STATE_INDEX, OBS_TERMINAL_INDEX = range(5) SAVE_EVERY_X_STEPS = 10000 LEARN_RATE = 1e-6 STORE_SCORES_LEN = 200. SCREEN_WIDTH = 40 SCREEN_HEIGHT = 40 def __init__(self, # to see a trained network change checkpoint_path="deep_q_half_pong_networks_40x40_8" and # playback_mode="True" checkpoint_path="deep_q_half_pong_networks", playback_mode=True, verbose_logging=True): """ Example of deep q network for pong :param checkpoint_path: directory to store checkpoints in :type checkpoint_path: str :param playback_mode: if true games runs in real time mode and demos itself running :type playback_mode: bool :param verbose_logging: If true then extra log information is printed to std out :type verbose_logging: bool """ self._playback_mode = playback_mode self.last_score = 0 super(DeepQHalfPongPlayer, self).__init__(force_game_fps=8, run_real_time=playback_mode) self.verbose_logging = verbose_logging self._checkpoint_path = checkpoint_path self._session = tf.Session() self._input_layer, self._output_layer = self._create_network() self._action = tf.placeholder("float", [None, self.ACTIONS_COUNT]) self._target = tf.placeholder("float", [None]) readout_action = tf.reduce_sum(tf.mul(self._output_layer, self._action), reduction_indices=1) cost = tf.reduce_mean(tf.square(self._target - readout_action)) self._train_operation = tf.train.AdamOptimizer(self.LEARN_RATE).minimize(cost) self._observations = deque() self._last_scores = deque() # set the first action to do nothing self._last_action = np.zeros(self.ACTIONS_COUNT) self._last_action[1] = 1 self._last_state = None self._probability_of_random_action = self.INITIAL_RANDOM_ACTION_PROB self._time = 0 self._session.run(tf.initialize_all_variables()) if not os.path.exists(self._checkpoint_path): os.mkdir(self._checkpoint_path) self._saver = tf.train.Saver() checkpoint = tf.train.get_checkpoint_state(self._checkpoint_path) if checkpoint and checkpoint.model_checkpoint_path: self._saver.restore(self._session, checkpoint.model_checkpoint_path) print("Loaded checkpoints %s" % checkpoint.model_checkpoint_path) elif playback_mode: raise Exception("Could not load checkpoints for playback") def get_keys_pressed(self, screen_array, reward, terminal): # images will be black or white _, screen_binary = cv2.threshold(cv2.cvtColor(screen_array, cv2.COLOR_BGR2GRAY), 1, 255, cv2.THRESH_BINARY) if reward != 0.0: self._last_scores.append(reward) if len(self._last_scores) > self.STORE_SCORES_LEN: self._last_scores.popleft() # first frame must be handled differently if self._last_state is None: # the _last_state will contain the image data from the last self.STATE_FRAMES frames self._last_state = np.stack(tuple(screen_binary for _ in range(self.STATE_FRAMES)), axis=2) return DeepQHalfPongPlayer._key_presses_from_action(self._last_action) screen_binary = np.reshape(screen_binary, (self.SCREEN_WIDTH, self.SCREEN_HEIGHT, 1)) current_state = np.append(self._last_state[:, :, 1:], screen_binary, axis=2) if not self._playback_mode: # store the transition in previous_observations self._observations.append((self._last_state, self._last_action, reward, current_state, terminal)) if len(self._observations) > self.MEMORY_SIZE: self._observations.popleft() # only train if done observing if len(self._observations) > self.OBSERVATION_STEPS: self._train() self._time += 1 # update the old values self._last_state = current_state self._last_action = self._choose_next_action() if not self._playback_mode: # gradually reduce the probability of a random actionself. if self._probability_of_random_action > self.FINAL_RANDOM_ACTION_PROB \ and len(self._observations) > self.OBSERVATION_STEPS: self._probability_of_random_action -= \ (self.INITIAL_RANDOM_ACTION_PROB - self.FINAL_RANDOM_ACTION_PROB) / self.EXPLORE_STEPS print("Time: %s random_action_prob: %s reward %s scores differential %s" % (self._time, self._probability_of_random_action, reward, sum(self._last_scores) / self.STORE_SCORES_LEN)) return DeepQHalfPongPlayer._key_presses_from_action(self._last_action) def _choose_next_action(self): new_action = np.zeros([self.ACTIONS_COUNT]) if (not self._playback_mode) and (random.random() <= self._probability_of_random_action): # choose an action randomly action_index = random.randrange(self.ACTIONS_COUNT) else: # choose an action given our last state readout_t = self._session.run(self._output_layer, feed_dict={self._input_layer: [self._last_state]})[0] if self.verbose_logging: print("Action Q-Values are %s" % readout_t) action_index = np.argmax(readout_t) new_action[action_index] = 1 return new_action def _train(self): # sample a mini_batch to train on mini_batch = random.sample(self._observations, self.MINI_BATCH_SIZE) # get the batch variables previous_states = [d[self.OBS_LAST_STATE_INDEX] for d in mini_batch] actions = [d[self.OBS_ACTION_INDEX] for d in mini_batch] rewards = [d[self.OBS_REWARD_INDEX] for d in mini_batch] current_states = [d[self.OBS_CURRENT_STATE_INDEX] for d in mini_batch] agents_expected_reward = [] # this gives us the agents expected reward for each action we might take agents_reward_per_action = self._session.run(self._output_layer, feed_dict={self._input_layer: current_states}) for i in range(len(mini_batch)): if mini_batch[i][self.OBS_TERMINAL_INDEX]: # this was a terminal frame so there is no future reward... agents_expected_reward.append(rewards[i]) else: agents_expected_reward.append( rewards[i] + self.FUTURE_REWARD_DISCOUNT * np.max(agents_reward_per_action[i])) # learn that these actions in these states lead to this reward self._session.run(self._train_operation, feed_dict={ self._input_layer: previous_states, self._action: actions, self._target: agents_expected_reward}) # save checkpoints for later if self._time % self.SAVE_EVERY_X_STEPS == 0: self._saver.save(self._session, self._checkpoint_path + '/network', global_step=self._time) def _create_network(self): # network weights convolution_weights_1 = tf.Variable(tf.truncated_normal([8, 8, self.STATE_FRAMES, 32], stddev=0.01)) convolution_bias_1 = tf.Variable(tf.constant(0.01, shape=[32])) convolution_weights_2 = tf.Variable(tf.truncated_normal([4, 4, 32, 64], stddev=0.01)) convolution_bias_2 = tf.Variable(tf.constant(0.01, shape=[64])) feed_forward_weights_1 = tf.Variable(tf.truncated_normal([256, 256], stddev=0.01)) feed_forward_bias_1 = tf.Variable(tf.constant(0.01, shape=[256])) feed_forward_weights_2 = tf.Variable(tf.truncated_normal([256, self.ACTIONS_COUNT], stddev=0.01)) feed_forward_bias_2 = tf.Variable(tf.constant(0.01, shape=[self.ACTIONS_COUNT])) input_layer = tf.placeholder("float", [None, self.SCREEN_WIDTH, self.SCREEN_HEIGHT, self.STATE_FRAMES]) hidden_convolutional_layer_1 = tf.nn.relu( tf.nn.conv2d(input_layer, convolution_weights_1, strides=[1, 4, 4, 1], padding="SAME") + convolution_bias_1) hidden_max_pooling_layer_1 = tf.nn.max_pool(hidden_convolutional_layer_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") hidden_convolutional_layer_2 = tf.nn.relu( tf.nn.conv2d(hidden_max_pooling_layer_1, convolution_weights_2, strides=[1, 2, 2, 1], padding="SAME") + convolution_bias_2) hidden_max_pooling_layer_2 = tf.nn.max_pool(hidden_convolutional_layer_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") hidden_convolutional_layer_3_flat = tf.reshape(hidden_max_pooling_layer_2, [-1, 256]) final_hidden_activations = tf.nn.relu( tf.matmul(hidden_convolutional_layer_3_flat, feed_forward_weights_1) + feed_forward_bias_1) output_layer = tf.matmul(final_hidden_activations, feed_forward_weights_2) + feed_forward_bias_2 return input_layer, output_layer @staticmethod def _key_presses_from_action(action_set): if action_set[0] == 1: return [K_DOWN] elif action_set[1] == 1: return [] elif action_set[2] == 1: return [K_UP] raise Exception("Unexpected action") def get_feedback(self): from games.half_pong import score # get the difference in score between this and the last run score_change = (score - self.last_score) self.last_score = score return float(score_change), score_change == -1 def start(self): super(DeepQHalfPongPlayer, self).start() from games.half_pong import run run(screen_width=self.SCREEN_WIDTH, screen_height=self.SCREEN_HEIGHT) if __name__ == '__main__': # to see a trained network add the args checkpoint_path="deep_q_half_pong_networks_40x40_8" and # playback_mode="True" player = DeepQHalfPongPlayer() player.start() ================================================ FILE: examples/deep_q_pong_player.py ================================================ # This is heavily based off https://github.com/asrivat1/DeepLearningVideoGames import os import random from collections import deque from pong_player import PongPlayer import tensorflow as tf import numpy as np import cv2 from pygame.constants import K_DOWN, K_UP class DeepQPongPlayer(PongPlayer): ACTIONS_COUNT = 3 # number of valid actions. In this case up, still and down FUTURE_REWARD_DISCOUNT = 0.99 # decay rate of past observations OBSERVATION_STEPS = 50000. # time steps to observe before training EXPLORE_STEPS = 500000. # frames over which to anneal epsilon INITIAL_RANDOM_ACTION_PROB = 1.0 # starting chance of an action being random FINAL_RANDOM_ACTION_PROB = 0.05 # final chance of an action being random MEMORY_SIZE = 500000 # number of observations to remember MINI_BATCH_SIZE = 100 # size of mini batches STATE_FRAMES = 4 # number of frames to store in the state RESIZED_SCREEN_X, RESIZED_SCREEN_Y = (80, 80) OBS_LAST_STATE_INDEX, OBS_ACTION_INDEX, OBS_REWARD_INDEX, OBS_CURRENT_STATE_INDEX, OBS_TERMINAL_INDEX = range(5) SAVE_EVERY_X_STEPS = 10000 LEARN_RATE = 1e-6 STORE_SCORES_LEN = 200. def __init__(self, checkpoint_path="deep_q_pong_networks", playback_mode=False, verbose_logging=False): """ Example of deep q network for pong :param checkpoint_path: directory to store checkpoints in :type checkpoint_path: str :param playback_mode: if true games runs in real time mode and demos itself running :type playback_mode: bool :param verbose_logging: If true then extra log information is printed to std out :type verbose_logging: bool """ self._playback_mode = playback_mode super(DeepQPongPlayer, self).__init__(force_game_fps=8, run_real_time=playback_mode) self.verbose_logging = verbose_logging self._checkpoint_path = checkpoint_path self._session = tf.Session() self._input_layer, self._output_layer = DeepQPongPlayer._create_network() self._action = tf.placeholder("float", [None, self.ACTIONS_COUNT]) self._target = tf.placeholder("float", [None]) readout_action = tf.reduce_sum(tf.mul(self._output_layer, self._action), reduction_indices=1) cost = tf.reduce_mean(tf.square(self._target - readout_action)) self._train_operation = tf.train.AdamOptimizer(self.LEARN_RATE).minimize(cost) self._observations = deque() self._last_scores = deque() # set the first action to do nothing self._last_action = np.zeros(self.ACTIONS_COUNT) self._last_action[1] = 1 self._last_state = None self._probability_of_random_action = self.INITIAL_RANDOM_ACTION_PROB self._time = 0 self._session.run(tf.initialize_all_variables()) if not os.path.exists(self._checkpoint_path): os.mkdir(self._checkpoint_path) self._saver = tf.train.Saver() checkpoint = tf.train.get_checkpoint_state(self._checkpoint_path) if checkpoint and checkpoint.model_checkpoint_path: self._saver.restore(self._session, checkpoint.model_checkpoint_path) print("Loaded checkpoints %s" % checkpoint.model_checkpoint_path) elif playback_mode: raise Exception("Could not load checkpoints for playback") def get_keys_pressed(self, screen_array, reward, terminal): # scale down screen image screen_resized_grayscaled = cv2.cvtColor(cv2.resize(screen_array, (self.RESIZED_SCREEN_X, self.RESIZED_SCREEN_Y)), cv2.COLOR_BGR2GRAY) # set the pixels to all be 0. or 1. _, screen_resized_binary = cv2.threshold(screen_resized_grayscaled, 1, 255, cv2.THRESH_BINARY) if reward != 0.0: self._last_scores.append(reward) if len(self._last_scores) > self.STORE_SCORES_LEN: self._last_scores.popleft() # first frame must be handled differently if self._last_state is None: # the _last_state will contain the image data from the last self.STATE_FRAMES frames self._last_state = np.stack(tuple(screen_resized_binary for _ in range(self.STATE_FRAMES)), axis=2) return DeepQPongPlayer._key_presses_from_action(self._last_action) screen_resized_binary = np.reshape(screen_resized_binary, (self.RESIZED_SCREEN_X, self.RESIZED_SCREEN_Y, 1)) current_state = np.append(self._last_state[:, :, 1:], screen_resized_binary, axis=2) if not self._playback_mode: # store the transition in previous_observations self._observations.append((self._last_state, self._last_action, reward, current_state, terminal)) if len(self._observations) > self.MEMORY_SIZE: self._observations.popleft() # only train if done observing if len(self._observations) > self.OBSERVATION_STEPS: self._train() self._time += 1 # update the old values self._last_state = current_state self._last_action = self._choose_next_action() if not self._playback_mode: # gradually reduce the probability of a random actionself. if self._probability_of_random_action > self.FINAL_RANDOM_ACTION_PROB \ and len(self._observations) > self.OBSERVATION_STEPS: self._probability_of_random_action -= \ (self.INITIAL_RANDOM_ACTION_PROB - self.FINAL_RANDOM_ACTION_PROB) / self.EXPLORE_STEPS print("Time: %s random_action_prob: %s reward %s scores differential %s" % (self._time, self._probability_of_random_action, reward, sum(self._last_scores) / self.STORE_SCORES_LEN)) return DeepQPongPlayer._key_presses_from_action(self._last_action) def _choose_next_action(self): new_action = np.zeros([self.ACTIONS_COUNT]) if (not self._playback_mode) and (random.random() <= self._probability_of_random_action): # choose an action randomly action_index = random.randrange(self.ACTIONS_COUNT) else: # choose an action given our last state readout_t = self._session.run(self._output_layer, feed_dict={self._input_layer: [self._last_state]})[0] if self.verbose_logging: print("Action Q-Values are %s" % readout_t) action_index = np.argmax(readout_t) new_action[action_index] = 1 return new_action def _train(self): # sample a mini_batch to train on mini_batch = random.sample(self._observations, self.MINI_BATCH_SIZE) # get the batch variables previous_states = [d[self.OBS_LAST_STATE_INDEX] for d in mini_batch] actions = [d[self.OBS_ACTION_INDEX] for d in mini_batch] rewards = [d[self.OBS_REWARD_INDEX] for d in mini_batch] current_states = [d[self.OBS_CURRENT_STATE_INDEX] for d in mini_batch] agents_expected_reward = [] # this gives us the agents expected reward for each action we might agents_reward_per_action = self._session.run(self._output_layer, feed_dict={self._input_layer: current_states}) for i in range(len(mini_batch)): if mini_batch[i][self.OBS_TERMINAL_INDEX]: # this was a terminal frame so there is no future reward... agents_expected_reward.append(rewards[i]) else: agents_expected_reward.append( rewards[i] + self.FUTURE_REWARD_DISCOUNT * np.max(agents_reward_per_action[i])) # learn that these actions in these states lead to this reward self._session.run(self._train_operation, feed_dict={ self._input_layer: previous_states, self._action: actions, self._target: agents_expected_reward}) # save checkpoints for later if self._time % self.SAVE_EVERY_X_STEPS == 0: self._saver.save(self._session, self._checkpoint_path + '/network', global_step=self._time) @staticmethod def _create_network(): # network weights convolution_weights_1 = tf.Variable(tf.truncated_normal([8, 8, DeepQPongPlayer.STATE_FRAMES, 32], stddev=0.01)) convolution_bias_1 = tf.Variable(tf.constant(0.01, shape=[32])) convolution_weights_2 = tf.Variable(tf.truncated_normal([4, 4, 32, 64], stddev=0.01)) convolution_bias_2 = tf.Variable(tf.constant(0.01, shape=[64])) convolution_weights_3 = tf.Variable(tf.truncated_normal([3, 3, 64, 64], stddev=0.01)) convolution_bias_3 = tf.Variable(tf.constant(0.01, shape=[64])) feed_forward_weights_1 = tf.Variable(tf.truncated_normal([256, 256], stddev=0.01)) feed_forward_bias_1 = tf.Variable(tf.constant(0.01, shape=[256])) feed_forward_weights_2 = tf.Variable(tf.truncated_normal([256, DeepQPongPlayer.ACTIONS_COUNT], stddev=0.01)) feed_forward_bias_2 = tf.Variable(tf.constant(0.01, shape=[DeepQPongPlayer.ACTIONS_COUNT])) input_layer = tf.placeholder("float", [None, DeepQPongPlayer.RESIZED_SCREEN_X, DeepQPongPlayer.RESIZED_SCREEN_Y, DeepQPongPlayer.STATE_FRAMES]) hidden_convolutional_layer_1 = tf.nn.relu( tf.nn.conv2d(input_layer, convolution_weights_1, strides=[1, 4, 4, 1], padding="SAME") + convolution_bias_1) hidden_max_pooling_layer_1 = tf.nn.max_pool(hidden_convolutional_layer_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") hidden_convolutional_layer_2 = tf.nn.relu( tf.nn.conv2d(hidden_max_pooling_layer_1, convolution_weights_2, strides=[1, 2, 2, 1], padding="SAME") + convolution_bias_2) hidden_max_pooling_layer_2 = tf.nn.max_pool(hidden_convolutional_layer_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") hidden_convolutional_layer_3 = tf.nn.relu( tf.nn.conv2d(hidden_max_pooling_layer_2, convolution_weights_3, strides=[1, 1, 1, 1], padding="SAME") + convolution_bias_3) hidden_max_pooling_layer_3 = tf.nn.max_pool(hidden_convolutional_layer_3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") hidden_convolutional_layer_3_flat = tf.reshape(hidden_max_pooling_layer_3, [-1, 256]) final_hidden_activations = tf.nn.relu( tf.matmul(hidden_convolutional_layer_3_flat, feed_forward_weights_1) + feed_forward_bias_1) output_layer = tf.matmul(final_hidden_activations, feed_forward_weights_2) + feed_forward_bias_2 return input_layer, output_layer @staticmethod def _key_presses_from_action(action_set): if action_set[0] == 1: return [K_DOWN] elif action_set[1] == 1: return [] elif action_set[2] == 1: return [K_UP] raise Exception("Unexpected action") if __name__ == '__main__': player = DeepQPongPlayer() player.start() ================================================ FILE: examples/pong_player.py ================================================ from pygame.constants import K_DOWN from pygame_player import PyGamePlayer class PongPlayer(PyGamePlayer): def __init__(self, force_game_fps=10, run_real_time=False): """ Example class for playing Pong """ super(PongPlayer, self).__init__(force_game_fps=force_game_fps, run_real_time=run_real_time) self.last_bar1_score = 0.0 self.last_bar2_score = 0.0 def get_keys_pressed(self, screen_array, feedback, terminal): # TODO: put an actual learning agent here return [K_DOWN] def get_feedback(self): # import must be done here because otherwise importing would cause the game to start playing from games.pong import bar1_score, bar2_score # get the difference in score between this and the last run score_change = (bar1_score - self.last_bar1_score) - (bar2_score - self.last_bar2_score) self.last_bar1_score = bar1_score self.last_bar2_score = bar2_score return float(score_change), score_change != 0 def start(self): super(PongPlayer, self).start() import games.pong if __name__ == '__main__': player = PongPlayer() player.start() ================================================ FILE: examples/tetris_player.py ================================================ from pygame.constants import K_LEFT from pygame_player import PyGamePlayer, function_intercept import games.tetris class TetrisPlayer(PyGamePlayer): def __init__(self): """ Example class for playing Tetris """ super(TetrisPlayer, self).__init__(force_game_fps=5) self._toggle_down_key = True self._new_reward = 0.0 self._terminal = False def add_removed_lines_to_reward(lines_removed, *args, **kwargs): self._new_reward += lines_removed return lines_removed def check_for_game_over(ret, text): if text == 'Game Over': self._terminal = True # to get the reward we will intercept the removeCompleteLines method and store what it returns games.tetris.removeCompleteLines = function_intercept(games.tetris.removeCompleteLines, add_removed_lines_to_reward) # find out if we have had a game over games.tetris.showTextScreen = function_intercept(games.tetris.showTextScreen, check_for_game_over) def get_keys_pressed(self, screen_array, feedback, terminal): # TODO: put an actual learning agent here # toggle key presses so we get through the start menu if self._toggle_down_key: self._toggle_down_key = False return [K_LEFT] else: self._toggle_down_key = True return [] def get_feedback(self): temp = self._new_reward self._new_reward = 0.0 terminal = self._terminal self._terminal = False return temp, terminal def start(self): super(TetrisPlayer, self).start() games.tetris.main() if __name__ == '__main__': player = TetrisPlayer() player.start() ================================================ FILE: games/__init__.py ================================================ __author__ = 'Daniel' ================================================ FILE: games/half_pong.py ================================================ # Modified from http://www.pygame.org/project-Very+simple+Pong+game-816-.html import pygame from pygame.locals import * score = 0 def run(screen_width=40., screen_height=40.): global score pygame.init() bar_width, bar_height = screen_width / 32., screen_height / 6. bar_dist_from_edge = screen_width / 64. circle_diameter = screen_height / 16. circle_radius = circle_diameter / 2. bar_1_start_x = bar_dist_from_edge bar_start_y = (screen_height - bar_height) / 2. bar_max_y = screen_height - bar_height - bar_dist_from_edge circle_start_x, circle_start_y = (screen_width - circle_diameter), (screen_width - circle_diameter) / 2. screen = pygame.display.set_mode((int(screen_width), int(screen_height)), 0, 32) # Creating 2 bars, a ball and background. back = pygame.Surface((int(screen_width), int(screen_height))) background = back.convert() background.fill((0, 0, 0)) bar = pygame.Surface((int(bar_width), int(bar_height))) bar1 = bar.convert() bar1.fill((255, 255, 255)) circle_surface = pygame.Surface((int(circle_diameter), int(circle_diameter))) pygame.draw.circle(circle_surface, (255, 255, 255), (int(circle_radius), int(circle_radius)), int(circle_radius)) circle = circle_surface.convert() circle.set_colorkey((0, 0, 0)) # some definitions bar1_x = bar_1_start_x bar1_y = bar_start_y circle_x, circle_y = circle_start_x, circle_start_y bar1_move, bar2_move = 0., 0. speed_x, speed_y, speed_bar = -screen_width / 1.28, screen_height / 1.92, screen_height * 1.2 clock = pygame.time.Clock() done = False while not done: for event in pygame.event.get(): # User did something if event.type == pygame.QUIT: # If user clicked close done = True # Flag that we are done so we exit this loop if event.type == KEYDOWN: if event.key == K_UP: bar1_move = -ai_speed elif event.key == K_DOWN: bar1_move = ai_speed elif event.type == KEYUP: if event.key == K_UP: bar1_move = 0. elif event.key == K_DOWN: bar1_move = 0. screen.blit(background, (0, 0)) screen.blit(bar1, (bar1_x, bar1_y)) screen.blit(circle, (circle_x, circle_y)) bar1_y += bar1_move # movement of circle time_passed = clock.tick(30) time_sec = time_passed / 1000.0 circle_x += speed_x * time_sec circle_y += speed_y * time_sec ai_speed = speed_bar * time_sec if bar1_y >= bar_max_y: bar1_y = bar_max_y elif bar1_y <= bar_dist_from_edge: bar1_y = bar_dist_from_edge if circle_x < bar_dist_from_edge + bar_width: if circle_y >= bar1_y - circle_radius and circle_y <= bar1_y + bar_height + circle_radius: circle_x = bar_dist_from_edge + bar_width speed_x = -speed_x if circle_x < -circle_radius: score -= 1 circle_x, circle_y = circle_start_x, circle_start_y bar1_y, bar_2_y = bar_start_y, bar_start_y elif circle_x > screen_width - circle_diameter: score += 1 speed_x = -speed_x if circle_y <= bar_dist_from_edge: speed_y = -speed_y circle_y = bar_dist_from_edge elif circle_y >= screen_height - circle_diameter - circle_radius: speed_y = -speed_y circle_y = screen_height - circle_diameter - circle_radius pygame.display.update() pygame.quit() if __name__ == '__main__': run() ================================================ FILE: games/mini_pong.py ================================================ # Modified from http://www.pygame.org/project-Very+simple+Pong+game-816-.html import pygame from pygame.locals import * bar1_score, bar2_score = 0, 0 def run(screen_width=40., screen_height=40.): global bar1_score, bar2_score pygame.init() bar_width, bar_height = screen_width / 32., screen_height / 9.6 bar_dist_from_edge = screen_width / 64. circle_diameter = screen_height / 16. circle_radius = circle_diameter / 2. bar_1_start_x, bar_2_start_x = bar_dist_from_edge, screen_width - bar_dist_from_edge - bar_width bar_start_y = (screen_height - bar_height) / 2. bar_max_y = screen_height - bar_height - bar_dist_from_edge circle_start_x, circle_start_y = (screen_width - circle_diameter) / 2., (screen_width - circle_diameter) / 2. screen = pygame.display.set_mode((int(screen_width), int(screen_height)), 0, 32) # Creating 2 bars, a ball and background. back = pygame.Surface((int(screen_width), int(screen_height))) background = back.convert() background.fill((0, 0, 0)) bar = pygame.Surface((int(bar_width), int(bar_height))) bar1 = bar.convert() bar1.fill((255, 255, 255)) bar2 = bar.convert() bar2.fill((255, 255, 255)) circle_surface = pygame.Surface((int(circle_diameter), int(circle_diameter))) pygame.draw.circle(circle_surface, (255, 255, 255), (int(circle_radius), int(circle_radius)), int(circle_radius)) circle = circle_surface.convert() circle.set_colorkey((0, 0, 0)) # some definitions bar1_x, bar2_x = bar_1_start_x, bar_2_start_x bar1_y, bar2_y = bar_start_y, bar_start_y circle_x, circle_y = circle_start_x, circle_start_y bar1_move, bar2_move = 0., 0. speed_x, speed_y, speed_circle = screen_width / 2.56, screen_height / 1.92, screen_width / 2.56 # 250., 250., 250. clock = pygame.time.Clock() done = False while not done: for event in pygame.event.get(): # User did something if event.type == pygame.QUIT: # If user clicked close done = True # Flag that we are done so we exit this loop if event.type == KEYDOWN: if event.key == K_UP: bar1_move = -ai_speed elif event.key == K_DOWN: bar1_move = ai_speed elif event.type == KEYUP: if event.key == K_UP: bar1_move = 0. elif event.key == K_DOWN: bar1_move = 0. screen.blit(background, (0, 0)) screen.blit(bar1, (bar1_x, bar1_y)) screen.blit(bar2, (bar2_x, bar2_y)) screen.blit(circle, (circle_x, circle_y)) bar1_y += bar1_move # movement of circle time_passed = clock.tick(30) time_sec = time_passed / 1000.0 circle_x += speed_x * time_sec circle_y += speed_y * time_sec ai_speed = speed_circle * time_sec # AI of the computer. if circle_x >= (screen_width / 2.) - circle_diameter: if not bar2_y == circle_y + circle_radius: if bar2_y < circle_y + circle_radius: bar2_y += ai_speed if bar2_y > circle_y - (bar_height - circle_radius): bar2_y -= ai_speed else: bar2_y == circle_y + circle_radius if bar1_y >= bar_max_y: bar1_y = bar_max_y elif bar1_y <= bar_dist_from_edge: bar1_y = bar_dist_from_edge if bar2_y >= bar_max_y: bar2_y = bar_max_y elif bar2_y <= bar_dist_from_edge: bar2_y = bar_dist_from_edge # since i don't know anything about collision, ball hitting bars goes like this. if circle_x <= bar1_x + bar_dist_from_edge: if circle_y >= bar1_y - circle_radius and circle_y <= bar1_y + (bar_height - circle_radius): circle_x = bar_dist_from_edge + bar_width speed_x = -speed_x if circle_x >= bar2_x - circle_diameter: if circle_y >= bar2_y - circle_radius and circle_y <= bar2_y + (bar_height - circle_radius): circle_x = screen_width - bar_dist_from_edge - bar_width - circle_diameter speed_x = -speed_x if circle_x < -circle_radius: bar2_score += 1 circle_x, circle_y = (screen_width + circle_diameter) / 2., circle_start_y bar1_y, bar_2_y = bar_start_y, bar_start_y elif circle_x > screen_width - circle_diameter: bar1_score += 1 circle_x, circle_y = circle_start_x, circle_start_y bar1_y, bar2_y = bar_start_y, bar_start_y if circle_y <= bar_dist_from_edge: speed_y = -speed_y circle_y = bar_dist_from_edge elif circle_y >= screen_height - circle_diameter - circle_radius: speed_y = -speed_y circle_y = screen_height - circle_diameter - circle_radius pygame.display.update() pygame.quit() if __name__ == '__main__': run() ================================================ FILE: games/pong.py ================================================ #Modified from http://www.pygame.org/project-Very+simple+Pong+game-816-.html # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. import numpy import pygame from pygame.locals import * from sys import exit import random import pygame.surfarray as surfarray pygame.init() screen = pygame.display.set_mode((640,480),0,32) #Creating 2 bars, a ball and background. back = pygame.Surface((640,480)) background = back.convert() background.fill((0,0,0)) bar = pygame.Surface((10,50)) bar1 = bar.convert() bar1.fill((255,255,255)) bar2 = bar.convert() bar2.fill((255,255,255)) circ_sur = pygame.Surface((15,15)) circ = pygame.draw.circle(circ_sur,(255,255,255),(int(15/2),int(15/2)),int(15/2)) circle = circ_sur.convert() circle.set_colorkey((0,0,0)) # some definitions bar1_x, bar2_x = 10. , 620. bar1_y, bar2_y = 215. , 215. circle_x, circle_y = 307.5, 232.5 bar1_move, bar2_move = 0. , 0. speed_x, speed_y, speed_circ = 250., 250., 250. bar1_score, bar2_score = 0,0 #clock and font objects clock = pygame.time.Clock() font = pygame.font.SysFont("calibri",40) done = False while done==False: for event in pygame.event.get(): # User did something if event.type == pygame.QUIT: # If user clicked close done = True # Flag that we are done so we exit this loop if event.type == KEYDOWN: if event.key == K_UP: bar1_move = -ai_speed elif event.key == K_DOWN: bar1_move = ai_speed elif event.type == KEYUP: if event.key == K_UP: bar1_move = 0. elif event.key == K_DOWN: bar1_move = 0. score1 = font.render(str(bar1_score), True,(255,255,255)) score2 = font.render(str(bar2_score), True,(255,255,255)) screen.blit(background,(0,0)) frame = pygame.draw.rect(screen,(255,255,255),Rect((5,5),(630,470)),2) middle_line = pygame.draw.aaline(screen,(255,255,255),(330,5),(330,475)) screen.blit(bar1,(bar1_x,bar1_y)) screen.blit(bar2,(bar2_x,bar2_y)) screen.blit(circle,(circle_x,circle_y)) screen.blit(score1,(250.,210.)) screen.blit(score2,(380.,210.)) bar1_y += bar1_move # movement of circle time_passed = clock.tick(30) time_sec = time_passed / 1000.0 circle_x += speed_x * time_sec circle_y += speed_y * time_sec ai_speed = speed_circ * time_sec #AI of the computer. if circle_x >= 305.: if not bar2_y == circle_y + 7.5: if bar2_y < circle_y + 7.5: bar2_y += ai_speed if bar2_y > circle_y - 42.5: bar2_y -= ai_speed else: bar2_y == circle_y + 7.5 if bar1_y >= 420.: bar1_y = 420. elif bar1_y <= 10. : bar1_y = 10. if bar2_y >= 420.: bar2_y = 420. elif bar2_y <= 10.: bar2_y = 10. #since i don't know anything about collision, ball hitting bars goes like this. if circle_x <= bar1_x + 10.: if circle_y >= bar1_y - 7.5 and circle_y <= bar1_y + 42.5: circle_x = 20. speed_x = -speed_x if circle_x >= bar2_x - 15.: if circle_y >= bar2_y - 7.5 and circle_y <= bar2_y + 42.5: circle_x = 605. speed_x = -speed_x if circle_x < 5.: bar2_score += 1 circle_x, circle_y = 320., 232.5 bar1_y,bar_2_y = 215., 215. elif circle_x > 620.: bar1_score += 1 circle_x, circle_y = 307.5, 232.5 bar1_y, bar2_y = 215., 215. if circle_y <= 10.: speed_y = -speed_y circle_y = 10. elif circle_y >= 457.5: speed_y = -speed_y circle_y = 457.5 pygame.display.update() pygame.quit() ================================================ FILE: games/tetris.py ================================================ # Tetromino (a Tetris clone) # By Al Sweigart al@inventwithpython.com # http://inventwithpython.com/pygame # Released under a "Simplified BSD" license import random, time, pygame, sys from pygame.locals import * FPS = 25 WINDOWWIDTH = 640 WINDOWHEIGHT = 480 BOXSIZE = 20 BOARDWIDTH = 10 BOARDHEIGHT = 20 BLANK = '.' MOVESIDEWAYSFREQ = 0.15 MOVEDOWNFREQ = 0.1 XMARGIN = int((WINDOWWIDTH - BOARDWIDTH * BOXSIZE) / 2) TOPMARGIN = WINDOWHEIGHT - (BOARDHEIGHT * BOXSIZE) - 5 # R G B WHITE = (255, 255, 255) GRAY = (185, 185, 185) BLACK = ( 0, 0, 0) RED = (155, 0, 0) LIGHTRED = (175, 20, 20) GREEN = ( 0, 155, 0) LIGHTGREEN = ( 20, 175, 20) BLUE = ( 0, 0, 155) LIGHTBLUE = ( 20, 20, 175) YELLOW = (155, 155, 0) LIGHTYELLOW = (175, 175, 20) BORDERCOLOR = BLUE BGCOLOR = BLACK TEXTCOLOR = WHITE TEXTSHADOWCOLOR = GRAY COLORS = ( BLUE, GREEN, RED, YELLOW) LIGHTCOLORS = (LIGHTBLUE, LIGHTGREEN, LIGHTRED, LIGHTYELLOW) assert len(COLORS) == len(LIGHTCOLORS) # each color must have light color TEMPLATEWIDTH = 5 TEMPLATEHEIGHT = 5 S_SHAPE_TEMPLATE = [['.....', '.....', '..OO.', '.OO..', '.....'], ['.....', '..O..', '..OO.', '...O.', '.....']] Z_SHAPE_TEMPLATE = [['.....', '.....', '.OO..', '..OO.', '.....'], ['.....', '..O..', '.OO..', '.O...', '.....']] I_SHAPE_TEMPLATE = [['..O..', '..O..', '..O..', '..O..', '.....'], ['.....', '.....', 'OOOO.', '.....', '.....']] O_SHAPE_TEMPLATE = [['.....', '.....', '.OO..', '.OO..', '.....']] J_SHAPE_TEMPLATE = [['.....', '.O...', '.OOO.', '.....', '.....'], ['.....', '..OO.', '..O..', '..O..', '.....'], ['.....', '.....', '.OOO.', '...O.', '.....'], ['.....', '..O..', '..O..', '.OO..', '.....']] L_SHAPE_TEMPLATE = [['.....', '...O.', '.OOO.', '.....', '.....'], ['.....', '..O..', '..O..', '..OO.', '.....'], ['.....', '.....', '.OOO.', '.O...', '.....'], ['.....', '.OO..', '..O..', '..O..', '.....']] T_SHAPE_TEMPLATE = [['.....', '..O..', '.OOO.', '.....', '.....'], ['.....', '..O..', '..OO.', '..O..', '.....'], ['.....', '.....', '.OOO.', '..O..', '.....'], ['.....', '..O..', '.OO..', '..O..', '.....']] PIECES = {'S': S_SHAPE_TEMPLATE, 'Z': Z_SHAPE_TEMPLATE, 'J': J_SHAPE_TEMPLATE, 'L': L_SHAPE_TEMPLATE, 'I': I_SHAPE_TEMPLATE, 'O': O_SHAPE_TEMPLATE, 'T': T_SHAPE_TEMPLATE} def main(): global FPSCLOCK, DISPLAYSURF, BASICFONT, BIGFONT pygame.init() FPSCLOCK = pygame.time.Clock() DISPLAYSURF = pygame.display.set_mode((WINDOWWIDTH, WINDOWHEIGHT)) BASICFONT = pygame.font.Font('freesansbold.ttf', 18) BIGFONT = pygame.font.Font('freesansbold.ttf', 100) pygame.display.set_caption('Tetromino') showTextScreen('Tetromino') while True: # game loop #if random.randint(0, 1) == 0: # pygame.mixer.music.load('tetrisb.mid') #else: # pygame.mixer.music.load('tetrisc.mid') #pygame.mixer.music.play(-1, 0.0) runGame() #pygame.mixer.music.stop() showTextScreen('Game Over') def runGame(): # setup variables for the start of the game board = getBlankBoard() lastMoveDownTime = time.time() lastMoveSidewaysTime = time.time() lastFallTime = time.time() movingDown = False # note: there is no movingUp variable movingLeft = False movingRight = False score = 0 level, fallFreq = calculateLevelAndFallFreq(score) fallingPiece = getNewPiece() nextPiece = getNewPiece() while True: # game loop if fallingPiece == None: # No falling piece in play, so start a new piece at the top fallingPiece = nextPiece nextPiece = getNewPiece() lastFallTime = time.time() # reset lastFallTime if not isValidPosition(board, fallingPiece): return # can't fit a new piece on the board, so game over checkForQuit() for event in pygame.event.get(): # event handling loop if event.type == KEYUP: if (event.key == K_p): # Pausing the game DISPLAYSURF.fill(BGCOLOR) pygame.mixer.music.stop() showTextScreen('Paused') # pause until a key press pygame.mixer.music.play(-1, 0.0) lastFallTime = time.time() lastMoveDownTime = time.time() lastMoveSidewaysTime = time.time() elif (event.key == K_LEFT or event.key == K_a): movingLeft = False elif (event.key == K_RIGHT or event.key == K_d): movingRight = False elif (event.key == K_DOWN or event.key == K_s): movingDown = False elif event.type == KEYDOWN: # moving the piece sideways if (event.key == K_LEFT or event.key == K_a) and isValidPosition(board, fallingPiece, adjX=-1): fallingPiece['x'] -= 1 movingLeft = True movingRight = False lastMoveSidewaysTime = time.time() elif (event.key == K_RIGHT or event.key == K_d) and isValidPosition(board, fallingPiece, adjX=1): fallingPiece['x'] += 1 movingRight = True movingLeft = False lastMoveSidewaysTime = time.time() # rotating the piece (if there is room to rotate) elif (event.key == K_UP or event.key == K_w): fallingPiece['rotation'] = (fallingPiece['rotation'] + 1) % len(PIECES[fallingPiece['shape']]) if not isValidPosition(board, fallingPiece): fallingPiece['rotation'] = (fallingPiece['rotation'] - 1) % len(PIECES[fallingPiece['shape']]) elif (event.key == K_q): # rotate the other direction fallingPiece['rotation'] = (fallingPiece['rotation'] - 1) % len(PIECES[fallingPiece['shape']]) if not isValidPosition(board, fallingPiece): fallingPiece['rotation'] = (fallingPiece['rotation'] + 1) % len(PIECES[fallingPiece['shape']]) # making the piece fall faster with the down key elif (event.key == K_DOWN or event.key == K_s): movingDown = True if isValidPosition(board, fallingPiece, adjY=1): fallingPiece['y'] += 1 lastMoveDownTime = time.time() # move the current piece all the way down elif event.key == K_SPACE: movingDown = False movingLeft = False movingRight = False for i in range(1, BOARDHEIGHT): if not isValidPosition(board, fallingPiece, adjY=i): break fallingPiece['y'] += i - 1 # handle moving the piece because of user input if (movingLeft or movingRight) and time.time() - lastMoveSidewaysTime > MOVESIDEWAYSFREQ: if movingLeft and isValidPosition(board, fallingPiece, adjX=-1): fallingPiece['x'] -= 1 elif movingRight and isValidPosition(board, fallingPiece, adjX=1): fallingPiece['x'] += 1 lastMoveSidewaysTime = time.time() if movingDown and time.time() - lastMoveDownTime > MOVEDOWNFREQ and isValidPosition(board, fallingPiece, adjY=1): fallingPiece['y'] += 1 lastMoveDownTime = time.time() # let the piece fall if it is time to fall if time.time() - lastFallTime > fallFreq: # see if the piece has landed if not isValidPosition(board, fallingPiece, adjY=1): # falling piece has landed, set it on the board addToBoard(board, fallingPiece) score += removeCompleteLines(board) level, fallFreq = calculateLevelAndFallFreq(score) fallingPiece = None else: # piece did not land, just move the piece down fallingPiece['y'] += 1 lastFallTime = time.time() # drawing everything on the screen DISPLAYSURF.fill(BGCOLOR) drawBoard(board) drawStatus(score, level) drawNextPiece(nextPiece)#Here if fallingPiece != None: drawPiece(fallingPiece) pygame.display.update() FPSCLOCK.tick(FPS) def makeTextObjs(text, font, color): surf = font.render(text, True, color) return surf, surf.get_rect() def terminate(): pygame.quit() sys.exit() def checkForKeyPress(): # Go through event queue looking for a KEYUP event. # Grab KEYDOWN events to remove them from the event queue. checkForQuit() for event in pygame.event.get([KEYDOWN, KEYUP]): if event.type == KEYDOWN: continue return event.key return None def showTextScreen(text): # This function displays large text in the # center of the screen until a key is pressed. # Draw the text drop shadow titleSurf, titleRect = makeTextObjs(text, BIGFONT, TEXTSHADOWCOLOR) titleRect.center = (int(WINDOWWIDTH / 2), int(WINDOWHEIGHT / 2)) DISPLAYSURF.blit(titleSurf, titleRect) # Draw the text titleSurf, titleRect = makeTextObjs(text, BIGFONT, TEXTCOLOR) titleRect.center = (int(WINDOWWIDTH / 2) - 3, int(WINDOWHEIGHT / 2) - 3) DISPLAYSURF.blit(titleSurf, titleRect) # Draw the additional "Press a key to play." text. pressKeySurf, pressKeyRect = makeTextObjs('Press a key to play.', BASICFONT, TEXTCOLOR) pressKeyRect.center = (int(WINDOWWIDTH / 2), int(WINDOWHEIGHT / 2) + 100) DISPLAYSURF.blit(pressKeySurf, pressKeyRect) while checkForKeyPress() == None: pygame.display.update() FPSCLOCK.tick() def checkForQuit(): for event in pygame.event.get(QUIT): # get all the QUIT events terminate() # terminate if any QUIT events are present for event in pygame.event.get(KEYUP): # get all the KEYUP events if event.key == K_ESCAPE: terminate() # terminate if the KEYUP event was for the Esc key pygame.event.post(event) # put the other KEYUP event objects back def calculateLevelAndFallFreq(score): # Based on the score, return the level the player is on and # how many seconds pass until a falling piece falls one space. level = int(score / 10) + 1 fallFreq = 0.27 - (level * 0.02) return level, fallFreq def getNewPiece(): # return a random new piece in a random rotation and color shape = random.choice(list(PIECES.keys())) newPiece = {'shape': shape, 'rotation': random.randint(0, len(PIECES[shape]) - 1), 'x': int(BOARDWIDTH / 2) - int(TEMPLATEWIDTH / 2), 'y': -2, # start it above the board (i.e. less than 0) 'color': random.randint(0, len(COLORS)-1)} return newPiece def addToBoard(board, piece): # fill in the board based on piece's location, shape, and rotation for x in range(TEMPLATEWIDTH): for y in range(TEMPLATEHEIGHT): if PIECES[piece['shape']][piece['rotation']][y][x] != BLANK: board[x + piece['x']][y + piece['y']] = piece['color'] def getBlankBoard(): # create and return a new blank board data structure board = [] for i in range(BOARDWIDTH): board.append([BLANK] * BOARDHEIGHT) return board def isOnBoard(x, y): return x >= 0 and x < BOARDWIDTH and y < BOARDHEIGHT def isValidPosition(board, piece, adjX=0, adjY=0): # Return True if the piece is within the board and not colliding for x in range(TEMPLATEWIDTH): for y in range(TEMPLATEHEIGHT): isAboveBoard = y + piece['y'] + adjY < 0 if isAboveBoard or PIECES[piece['shape']][piece['rotation']][y][x] == BLANK: continue if not isOnBoard(x + piece['x'] + adjX, y + piece['y'] + adjY): return False if board[x + piece['x'] + adjX][y + piece['y'] + adjY] != BLANK: return False return True def isCompleteLine(board, y): # Return True if the line filled with boxes with no gaps. for x in range(BOARDWIDTH): if board[x][y] == BLANK: return False return True def removeCompleteLines(board): # Remove any completed lines on the board, move everything above them down, and return the number of complete lines. numLinesRemoved = 0 y = BOARDHEIGHT - 1 # start y at the bottom of the board while y >= 0: if isCompleteLine(board, y): # Remove the line and pull boxes down by one line. for pullDownY in range(y, 0, -1): for x in range(BOARDWIDTH): board[x][pullDownY] = board[x][pullDownY-1] # Set very top line to blank. for x in range(BOARDWIDTH): board[x][0] = BLANK numLinesRemoved += 1 # Note on the next iteration of the loop, y is the same. # This is so that if the line that was pulled down is also # complete, it will be removed. else: y -= 1 # move on to check next row up return numLinesRemoved def convertToPixelCoords(boxx, boxy): # Convert the given xy coordinates of the board to xy # coordinates of the location on the screen. return (XMARGIN + (boxx * BOXSIZE)), (TOPMARGIN + (boxy * BOXSIZE)) def drawBox(boxx, boxy, color, pixelx=None, pixely=None): # draw a single box (each tetromino piece has four boxes) # at xy coordinates on the board. Or, if pixelx & pixely # are specified, draw to the pixel coordinates stored in # pixelx & pixely (this is used for the "Next" piece). if color == BLANK: return if pixelx == None and pixely == None: pixelx, pixely = convertToPixelCoords(boxx, boxy) pygame.draw.rect(DISPLAYSURF, COLORS[color], (pixelx + 1, pixely + 1, BOXSIZE - 1, BOXSIZE - 1)) pygame.draw.rect(DISPLAYSURF, LIGHTCOLORS[color], (pixelx + 1, pixely + 1, BOXSIZE - 4, BOXSIZE - 4)) def drawBoard(board): # draw the border around the board pygame.draw.rect(DISPLAYSURF, BORDERCOLOR, (XMARGIN - 3, TOPMARGIN - 7, (BOARDWIDTH * BOXSIZE) + 8, (BOARDHEIGHT * BOXSIZE) + 8), 5) # fill the background of the board pygame.draw.rect(DISPLAYSURF, BGCOLOR, (XMARGIN, TOPMARGIN, BOXSIZE * BOARDWIDTH, BOXSIZE * BOARDHEIGHT)) # draw the individual boxes on the board for x in range(BOARDWIDTH): for y in range(BOARDHEIGHT): drawBox(x, y, board[x][y]) def drawStatus(score, level): # draw the score text scoreSurf = BASICFONT.render('Score: %s' % score, True, TEXTCOLOR) scoreRect = scoreSurf.get_rect() scoreRect.topleft = (WINDOWWIDTH - 150, 20) DISPLAYSURF.blit(scoreSurf, scoreRect) # draw the level text levelSurf = BASICFONT.render('Level: %s' % level, True, TEXTCOLOR) levelRect = levelSurf.get_rect() levelRect.topleft = (WINDOWWIDTH - 150, 50) DISPLAYSURF.blit(levelSurf, levelRect) def drawPiece(piece, pixelx=None, pixely=None): shapeToDraw = PIECES[piece['shape']][piece['rotation']] if pixelx == None and pixely == None: # if pixelx & pixely hasn't been specified, use the location stored in the piece data structure pixelx, pixely = convertToPixelCoords(piece['x'], piece['y']) # draw each of the boxes that make up the piece for x in range(TEMPLATEWIDTH): for y in range(TEMPLATEHEIGHT): if shapeToDraw[y][x] != BLANK: drawBox(None, None, piece['color'], pixelx + (x * BOXSIZE), pixely + (y * BOXSIZE)) def drawNextPiece(piece): # draw the "next" text nextSurf = BASICFONT.render('Next:', True, TEXTCOLOR) nextRect = nextSurf.get_rect() nextRect.topleft = (WINDOWWIDTH - 120, 80) DISPLAYSURF.blit(nextSurf, nextRect) # draw the "next" piece drawPiece(piece, pixelx=WINDOWWIDTH-120, pixely=100) if __name__ == '__main__': main() ================================================ FILE: pygame_player.py ================================================ import pygame import numpy # import is unused but required or we fail later from pygame.constants import K_DOWN, K_UP, KEYDOWN, KEYUP, QUIT import pygame.surfarray import pygame.key def function_intercept(intercepted_func, intercepting_func): """ Intercepts a method call and calls the supplied intercepting_func with the result of it's call and it's arguments Example: def get_event(result_of_real_event_get, *args, **kwargs): # do work return result_of_real_event_get pygame.event.get = function_intercept(pygame.event.get, get_event) :param intercepted_func: The function we are going to intercept :param intercepting_func: The function that will get called after the intercepted func. It is supplied the return value of the intercepted_func as the first argument and it's args and kwargs. :return: a function that combines the intercepting and intercepted function, should normally be set to the intercepted_functions location """ def wrap(*args, **kwargs): real_results = intercepted_func(*args, **kwargs) # call the function we are intercepting and get it's result intercepted_results = intercepting_func(real_results, *args, **kwargs) # call our own function a return intercepted_results return wrap class PyGamePlayer(object): def __init__(self, force_game_fps=10, run_real_time=False, pass_quit_event=True): """ Abstract class for learning agents, such as running reinforcement learning neural nets against PyGame games. The get_keys_pressed and get_feedback methods must be overriden by a subclass to use Call start method to start playing intercepting PyGame and training our machine :param force_game_fps: Fixes the pygame timer functions so the ai will get input as if it were running at this fps :type force_game_fps: int :param run_real_time: If True the game will actually run at the force_game_fps speed :type run_real_time: bool :param pass_quit_event: If True the ai will be asked for the quit event :type pass_quit_event: bool """ self.force_game_fps = force_game_fps """Fixes the pygame timer functions so the ai will get input as if it were running at this fps""" self.run_real_time = run_real_time """If True the game will actually run at the force_game_fps speed""" self.pass_quit_event = pass_quit_event """Decides whether the quit event should be passed on to the game""" self._keys_pressed = [] self._last_keys_pressed = [] self._playing = False self._default_flip = pygame.display.flip self._default_update = pygame.display.update self._default_event_get = pygame.event.get self._default_time_clock = pygame.time.Clock self._default_get_ticks = pygame.time.get_ticks self._game_time = 0.0 def get_keys_pressed(self, screen_array, feedback, terminal): """ Called whenever the screen buffer is refreshed. returns the keys we want pressed in the next until the next screen refresh :param screen_array: 3d numpy.array of float. screen_width * screen_height * rgb :param feedback: result of call to get_feedback :param terminal: boolean, True if we have reached a terminal state, meaning the next frame will be a restart :return: a list of the integer values of the keys we want pressed. See pygame.constants for values """ raise NotImplementedError("Please override this method") def get_feedback(self): """ Overriden method should hook into game events to give feeback to the learning agent :return: First = value we want to give as reward/punishment to our learning agent Second = Boolean true if we have reached a terminal state :rtype: tuple (float, boolean) """ raise NotImplementedError("Please override this method") def start(self): """ Start playing the game. We will now start listening for screen updates calling our play and reward functions and returning our intercepted key presses """ if self._playing: raise Exception("Already playing") pygame.display.flip = function_intercept(pygame.display.flip, self._on_screen_update) pygame.display.update = function_intercept(pygame.display.update, self._on_screen_update) pygame.event.get = function_intercept(pygame.event.get, self._on_event_get) pygame.time.Clock = function_intercept(pygame.time.Clock, self._on_time_clock) pygame.time.get_ticks = function_intercept(pygame.time.get_ticks, self.get_game_time_ms) # TODO: handle pygame.time.set_timer... self._playing = True def stop(self): """ Stop playing the game. Will try and return PyGame to the state it was in before we started """ if not self._playing: raise Exception("Already stopped") pygame.display.flip = self._default_flip pygame.display.update = self._default_update pygame.event.get = self._default_event_get pygame.time.Clock = self._default_time_clock pygame.time.get_ticks = self._default_get_ticks self._playing = False @property def playing(self): """ Returns if we are in a state where we are playing/intercepting PyGame calls :return: boolean """ return self._playing @playing.setter def playing(self, value): if self._playing == value: return if self._playing: self.stop() else: self.start() def get_ms_per_frame(self): return 1000.0 / self.force_game_fps def get_game_time_ms(self): return self._game_time def _on_time_clock(self, real_clock, *args, **kwargs): return self._FixedFPSClock(self, real_clock) def _on_screen_update(self, _, *args, **kwargs): surface_array = pygame.surfarray.array3d(pygame.display.get_surface()) reward, terminal = self.get_feedback() keys = self.get_keys_pressed(surface_array, reward, terminal) self._last_keys_pressed = self._keys_pressed self._keys_pressed = keys # now we have processed a frame increment the game timer self._game_time += self.get_ms_per_frame() def _on_event_get(self, _, *args, **kwargs): key_up_events = [] if len(self._last_keys_pressed) > 0: diff_list = list(set( self._last_keys_pressed) - set(self._keys_pressed)) key_up_events = [pygame.event.Event(KEYUP, {"key": x}) for x in diff_list] key_down_events = [pygame.event.Event(KEYDOWN, {"key": x}) for x in self._keys_pressed] result = [] # have to deal with arg type filters if args: if hasattr(args[0], "__iter__"): args = args[0] for type_filter in args: if type_filter == QUIT: if type_filter == QUIT: if self.pass_quit_event: for e in _: if e.type == QUIT: result.append(e) else: pass # never quit elif type_filter == KEYUP: result = result + key_up_events elif type_filter == KEYDOWN: result = result + key_down_events else: result = key_down_events + key_up_events if self.pass_quit_event: for e in _: if e.type == QUIT: result.append(e) return result def __enter__(self): self.start() return self def __exit__(self, exc_type, exc_val, exc_tb): self.stop() class _FixedFPSClock(object): def __init__(self, pygame_player, real_clock): self._pygame_player = pygame_player self._real_clock = real_clock def tick(self, _=None): if self._pygame_player.run_real_time: return self._real_clock.tick(self._pygame_player.force_game_fps) else: return self._pygame_player.get_ms_per_frame() def tick_busy_loop(self, _=None): if self._pygame_player.run_real_time: return self._real_clock.tick_busy_loop(self._pygame_player.force_game_fps) else: return self._pygame_player.get_ms_per_frame() def get_time(self): return self._pygame_player.get_game_time_ms() def get_raw_time(self): return self._pygame_player.get_game_time_ms() def get_fps(self): return int(1.0 / self._pygame_player.get_ms_per_frame()) ================================================ FILE: tests/__init__.py ================================================ ================================================ FILE: tests/test_pygame_player.py ================================================ import time import pygame from unittest import TestCase from pygame_player import PyGamePlayer class DummyPyGamePlayer(PyGamePlayer): def __init__(self, force_game_fps=10, run_real_time=False): super(DummyPyGamePlayer, self).__init__(force_game_fps=force_game_fps, run_real_time=run_real_time) def get_keys_pressed(self, screen_array, feedback, terminal): pass def get_feedback(self): return 0.0, False class TestPyGamePlayer(TestCase): DISPLAY_X = 1 DISPLAY_Y = 1 def setUp(self): pygame.init() pygame.display.set_mode((self.DISPLAY_X, self.DISPLAY_Y), 0, 32) def tearDown(self): pygame.quit() def test_restores_pygame_methods_after_exit(self): pygame_flip, pygame_update, pygame_event = pygame.display.flip, pygame.display.update, pygame.event.get with PyGamePlayer(): # methods should be replaced self.assertNotEqual(pygame_flip, pygame.display.flip) self.assertNotEqual(pygame_update, pygame.display.update) self.assertNotEqual(pygame_event, pygame.event.get) # original methods should be restored self.assertEqual(pygame_flip, pygame.display.flip) self.assertEqual(pygame_update, pygame.display.update) self.assertEqual(pygame_event, pygame.event.get) def test_fixing_frames_per_second(self): fix_fps_to = 3 with DummyPyGamePlayer(force_game_fps=fix_fps_to): clock = pygame.time.Clock() start_time_ms = clock.get_time() for _ in range(fix_fps_to): pygame.display.update() end_time_ms = clock.get_time() self.assertAlmostEqual(end_time_ms - start_time_ms, 1000.0, msg='Expected only 1000 milliseconds to have passed on the clock after screen updates') def test_get_keys_pressed_method_sets_event_get(self): fixed_key_pressed = 24 class FixedKeysReturned(DummyPyGamePlayer): def get_keys_pressed(self, screen_array, feedback, terminal): return [fixed_key_pressed] with FixedKeysReturned(): pygame.display.update() key_pressed = pygame.event.get() self.assertEqual(key_pressed[0].key, fixed_key_pressed) def test_get_screen_buffer(self): class TestScreenArray(DummyPyGamePlayer): def get_keys_pressed(inner_self, screen_array, feedback, terminal): self.assertEqual(screen_array.shape[0], self.DISPLAY_X) self.assertEqual(screen_array.shape[1], self.DISPLAY_Y) with TestScreenArray(): pygame.display.update() def test_run_real_time(self): fix_fps_to = 3 with PyGamePlayer(force_game_fps=fix_fps_to, run_real_time=True): start = time.time() clock = pygame.time.Clock() for _ in range(fix_fps_to): clock.tick(42343) end = time.time() self.assertAlmostEqual(end-start, 1.0, delta=0.1)