Repository: golsun/deep-RL-trading Branch: master Commit: 4109834e9178 Files: 19 Total size: 36.7 KB Directory structure: gitextract_vp_b1_jb/ ├── .gitignore ├── LICENSE ├── README.md ├── data/ │ ├── PairSamplerDB/ │ │ ├── randjump_100,1(10, 30)[]_A/ │ │ │ ├── db.pickle │ │ │ └── param.json │ │ └── randjump_100,1(10, 30)[]_B/ │ │ ├── db.pickle │ │ └── param.json │ └── SinSamplerDB/ │ ├── concat_half_base_A/ │ │ ├── db.pickle │ │ └── param.json │ └── concat_half_base_B/ │ ├── db.pickle │ └── param.json ├── env.yml └── src/ ├── agents.py ├── emulator.py ├── lib.py ├── main.py ├── sampler.py ├── simulators.py └── visualizer.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ *.pyc ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2018 Xiang Gao Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # **Playing trading games with deep reinforcement learning** This repo is the code for this [paper](https://arxiv.org/abs/1803.03916). Deep reinforcement learning is used to find optimal strategies in these two scenarios: * Momentum trading: capture the underlying dynamics * Arbitrage trading: utilize the hidden relation among the inputs Several neural networks are compared: * Recurrent Neural Networks (GRU/LSTM) * Convolutional Neural Network (CNN) * Multi-Layer Perceptron (MLP) ### Dependencies You can get all dependencies via the [Anaconda](https://conda.io/docs/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file) environment file, [env.yml](https://github.com/golsun/deep-RL-time-series/blob/master/env.yml): conda env create -f env.yml ### Play with it Just call the main function python main.py You can play with model parameters (specified in main.py), if you get good results or any trouble, please contact me at gxiang1228@gmail.com ================================================ FILE: data/PairSamplerDB/randjump_100,1(10, 30)[]_A/param.json ================================================ {"n_episodes": 100, "title": "randjump(5, (10, 30), 1, [])", "window_episode": 180, "forecast_horizon_range": [10, 30], "max_change_perc": 30.0, "noise_level": 5, "n_section": 1, "n_var": 2} ================================================ FILE: data/PairSamplerDB/randjump_100,1(10, 30)[]_B/param.json ================================================ {"n_episodes": 100, "title": "randjump(5, (10, 30), 1, [])", "window_episode": 180, "forecast_horizon_range": [10, 30],
"max_change_perc": 30.0, "noise_level": 5, "n_section": 1, "n_var": 2} ================================================ FILE: data/SinSamplerDB/concat_half_base_A/param.json ================================================ {"n_episodes": 100, "title": "ConcatHalfSin+Base(0.5, (10, 40), (5, 80))", "window_episode": 180, "noise_amplitude_ratio": 0.5, "period_range": [10, 40], "amplitude_range": [5, 80], "can_half_period": true} ================================================ FILE: data/SinSamplerDB/concat_half_base_B/param.json ================================================ {"n_episodes": 100, "title": "ConcatHalfSin+Base(0.5, (10, 40), (5, 80))", "window_episode": 180, "noise_amplitude_ratio": 0.5, "period_range": [10, 40], "amplitude_range": [5, 80], "can_half_period": true} ================================================ FILE: env.yml ================================================ name: drlts channels: - defaults dependencies: - ca-certificates=2018.03.07=0 - certifi=2018.4.16=py36_0 - h5py=2.7.1=py36h39cdac5_0 - hdf5=1.10.1=ha036c08_1 - intel-openmp=2018.0.0=8 - keras=2.1.5=py36_0 - libcxx=4.0.1=h579ed51_0 - libcxxabi=4.0.1=hebd6815_0 - libedit=3.1=hb4e282d_0 - libffi=3.2.1=h475c297_4 - libgfortran=3.0.1=h93005f0_2 - libprotobuf=3.5.2=h2cd40f5_0 - mkl=2018.0.2=1 - ncurses=6.0=hd04f020_2 - numpy=1.12.1=py36h8871d66_1 - openssl=1.0.2o=h26aff7b_0 - pandas=0.22.0=py36h0a44026_0 - pip=9.0.3=py36_0 - protobuf=3.5.2=py36h0a44026_0 - python=3.6.5=hc167b69_0 - python-dateutil=2.7.2=py36_0 - pytz=2018.4=py36_0 - pyyaml=3.12=py36h2ba1e63_1 - readline=7.0=hc1231fa_4 - scipy=1.0.1=py36hcaad992_0 - setuptools=39.0.1=py36_0 - six=1.11.0=py36h0e22d5e_1 - sqlite=3.23.1=hf1716c9_0 - tensorflow=1.1.0=np112py36_0 - tk=8.6.7=h35a86e2_3 - werkzeug=0.14.1=py36_0 - wheel=0.31.0=py36_0 - xz=5.2.3=h0278029_2 - yaml=0.1.7=hc338f04_2 - zlib=1.2.11=hf3cbc9b_2 ================================================ FILE: src/agents.py ================================================ from lib 
import * class Agent: def __init__(self, model, batch_size=32, discount_factor=0.95): self.model = model self.batch_size = batch_size self.discount_factor = discount_factor self.memory = [] def remember(self, state, action, reward, next_state, done, next_valid_actions): self.memory.append((state, action, reward, next_state, done, next_valid_actions)) def replay(self): batch = random.sample(self.memory, min(len(self.memory), self.batch_size)) for state, action, reward, next_state, done, next_valid_actions in batch: q = reward if not done: q += self.discount_factor * np.nanmax(self.get_q_valid(next_state, next_valid_actions)) self.model.fit(state, action, q) def get_q_valid(self, state, valid_actions): q = self.model.predict(state) q_valid = [np.nan] * len(q) for action in valid_actions: q_valid[action] = q[action] return q_valid def act(self, state, exploration, valid_actions): if np.random.random() > exploration: q_valid = self.get_q_valid(state, valid_actions) if np.nanmin(q_valid) != np.nanmax(q_valid): return np.nanargmax(q_valid) return random.sample(valid_actions, 1)[0] def save(self, fld): makedirs(fld) attr = { 'batch_size':self.batch_size, 'discount_factor':self.discount_factor, #'memory':self.memory } pickle.dump(attr, open(os.path.join(fld, 'agent_attr.pickle'),'wb')) self.model.save(fld) def load(self, fld): path = os.path.join(fld, 'agent_attr.pickle') print(path) attr = pickle.load(open(path,'rb')) for k in attr: setattr(self, k, attr[k]) self.model.load(fld) def add_dim(x, shape): return np.reshape(x, (1,) + shape) class QModelKeras: # ref: https://keon.io/deep-q-learning/ def init(self): pass def build_model(self): pass def __init__(self, state_shape, n_action): self.state_shape = state_shape self.n_action = n_action self.attr2save = ['state_shape','n_action','model_name'] self.init() def save(self, fld): makedirs(fld) with open(os.path.join(fld, 'model.json'), 'w') as json_file: json_file.write(self.model.to_json()) 
self.model.save_weights(os.path.join(fld, 'weights.hdf5')) attr = dict() for a in self.attr2save: attr[a] = getattr(self, a) pickle.dump(attr, open(os.path.join(fld, 'Qmodel_attr.pickle'),'wb')) def load(self, fld, learning_rate): json_str = open(os.path.join(fld, 'model.json')).read() self.model = keras.models.model_from_json(json_str) self.model.load_weights(os.path.join(fld, 'weights.hdf5')) self.model.compile(loss='mse', optimizer=keras.optimizers.Adam(lr=learning_rate)) attr = pickle.load(open(os.path.join(fld, 'Qmodel_attr.pickle'), 'rb')) for a in attr: setattr(self, a, attr[a]) def predict(self, state): q = self.model.predict( add_dim(state, self.state_shape) )[0] if np.isnan(max(q)): print('state'+str(state)) print('q'+str(q)) raise ValueError return q def fit(self, state, action, q_action): q = self.predict(state) q[action] = q_action self.model.fit( add_dim(state, self.state_shape), add_dim(q, (self.n_action,)), epochs=1, verbose=0) class QModelMLP(QModelKeras): # multi-layer perception (MLP), i.e., dense only def init(self): self.qmodel = 'MLP' def build_model(self, n_hidden, learning_rate, activation='relu'): model = keras.models.Sequential() model.add(keras.layers.Reshape( (self.state_shape[0]*self.state_shape[1],), input_shape=self.state_shape)) for i in range(len(n_hidden)): model.add(keras.layers.Dense(n_hidden[i], activation=activation)) #model.add(keras.layers.Dropout(drop_rate)) model.add(keras.layers.Dense(self.n_action, activation='linear')) model.compile(loss='mse', optimizer=keras.optimizers.Adam(lr=learning_rate)) self.model = model self.model_name = self.qmodel + str(n_hidden) class QModelRNN(QModelKeras): """ https://keras.io/getting-started/sequential-model-guide/#example note param doesn't grow with len of sequence """ def _build_model(self, Layer, n_hidden, dense_units, learning_rate, activation='relu'): model = keras.models.Sequential() model.add(keras.layers.Reshape(self.state_shape, input_shape=self.state_shape)) m = len(n_hidden) 
for i in range(m): model.add(Layer(n_hidden[i], return_sequences=(i self.window_episode: break prices.append(p[:self.window_episode]) return np.array(prices).T, 'concat sin' def __sample_concat_sin_w_base(self): prices = [] p = [] while True: p = np.append(p, self.__rand_sin(full_episode=False)[0]) if len(p) > self.window_episode: break base, base_title = self.__rand_sin( period_range=self.base_period_range, amplitude_range=self.base_amplitude_range, noise_amplitude_ratio=0., full_episode=True) prices.append(p[:self.window_episode] + base) return np.array(prices).T, 'concat sin + base: '+base_title def __sample_single_sin(self): prices = [] funcs = [] p, func = self.__rand_sin(full_episode=True) prices.append(p) funcs.append(func) return np.array(prices).T, str(funcs) def test_SinSampler(): window_episode = 180 window_state = 40 noise_amplitude_ratio = 0.5 period_range = (10,40) amplitude_range = (5,80) game = 'concat_half_base' instruments = ['fake'] sampler = SinSampler(game, window_episode, noise_amplitude_ratio, period_range, amplitude_range) n_episodes = 100 """ for i in range(100): plt.plot(sampler.sample(instruments)[0]) plt.show() """ fld = os.path.join('data','SinSamplerDB',game+'_B') sampler.build_db(n_episodes, fld) def test_PairSampler(): fhr = (10,30) n_section = 1 max_change_perc = 30. 
noise_level = 5 game = 'randjump' windows_transform = [] sampler = PairSampler(game, window_episode=180, forecast_horizon_range=fhr, n_section=n_section, noise_level=noise_level, max_change_perc=max_change_perc, windows_transform=windows_transform) #plt.plot(sampler.sample()[0]);plt.show() #""" n_episodes = 100 fld = os.path.join('data','PairSamplerDB', game+'_%i,%i'%(n_episodes, n_section)+str(fhr)+str(windows_transform)+'_B') sampler.build_db(n_episodes, fld) #""" if __name__ == '__main__': #scan_match() test_SinSampler() #p = [1,2,3,2,1,2,3] #print find_ideal(p) test_PairSampler() ================================================ FILE: src/simulators.py ================================================ from lib import * class Simulator: def play_one_episode(self, exploration, training=True, rand_price=True, print_t=False): state, valid_actions = self.env.reset(rand_price=rand_price) done = False env_t = 0 try: env_t = self.env.t except AttributeError: pass cum_rewards = [np.nan] * env_t actions = [np.nan] * env_t states = [None] * env_t prev_cum_rewards = 0. 
while not done: if print_t: print(self.env.t) action = self.agent.act(state, exploration, valid_actions) next_state, reward, done, valid_actions = self.env.step(action) cum_rewards.append(prev_cum_rewards+reward) prev_cum_rewards = cum_rewards[-1] actions.append(action) states.append(next_state) if training: self.agent.remember(state, action, reward, next_state, done, valid_actions) self.agent.replay() state = next_state return cum_rewards, actions, states def train(self, n_episode, save_per_episode=10, exploration_decay=0.995, exploration_min=0.01, print_t=False, exploration_init=1.): fld_model = os.path.join(self.fld_save,'model') makedirs(fld_model) # don't overwrite if already exists with open(os.path.join(fld_model,'QModel.txt'),'w') as f: f.write(self.agent.model.qmodel) exploration = exploration_init fld_save = os.path.join(self.fld_save,'training') makedirs(fld_save) MA_window = 100 # MA of performance safe_total_rewards = [] explored_total_rewards = [] explorations = [] path_record = os.path.join(fld_save,'record.csv') with open(path_record,'w') as f: f.write('episode,game,exploration,explored,safe,MA_explored,MA_safe\n') for n in range(n_episode): print('\ntraining...') exploration = max(exploration_min, exploration * exploration_decay) explorations.append(exploration) explored_cum_rewards, explored_actions, _ = self.play_one_episode(exploration, print_t=print_t) explored_total_rewards.append(100.*explored_cum_rewards[-1]/self.env.max_profit) safe_cum_rewards, safe_actions, _ = self.play_one_episode(0, training=False, rand_price=False, print_t=False) safe_total_rewards.append(100.*safe_cum_rewards[-1]/self.env.max_profit) MA_total_rewards = np.median(explored_total_rewards[-MA_window:]) MA_safe_total_rewards = np.median(safe_total_rewards[-MA_window:]) ss = [ str(n), self.env.title.replace(',',';'), '%.1f'%(exploration*100.), '%.1f'%(explored_total_rewards[-1]), '%.1f'%(safe_total_rewards[-1]), '%.1f'%MA_total_rewards, '%.1f'%MA_safe_total_rewards, ] with 
open(path_record,'a') as f: f.write(','.join(ss)+'\n') print('\t'.join(ss)) if n%save_per_episode == 0: print('saving results...') self.agent.save(fld_model) """ self.visualizer.plot_a_episode( self.env, self.agent.model, explored_cum_rewards, explored_actions, safe_cum_rewards, safe_actions, os.path.join(fld_save, 'episode_%i.png'%(n))) self.visualizer.plot_episodes( explored_total_rewards, safe_total_rewards, explorations, os.path.join(fld_save, 'total_rewards.png'), MA_window) """ def test(self, n_episode, save_per_episode=10, subfld='testing'): fld_save = os.path.join(self.fld_save, subfld) makedirs(fld_save) MA_window = 100 # MA of performance safe_total_rewards = [] path_record = os.path.join(fld_save,'record.csv') with open(path_record,'w') as f: f.write('episode,game,pnl,rel,MA\n') for n in range(n_episode): print('\ntesting...') safe_cum_rewards, safe_actions, _ = self.play_one_episode(0, training=False, rand_price=True) safe_total_rewards.append(100.*safe_cum_rewards[-1]/self.env.max_profit) MA_safe_total_rewards = np.median(safe_total_rewards[-MA_window:]) ss = [str(n), self.env.title.replace(',',';'), '%.1f'%(safe_cum_rewards[-1]), '%.1f'%(safe_total_rewards[-1]), '%.1f'%MA_safe_total_rewards] with open(path_record,'a') as f: f.write(','.join(ss)+'\n') print('\t'.join(ss)) if n%save_per_episode == 0: print('saving results...') """ self.visualizer.plot_a_episode( self.env, self.agent.model, [np.nan]*len(safe_cum_rewards), [np.nan]*len(safe_actions), safe_cum_rewards, safe_actions, os.path.join(fld_save, 'episode_%i.png'%(n))) self.visualizer.plot_episodes( None, safe_total_rewards, None, os.path.join(fld_save, 'total_rewards.png'), MA_window) """ def __init__(self, agent, env, visualizer, fld_save): self.agent = agent self.env = env self.visualizer = visualizer self.fld_save = fld_save if __name__ == '__main__': #print 'episode%i, init%i'%(1,2) a = [1,2,3] print(np.mean(a[-100:])) ================================================ FILE: src/visualizer.py 
# ================================================
from lib import *


def get_tick_labels(bins, ticks):
    """Build '%.2f' tick labels from bin edge values.

    bins  : sequence of bin edge values
    ticks : tick positions (indices into `bins`, possibly past the end)

    A tick position beyond the last bin is labeled with the last edge
    plus a '+' suffix to mark the open-ended bucket.
    """
    ticklabels = []
    for i in ticks:
        if i < len(bins):
            ticklabels.append('%.2f'%(bins[int(i)]))
        else:
            # tick lies past the last edge: label it as open-ended
            ticklabels.append('%.2f'%(bins[-1])+'+')
    return ticklabels


class Visualizer:
    """Plots episode traces (prices/P&L, actions, Q-values) and training curves."""

    def __init__(self, action_labels):
        self.n_action = len(action_labels)
        self.action_labels = action_labels

    def plot_a_episode(self,
            env, model,
            explored_cum_rewards, explored_actions,
            safe_cum_rewards, safe_actions,
            fig_path):
        """Plot one episode to `fig_path`: normalized prices + cumulative P&L,
        the action sequences, and the model's Q(s, a) over time."""
        f, axs = plt.subplots(3, 1, sharex=True, figsize=(14, 14))
        ax_price, ax_action, ax_Q = axs

        ls = ['-', '--']
        for i in range(min(2, env.prices.shape[1])):
            # normalize each instrument to % change from t=0
            p = env.prices[:, i]/env.prices[0, i]*100 - 100
            ax_price.plot(p, 'k'+ls[i], label='input%i - 100'%i)
        ax_price.plot(explored_cum_rewards, 'b', label='explored P&L')
        ax_price.plot(safe_cum_rewards, 'r', label='safe P&L')
        ax_price.legend(loc='best', frameon=False)
        ax_price.set_title(env.title+', ideal: %.1f, safe: %.1f, explored: %1.f'%(
            env.max_profit, safe_cum_rewards[-1], explored_cum_rewards[-1]))

        ax_action.plot(explored_actions, 'b', label='explored')
        ax_action.plot(safe_actions, 'r', label='safe', linewidth=2)
        ax_action.set_ylim(-0.4, self.n_action - 0.6)
        ax_action.set_ylabel('action')
        ax_action.set_yticks(range(self.n_action))
        ax_action.legend(loc='best', frameon=False)

        style = ['k', 'r', 'b']  # assumes n_action <= 3 — TODO confirm for larger action sets
        qq = []
        # FIX: was `xrange` (Python 2); this project pins Python 3.6 in env.yml,
        # where `xrange` raises NameError — replaced with `range`.
        for t in range(env.t0):
            # no state/Q before the first decision point t0
            qq.append([np.nan] * self.n_action)
        for t in range(env.t0, env.t_max):
            qq.append(model.predict(env.get_state(t)))
        for i in range(self.n_action):
            ax_Q.plot([float(qq[t][i]) for t in range(len(qq))],
                    style[i], label=self.action_labels[i])
        ax_Q.set_ylabel('Q')
        ax_Q.legend(loc='best', frameon=False)
        ax_Q.set_xlabel('t')

        plt.subplots_adjust(wspace=0.4)
        plt.savefig(fig_path)
        plt.close()

    def plot_episodes(self,
            explored_total_rewards, safe_total_rewards, explorations,
            fig_path, MA_window=100):
        """Plot per-episode total rewards (scatter + rolling median/std) and the
        exploration schedule to `fig_path`.

        `explored_total_rewards` may be None (pure testing): then only the safe
        curve is drawn and no exploration subplot is created.
        """
        f = plt.figure(figsize=(14, 10))  # width, height in inch (100 pixel)

        if explored_total_rewards is None:
            f, ax_reward = plt.subplots()
        else:
            figshape = (3, 1)
            ax_reward = plt.subplot2grid(figshape, (0, 0), rowspan=2)
            ax_exploration = plt.subplot2grid(figshape, (2, 0), sharex=ax_reward)
        tt = range(len(safe_total_rewards))

        def _rolling_stats(x):
            # FIX: pd.rolling_median / pd.rolling_std were deprecated in pandas
            # 0.18 and later removed; Series.rolling() is the supported API and
            # works with the pandas 0.22 pinned in env.yml. `.values` keeps the
            # ndarray return type of the old functions.
            s = pd.Series(np.array(x))
            ma = s.rolling(window=MA_window, min_periods=1).median().values
            std = s.rolling(window=MA_window, min_periods=3).std().values
            return ma, std

        if explored_total_rewards is not None:
            ma, std = _rolling_stats(explored_total_rewards)
            ax_reward.plot(tt, explored_total_rewards, 'bv', fillstyle='none')
            ax_reward.plot(tt, ma, 'b', label='explored ma', linewidth=2)
            ax_reward.plot(tt, std, 'b--', label='explored std', linewidth=2)

        ma, std = _rolling_stats(safe_total_rewards)
        ax_reward.plot(tt, safe_total_rewards, 'ro', fillstyle='none')
        ax_reward.plot(tt, ma, 'r', label='safe ma', linewidth=2)
        ax_reward.plot(tt, std, 'r--', label='safe std', linewidth=2)
        ax_reward.axhline(y=0, color='k', linestyle=':')
        ax_reward.set_ylabel('total reward')
        ax_reward.legend(loc='best', frameon=False)
        ax_reward.yaxis.tick_right()
        ylim = ax_reward.get_ylim()
        ax_reward.set_ylim((max(-100, ylim[0]), min(100, ylim[1])))

        if explored_total_rewards is not None:
            # the exploration subplot only exists in training mode (see above)
            ax_exploration.plot(tt, np.array(explorations)*100., 'k')
            ax_exploration.set_ylabel('exploration')
            ax_exploration.set_xlabel('episode')

        plt.savefig(fig_path)
        plt.close()


def test_visualizer():
    """Smoke test for a colorbar-sharing grid of matshow panels."""
    f = plt.figure()
    ncol = 3
    nrow = 2
    clim = (0, 1)

    ax = plt.subplot2grid((nrow, ncol), (0, ncol-1))
    ax.matshow(np.random.random((2, 2)), cmap='RdYlBu_r', clim=clim)
    cax = None
    for action in range(3):
        # FIX: was `1 + action/ncol` — float under Python 3 true division;
        # subplot2grid requires integer grid coordinates.
        row = 1 + action//ncol
        col = action%ncol
        ax = plt.subplot2grid((nrow, ncol), (row, col))
        cax = ax.matshow(np.random.random((2, 2)), cmap='RdYlBu_r', clim=clim)

    ax = plt.subplot2grid((nrow, ncol), (0, 0), colspan=ncol-1)
    cbar = f.colorbar(cax, ax=ax)
    plt.show()


class VisualizerSequential:
    """Wraps a Keras Sequential model with one sub-model per layer so that
    intermediate activations can be inspected and plotted."""

    def config(self):
        # hook for subclasses; called at the end of __init__
        pass

    def __init__(self, model):
        self.model = model
        self.layers = [str(layer.name) for layer in self.model.layers]
        self.inter_models = dict()
        model_input = self.model.input
        for layer in self.layers:
            # sub-model that outputs this layer's activation for a given input
            self.inter_models[layer] = keras.models.Model(
                    inputs=model_input,
                    outputs=self.model.get_layer(layer).output)
        self.config()


class VisualizerConv1D(VisualizerSequential):
    """Visualizes a Conv1D network: one row of subplots per layer, one column
    per channel/filter."""

    def config(self):
        # int() casts: with old TensorFlow backends, shape entries are
        # Dimension objects; range()/figshape need plain ints.
        self.n_channel = int(self.model.input.shape[2])
        n_col = self.n_channel
        for layer in self.layers:
            shape = self.inter_models[layer].output.shape
            if len(shape) == 3:
                n_col = max(n_col, int(shape[2]))
        self.figshape = (len(self.layers) + 1, int(n_col))

    def plot(self, x):
        """Plot input channels and every layer's activations for one sample.

        Assumes x has shape (1, time, channel) — TODO confirm against caller.
        """
        f = plt.figure(figsize=(30, 30))
        for i in range(self.n_channel):
            ax = plt.subplot2grid(self.figshape, (0, i))
            ax.plot(x[0, :, i], '.-')
            ax.set_title('input, channel %i'%i)

        for i_layer in range(len(self.layers)):
            layer = self.layers[i_layer]
            z = self.inter_models[layer].predict(x)
            print('plotting '+layer)
            if len(z.shape) == 3:
                # conv/recurrent output: one subplot per filter
                for i in range(z.shape[2]):
                    ax = plt.subplot2grid(self.figshape, (i_layer+1, i))
                    ax.plot(z[0, :, i], '.-')
                    ax.set_title(layer+' filter %i'%i)
            else:
                # flat (dense) output
                ax = plt.subplot2grid(self.figshape, (i_layer+1, 0))
                ax.plot(z[0, :], '.-')
                ax.set_title(layer)
                ax.set_ylim(-100, 100)

    def print_w(self):
        """Print the shapes and values of the first layer's weights."""
        layer = self.layers[0]
        ww = self.inter_models[layer].get_weights()
        for w in ww:
            print(w.shape)
            print(w)